/*-------------------------------------------------------------------------
 *
 * selfuncs.c
 *    Selectivity functions and index cost estimation functions for
 *    standard operators and index access methods.
 *
 *    Selectivity routines are registered in the pg_operator catalog
 *    in the "oprrest" and "oprjoin" attributes.
 *
 *    Index cost functions are located via the index AM's API struct,
 *    which is obtained from the handler function registered in pg_am.
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    src/backend/utils/adt/selfuncs.c
 *
 *-------------------------------------------------------------------------
 */

/*----------
 * Operator selectivity estimation functions are called to estimate the
 * selectivity of WHERE clauses whose top-level operator is their operator.
 * We divide the problem into two cases:
 *    Restriction clause estimation: the clause involves vars of just
 *        one relation.
 *    Join clause estimation: the clause involves vars of multiple rels.
 * Join selectivity estimation is far more difficult and usually less accurate
 * than restriction estimation.
 *
 * When dealing with the inner scan of a nestloop join, we consider the
 * join's joinclauses as restriction clauses for the inner relation, and
 * treat vars of the outer relation as parameters (a/k/a constants of unknown
 * values).  So, restriction estimators need to be able to accept an argument
 * telling which relation is to be treated as the variable.
 *
 * The call convention for a restriction estimator (oprrest function) is
 *
 *    Selectivity oprrest (PlannerInfo *root,
 *                         Oid operator,
 *                         List *args,
 *                         int varRelid);
 *
 * root: general information about the query (rtable and RelOptInfo lists
 *    are particularly important for the estimator).
 * operator: OID of the specific operator in question.
 * args: argument list from the operator clause.
 * varRelid: if not zero, the relid (rtable index) of the relation to
 *    be treated as the variable relation.  May be zero if the args list
 *    is known to contain vars of only one relation.
 *
 * This is represented at the SQL level (in pg_proc) as
 *
 *    float8 oprrest (internal, oid, internal, int4);
 *
 * The result is a selectivity, that is, a fraction (0 to 1) of the rows
 * of the relation that are expected to produce a TRUE result for the
 * given operator.
 *
 * The call convention for a join estimator (oprjoin function) is similar
 * except that varRelid is not needed, and instead join information is
 * supplied:
 *
 *    Selectivity oprjoin (PlannerInfo *root,
 *                         Oid operator,
 *                         List *args,
 *                         JoinType jointype,
 *                         SpecialJoinInfo *sjinfo);
 *
 *    float8 oprjoin (internal, oid, internal, int2, internal);
 *
 * (Before Postgres 8.4, join estimators had only the first four of these
 * parameters.  That signature is still allowed, but deprecated.)  The
 * relationship between jointype and sjinfo is explained in the comments for
 * clause_selectivity() --- the short version is that jointype is usually
 * best ignored in favor of examining sjinfo.
 *
 * Join selectivity for regular inner and outer joins is defined as the
 * fraction (0 to 1) of the cross product of the relations that is expected
 * to produce a TRUE result for the given operator.  For both semi and anti
 * joins, however, the selectivity is defined as the fraction of the left-hand
 * side relation's rows that are expected to have a match (ie, at least one
 * row with a TRUE result) in the right-hand side.
 *
 * For both oprrest and oprjoin functions, the operator's input collation OID
 * (if any) is passed using the standard fmgr mechanism, so that the estimator
 * function can fetch it with PG_GET_COLLATION().  Note, however, that all
 * statistics in pg_statistic are currently built using the relevant column's
 * collation.  Thus, in most cases where we are looking at statistics, we
 * should ignore the operator collation and use the stats entry's collation.
 * We expect that the error induced by doing this is usually not large enough
 * to justify complicating matters.  In any case, doing otherwise would yield
 * entirely garbage results for ordered stats data such as histograms.
 *----------
 */
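
/*
 * Illustrative example (not used by anything in this file): a hypothetical
 * extension operator would attach these estimators at the SQL level roughly
 * like this; the operator name "<<>>" and the function myschema.fuzzy_eq
 * are made-up names:
 *
 *    CREATE OPERATOR <<>> (
 *        LEFTARG = text,
 *        RIGHTARG = text,
 *        PROCEDURE = myschema.fuzzy_eq,
 *        RESTRICT = eqsel,
 *        JOIN = eqjoinsel
 *    );
 *
 * The RESTRICT and JOIN clauses populate pg_operator.oprrest and
 * pg_operator.oprjoin, which is how the planner finds its way to the
 * estimators in this file.
 */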

#include "postgres.h"

#include <ctype.h>
#include <math.h>

#include "access/brin.h"
#include "access/gin.h"
#include "access/table.h"
#include "access/tableam.h"
#include "access/visibilitymap.h"
#include "catalog/pg_am.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_operator.h"
#include "catalog/pg_statistic.h"
#include "catalog/pg_statistic_ext.h"
#include "executor/nodeAgg.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/clauses.h"
#include "optimizer/cost.h"
#include "optimizer/optimizer.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/plancat.h"
#include "parser/parse_clause.h"
#include "parser/parsetree.h"
#include "statistics/statistics.h"
#include "storage/bufmgr.h"
#include "utils/builtins.h"
#include "utils/date.h"
#include "utils/datum.h"
#include "utils/fmgroids.h"
#include "utils/index_selfuncs.h"
#include "utils/lsyscache.h"
#include "utils/memutils.h"
#include "utils/pg_locale.h"
#include "utils/rel.h"
#include "utils/selfuncs.h"
#include "utils/snapmgr.h"
#include "utils/spccache.h"
#include "utils/syscache.h"
#include "utils/timestamp.h"
#include "utils/typcache.h"


/* Hooks for plugins to get control when we ask for stats */
get_relation_stats_hook_type get_relation_stats_hook = NULL;
get_index_stats_hook_type get_index_stats_hook = NULL;

static double eqsel_internal(PG_FUNCTION_ARGS, bool negate);
static double eqjoinsel_inner(Oid opfuncoid,
                              VariableStatData *vardata1, VariableStatData *vardata2,
                              double nd1, double nd2,
                              bool isdefault1, bool isdefault2,
                              AttStatsSlot *sslot1, AttStatsSlot *sslot2,
                              Form_pg_statistic stats1, Form_pg_statistic stats2,
                              bool have_mcvs1, bool have_mcvs2);
static double eqjoinsel_semi(Oid opfuncoid,
                             VariableStatData *vardata1, VariableStatData *vardata2,
                             double nd1, double nd2,
                             bool isdefault1, bool isdefault2,
                             AttStatsSlot *sslot1, AttStatsSlot *sslot2,
                             Form_pg_statistic stats1, Form_pg_statistic stats2,
                             bool have_mcvs1, bool have_mcvs2,
                             RelOptInfo *inner_rel);
static bool estimate_multivariate_ndistinct(PlannerInfo *root,
                                            RelOptInfo *rel, List **varinfos, double *ndistinct);
static bool convert_to_scalar(Datum value, Oid valuetypid, Oid collid,
                              double *scaledvalue,
                              Datum lobound, Datum hibound, Oid boundstypid,
                              double *scaledlobound, double *scaledhibound);
static double convert_numeric_to_scalar(Datum value, Oid typid, bool *failure);
static void convert_string_to_scalar(char *value,
                                     double *scaledvalue,
                                     char *lobound,
                                     double *scaledlobound,
                                     char *hibound,
                                     double *scaledhibound);
static void convert_bytea_to_scalar(Datum value,
                                    double *scaledvalue,
                                    Datum lobound,
                                    double *scaledlobound,
                                    Datum hibound,
                                    double *scaledhibound);
static double convert_one_string_to_scalar(char *value,
                                           int rangelo, int rangehi);
static double convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
                                          int rangelo, int rangehi);
static char *convert_string_datum(Datum value, Oid typid, Oid collid,
                                  bool *failure);
static double convert_timevalue_to_scalar(Datum value, Oid typid,
                                          bool *failure);
static void examine_simple_variable(PlannerInfo *root, Var *var,
                                    VariableStatData *vardata);
static bool get_variable_range(PlannerInfo *root, VariableStatData *vardata,
                               Oid sortop, Datum *min, Datum *max);
static bool get_actual_variable_range(PlannerInfo *root,
                                      VariableStatData *vardata,
                                      Oid sortop,
                                      Datum *min, Datum *max);
static bool get_actual_variable_endpoint(Relation heapRel,
                                         Relation indexRel,
                                         ScanDirection indexscandir,
                                         ScanKey scankeys,
                                         int16 typLen,
                                         bool typByVal,
                                         TupleTableSlot *tableslot,
                                         MemoryContext outercontext,
                                         Datum *endpointDatum);
static RelOptInfo *find_join_input_rel(PlannerInfo *root, Relids relids);


/*
 * eqsel - Selectivity of "=" for any data types.
 *
 * Note: this routine is also used to estimate selectivity for some
 * operators that are not "=" but have comparable selectivity behavior,
 * such as "~=" (geometric approximate-match).  Even for "=", we must
 * keep in mind that the left and right datatypes may differ.
 */
Datum
eqsel(PG_FUNCTION_ARGS)
{
    PG_RETURN_FLOAT8((float8) eqsel_internal(fcinfo, false));
}

/*
 * Common code for eqsel() and neqsel()
 */
static double
eqsel_internal(PG_FUNCTION_ARGS, bool negate)
{
    PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
    Oid         operator = PG_GETARG_OID(1);
    List       *args = (List *) PG_GETARG_POINTER(2);
    int         varRelid = PG_GETARG_INT32(3);
    VariableStatData vardata;
    Node       *other;
    bool        varonleft;
    double      selec;

    /*
     * When asked about <>, we do the estimation using the corresponding =
     * operator, then convert to <> via "1.0 - eq_selectivity - nullfrac".
     */
    if (negate)
    {
        operator = get_negator(operator);
        if (!OidIsValid(operator))
        {
            /* Use default selectivity (should we raise an error instead?) */
            return 1.0 - DEFAULT_EQ_SEL;
        }
    }

    /*
     * If expression is not variable = something or something = variable, then
     * punt and return a default estimate.
     */
    if (!get_restriction_variable(root, args, varRelid,
                                  &vardata, &other, &varonleft))
        return negate ? (1.0 - DEFAULT_EQ_SEL) : DEFAULT_EQ_SEL;

    /*
     * We can do a lot better if the something is a constant.  (Note: the
     * Const might result from estimation rather than being a simple constant
     * in the query.)
     */
    if (IsA(other, Const))
        selec = var_eq_const(&vardata, operator,
                             ((Const *) other)->constvalue,
                             ((Const *) other)->constisnull,
                             varonleft, negate);
    else
        selec = var_eq_non_const(&vardata, operator, other,
                                 varonleft, negate);

    ReleaseVariableStats(vardata);

    return selec;
}
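
/*
 * Worked example of the negation rule above, with illustrative numbers
 * only: if the column's stanullfrac is 0.10 and the "=" estimate for the
 * comparison value is 0.25, then the "<>" estimate is
 * 1.0 - 0.25 - 0.10 = 0.65.  NULLs satisfy neither "=" nor "<>", which is
 * why nullfrac is subtracted rather than folded into the complement.
 */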

/*
 * var_eq_const --- eqsel for var = const case
 *
 * This is exported so that some other estimation functions can use it.
 */
double
var_eq_const(VariableStatData *vardata, Oid operator,
             Datum constval, bool constisnull,
             bool varonleft, bool negate)
{
    double      selec;
    double      nullfrac = 0.0;
    bool        isdefault;
    Oid         opfuncoid;

    /*
     * If the constant is NULL, assume operator is strict and return zero, ie,
     * operator will never return TRUE.  (It's zero even for a negator op.)
     */
    if (constisnull)
        return 0.0;

    /*
     * Grab the nullfrac for use below.  Note we allow use of nullfrac
     * regardless of security check.
     */
    if (HeapTupleIsValid(vardata->statsTuple))
    {
        Form_pg_statistic stats;

        stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
        nullfrac = stats->stanullfrac;
    }

    /*
     * If we matched the var to a unique index or DISTINCT clause, assume
     * there is exactly one match regardless of anything else.  (This is
     * slightly bogus, since the index or clause's equality operator might be
     * different from ours, but it's much more likely to be right than
     * ignoring the information.)
     */
    if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
    {
        selec = 1.0 / vardata->rel->tuples;
    }
    else if (HeapTupleIsValid(vardata->statsTuple) &&
             statistic_proc_security_check(vardata,
                                           (opfuncoid = get_opcode(operator))))
    {
        AttStatsSlot sslot;
        bool        match = false;
        int         i;

        /*
         * Is the constant "=" to any of the column's most common values?
         * (Although the given operator may not really be "=", we will assume
         * that seeing whether it returns TRUE is an appropriate test.  If you
         * don't like this, maybe you shouldn't be using eqsel for your
         * operator...)
         */
        if (get_attstatsslot(&sslot, vardata->statsTuple,
                             STATISTIC_KIND_MCV, InvalidOid,
                             ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
        {
            FmgrInfo    eqproc;

            fmgr_info(opfuncoid, &eqproc);

            for (i = 0; i < sslot.nvalues; i++)
            {
                /* be careful to apply operator right way 'round */
                if (varonleft)
                    match = DatumGetBool(FunctionCall2Coll(&eqproc,
                                                           sslot.stacoll,
                                                           sslot.values[i],
                                                           constval));
                else
                    match = DatumGetBool(FunctionCall2Coll(&eqproc,
                                                           sslot.stacoll,
                                                           constval,
                                                           sslot.values[i]));
                if (match)
                    break;
            }
        }
        else
        {
            /* no most-common-value info available */
            i = 0;              /* keep compiler quiet */
        }

        if (match)
        {
            /*
             * Constant is "=" to this common value.  We know selectivity
             * exactly (or as exactly as ANALYZE could calculate it, anyway).
             */
            selec = sslot.numbers[i];
        }
        else
        {
            /*
             * Comparison is against a constant that is neither NULL nor any
             * of the common values.  Its selectivity cannot be more than
             * this:
             */
            double      sumcommon = 0.0;
            double      otherdistinct;

            for (i = 0; i < sslot.nnumbers; i++)
                sumcommon += sslot.numbers[i];
            selec = 1.0 - sumcommon - nullfrac;
            CLAMP_PROBABILITY(selec);

            /*
             * and in fact it's probably a good deal less.  We approximate
             * that all the not-common values share this remaining fraction
             * equally, so we divide by the number of other distinct values.
             */
            otherdistinct = get_variable_numdistinct(vardata, &isdefault) -
                sslot.nnumbers;
            if (otherdistinct > 1)
                selec /= otherdistinct;

            /*
             * Another cross-check: selectivity shouldn't be estimated as more
             * than the least common "most common value".
             */
            if (sslot.nnumbers > 0 && selec > sslot.numbers[sslot.nnumbers - 1])
                selec = sslot.numbers[sslot.nnumbers - 1];
        }

        free_attstatsslot(&sslot);
    }
    else
    {
        /*
         * No ANALYZE stats available, so make a guess using estimated number
         * of distinct values and assuming they are equally common.  (The
         * guess is unlikely to be very good, but we do know a few special
         * cases.)
         */
        selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
    }

    /* now adjust if we wanted <> rather than = */
    if (negate)
        selec = 1.0 - selec - nullfrac;

    /* result should be in range, but make sure... */
    CLAMP_PROBABILITY(selec);

    return selec;
}
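
/*
 * Worked example of the non-MCV branch above, with illustrative numbers
 * only: suppose the stored MCVs cover a total frequency of 0.60, the
 * column's stanullfrac is 0.10, and get_variable_numdistinct() reports 100
 * distinct values of which 10 are the MCVs.  The non-MCV population is
 * then 1.0 - 0.60 - 0.10 = 0.30, spread over 100 - 10 = 90 remaining
 * distinct values, giving selec = 0.30 / 90, or roughly 0.0033; that is
 * then capped at the least common MCV's frequency.
 */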

/*
 * var_eq_non_const --- eqsel for var = something-other-than-const case
 *
 * This is exported so that some other estimation functions can use it.
 */
double
var_eq_non_const(VariableStatData *vardata, Oid operator,
                 Node *other,
                 bool varonleft, bool negate)
{
    double      selec;
    double      nullfrac = 0.0;
    bool        isdefault;

    /*
     * Grab the nullfrac for use below.
     */
    if (HeapTupleIsValid(vardata->statsTuple))
    {
        Form_pg_statistic stats;

        stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
        nullfrac = stats->stanullfrac;
    }

    /*
     * If we matched the var to a unique index or DISTINCT clause, assume
     * there is exactly one match regardless of anything else.  (This is
     * slightly bogus, since the index or clause's equality operator might be
     * different from ours, but it's much more likely to be right than
     * ignoring the information.)
     */
    if (vardata->isunique && vardata->rel && vardata->rel->tuples >= 1.0)
    {
        selec = 1.0 / vardata->rel->tuples;
    }
    else if (HeapTupleIsValid(vardata->statsTuple))
    {
        double      ndistinct;
        AttStatsSlot sslot;

        /*
         * Search is for a value that we do not know a priori, but we will
         * assume it is not NULL.  Estimate the selectivity as non-null
         * fraction divided by number of distinct values, so that we get a
         * result averaged over all possible values whether common or
         * uncommon.  (Essentially, we are assuming that the not-yet-known
         * comparison value is equally likely to be any of the possible
         * values, regardless of their frequency in the table.  Is that a
         * good idea?)
         */
        selec = 1.0 - nullfrac;
        ndistinct = get_variable_numdistinct(vardata, &isdefault);
        if (ndistinct > 1)
            selec /= ndistinct;

        /*
         * Cross-check: selectivity should never be estimated as more than
         * the most common value's.
         */
        if (get_attstatsslot(&sslot, vardata->statsTuple,
                             STATISTIC_KIND_MCV, InvalidOid,
                             ATTSTATSSLOT_NUMBERS))
        {
            if (sslot.nnumbers > 0 && selec > sslot.numbers[0])
                selec = sslot.numbers[0];
            free_attstatsslot(&sslot);
        }
    }
    else
    {
        /*
         * No ANALYZE stats available, so make a guess using estimated number
         * of distinct values and assuming they are equally common.  (The
         * guess is unlikely to be very good, but we do know a few special
         * cases.)
         */
        selec = 1.0 / get_variable_numdistinct(vardata, &isdefault);
    }

    /* now adjust if we wanted <> rather than = */
    if (negate)
        selec = 1.0 - selec - nullfrac;

    /* result should be in range, but make sure... */
    CLAMP_PROBABILITY(selec);

    return selec;
}

/*
 * neqsel - Selectivity of "!=" for any data types.
 *
 * This routine is also used for some operators that are not "!="
 * but have comparable selectivity behavior.  See above comments
 * for eqsel().
 */
Datum
neqsel(PG_FUNCTION_ARGS)
{
    PG_RETURN_FLOAT8((float8) eqsel_internal(fcinfo, true));
}

/*
 * scalarineqsel - Selectivity of "<", "<=", ">", ">=" for scalars.
 *
 * This is the guts of scalarltsel/scalarlesel/scalargtsel/scalargesel.
 * The isgt and iseq flags distinguish which of the four cases apply.
 *
 * The caller has commuted the clause, if necessary, so that we can treat
 * the variable as being on the left.  The caller must also make sure that
 * the other side of the clause is a non-null Const, and dissect that into
 * a value and datatype.  (This definition simplifies some callers that
 * want to estimate against a computed value instead of a Const node.)
 *
 * This routine works for any datatype (or pair of datatypes) known to
 * convert_to_scalar().  If it is applied to some other datatype,
 * it will return an approximate estimate based on assuming that the constant
 * value falls in the middle of the bin identified by binary search.
 */
static double
scalarineqsel(PlannerInfo *root, Oid operator, bool isgt, bool iseq,
              VariableStatData *vardata, Datum constval, Oid consttype)
{
    Form_pg_statistic stats;
    FmgrInfo    opproc;
    double      mcv_selec,
                hist_selec,
                sumcommon;
    double      selec;

    if (!HeapTupleIsValid(vardata->statsTuple))
    {
        /*
         * No stats are available.  Typically this means we have to fall back
         * on the default estimate; but if the variable is CTID then we can
         * make an estimate based on comparing the constant to the table
         * size.
         */
        if (vardata->var && IsA(vardata->var, Var) &&
            ((Var *) vardata->var)->varattno == SelfItemPointerAttributeNumber)
        {
            ItemPointer itemptr;
            double      block;
            double      density;

            /*
             * If the relation's empty, we're going to include all of it.
             * (This is mostly to avoid divide-by-zero below.)
             */
            if (vardata->rel->pages == 0)
                return 1.0;

            itemptr = (ItemPointer) DatumGetPointer(constval);
            block = ItemPointerGetBlockNumberNoCheck(itemptr);

            /*
             * Determine the average number of tuples per page (density).
             *
             * Since the last page will, on average, be only half full, we
             * can estimate it to have half as many tuples as earlier pages.
             * So give it half the weight of a regular page.
             */
            density = vardata->rel->tuples / (vardata->rel->pages - 0.5);

            /* If target is the last page, use half the density. */
            if (block >= vardata->rel->pages - 1)
                density *= 0.5;

            /*
             * Using the average tuples per page, calculate how far into the
             * page the itemptr is likely to be and adjust block accordingly,
             * by adding that fraction of a whole block (but never more than
             * a whole block, no matter how high the itemptr's offset is).
             * Here we are ignoring the possibility of dead-tuple line
             * pointers, which is fairly bogus, but we lack the info to do
             * better.
             */
            if (density > 0.0)
            {
                OffsetNumber offset = ItemPointerGetOffsetNumberNoCheck(itemptr);

                block += Min(offset / density, 1.0);
            }

            /*
             * Convert relative block number to selectivity.  Again, the last
             * page has only half weight.
             */
            selec = block / (vardata->rel->pages - 0.5);

            /*
             * The calculation so far gave us a selectivity for the "<=" case.
             * We'll have one fewer tuple for "<" and one additional tuple for
             * ">=", the latter of which we'll reverse the selectivity for
             * below, so we can simply subtract one tuple for both cases.  The
             * cases that need this adjustment can be identified by iseq being
             * equal to isgt.
             */
            if (iseq == isgt && vardata->rel->tuples >= 1.0)
                selec -= (1.0 / vardata->rel->tuples);

            /* Finally, reverse the selectivity for the ">", ">=" cases. */
            if (isgt)
                selec = 1.0 - selec;

            CLAMP_PROBABILITY(selec);
            return selec;
        }

        /* no stats available, so default result */
        return DEFAULT_INEQ_SEL;
    }
    stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);

    fmgr_info(get_opcode(operator), &opproc);

    /*
     * If we have most-common-values info, add up the fractions of the MCV
     * entries that satisfy MCV OP CONST.  These fractions contribute
     * directly to the result selectivity.  Also add up the total fraction
     * represented by MCV entries.
     */
    mcv_selec = mcv_selectivity(vardata, &opproc, constval, true,
                                &sumcommon);

    /*
     * If there is a histogram, determine which bin the constant falls in,
     * and compute the resulting contribution to selectivity.
     */
    hist_selec = ineq_histogram_selectivity(root, vardata,
                                            &opproc, isgt, iseq,
                                            constval, consttype);

    /*
     * Now merge the results from the MCV and histogram calculations,
     * realizing that the histogram covers only the non-null values that are
     * not listed in MCV.
     */
    selec = 1.0 - stats->stanullfrac - sumcommon;

    if (hist_selec >= 0.0)
        selec *= hist_selec;
    else
    {
        /*
         * If no histogram but there are values not accounted for by MCV,
         * arbitrarily assume half of them will match.
         */
        selec *= 0.5;
    }

    selec += mcv_selec;

    /* result should be in range, but make sure... */
    CLAMP_PROBABILITY(selec);

    return selec;
}
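
/*
 * Worked example of the merge step in scalarineqsel, with illustrative
 * numbers only: if stanullfrac = 0.05, the MCVs cover sumcommon = 0.25 of
 * the rows with mcv_selec = 0.10 of them satisfying the clause, and
 * hist_selec = 0.40, then the histogram governs the remaining
 * 1.0 - 0.05 - 0.25 = 0.70 of the rows and the final estimate is
 * 0.70 * 0.40 + 0.10 = 0.38.
 */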

/*
 * mcv_selectivity - Examine the MCV list for selectivity estimates
 *
 * Determine the fraction of the variable's MCV population that satisfies
 * the predicate (VAR OP CONST), or (CONST OP VAR) if !varonleft.  Also
 * compute the fraction of the total column population represented by the MCV
 * list.  This code will work for any boolean-returning predicate operator.
 *
 * The function result is the MCV selectivity, and the fraction of the
 * total population is returned into *sumcommonp.  Zeroes are returned
 * if there is no MCV list.
 */
double
mcv_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
                Datum constval, bool varonleft,
                double *sumcommonp)
{
    double      mcv_selec,
                sumcommon;
    AttStatsSlot sslot;
    int         i;

    mcv_selec = 0.0;
    sumcommon = 0.0;

    if (HeapTupleIsValid(vardata->statsTuple) &&
        statistic_proc_security_check(vardata, opproc->fn_oid) &&
        get_attstatsslot(&sslot, vardata->statsTuple,
                         STATISTIC_KIND_MCV, InvalidOid,
                         ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS))
    {
        for (i = 0; i < sslot.nvalues; i++)
        {
            if (varonleft ?
                DatumGetBool(FunctionCall2Coll(opproc,
                                               sslot.stacoll,
                                               sslot.values[i],
                                               constval)) :
                DatumGetBool(FunctionCall2Coll(opproc,
                                               sslot.stacoll,
                                               constval,
                                               sslot.values[i])))
                mcv_selec += sslot.numbers[i];
            sumcommon += sslot.numbers[i];
        }
        free_attstatsslot(&sslot);
    }

    *sumcommonp = sumcommon;
    return mcv_selec;
}

/*
 * histogram_selectivity - Examine the histogram for selectivity estimates
 *
 * Determine the fraction of the variable's histogram entries that satisfy
 * the predicate (VAR OP CONST), or (CONST OP VAR) if !varonleft.
 *
 * This code will work for any boolean-returning predicate operator, whether
 * or not it has anything to do with the histogram sort operator.  We are
 * essentially using the histogram just as a representative sample.  However,
 * small histograms are unlikely to be all that representative, so the caller
 * should be prepared to fall back on some other estimation approach when the
 * histogram is missing or very small.  It may also be prudent to combine this
 * approach with another one when the histogram is small.
 *
 * If the actual histogram size is not at least min_hist_size, we won't bother
 * to do the calculation at all.  Also, if the n_skip parameter is > 0, we
 * ignore the first and last n_skip histogram elements, on the grounds that
 * they are outliers and hence not very representative.  Typical values for
 * these parameters are 10 and 1.
 *
 * The function result is the selectivity, or -1 if there is no histogram
 * or it's smaller than min_hist_size.
 *
 * The output parameter *hist_size receives the actual histogram size,
 * or zero if no histogram.  Callers may use this number to decide how
 * much faith to put in the function result.
 *
 * Note that the result disregards both the most-common-values (if any) and
 * null entries.  The caller is expected to combine this result with
 * statistics for those portions of the column population.  It may also be
 * prudent to clamp the result range, ie, disbelieve exact 0 or 1 outputs.
 */
double
histogram_selectivity(VariableStatData *vardata, FmgrInfo *opproc,
                      Datum constval, bool varonleft,
                      int min_hist_size, int n_skip,
                      int *hist_size)
{
    double      result;
    AttStatsSlot sslot;

    /* check sanity of parameters */
    Assert(n_skip >= 0);
    Assert(min_hist_size > 2 * n_skip);

    if (HeapTupleIsValid(vardata->statsTuple) &&
        statistic_proc_security_check(vardata, opproc->fn_oid) &&
        get_attstatsslot(&sslot, vardata->statsTuple,
                         STATISTIC_KIND_HISTOGRAM, InvalidOid,
                         ATTSTATSSLOT_VALUES))
    {
        *hist_size = sslot.nvalues;
        if (sslot.nvalues >= min_hist_size)
        {
            int         nmatch = 0;
            int         i;

            for (i = n_skip; i < sslot.nvalues - n_skip; i++)
            {
                if (varonleft ?
                    DatumGetBool(FunctionCall2Coll(opproc,
                                                   sslot.stacoll,
                                                   sslot.values[i],
                                                   constval)) :
                    DatumGetBool(FunctionCall2Coll(opproc,
                                                   sslot.stacoll,
                                                   constval,
                                                   sslot.values[i])))
                    nmatch++;
            }
            result = ((double) nmatch) / ((double) (sslot.nvalues - 2 * n_skip));
        }
        else
            result = -1;
        free_attstatsslot(&sslot);
    }
    else
    {
        *hist_size = 0;
        result = -1;
    }

    return result;
}
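
/*
 * Usage sketch for histogram_selectivity(), mirroring the parameter advice
 * above; the surrounding policy (what to fall back on, when to blend) is
 * the caller's own choice, not anything this function prescribes:
 *
 *    double  heursel;
 *    int     hist_size;
 *
 *    heursel = histogram_selectivity(&vardata, &opproc, constval, true,
 *                                    10, 1, &hist_size);
 *    if (heursel < 0)
 *        ... fall back on some other estimate ...
 *    else if (hist_size < 100)
 *        ... blend heursel with another estimate, trusting the
 *        ... histogram more as hist_size grows
 */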

/*
 * ineq_histogram_selectivity - Examine the histogram for scalarineqsel
 *
 * Determine the fraction of the variable's histogram population that
 * satisfies the inequality condition, ie, VAR < (or <=, >, >=) CONST.
 * The isgt and iseq flags distinguish which of the four cases apply.
 *
 * Returns -1 if there is no histogram (valid results will always be >= 0).
 *
 * Note that the result disregards both the most-common-values (if any) and
 * null entries.  The caller is expected to combine this result with
 * statistics for those portions of the column population.
 *
 * This is exported so that some other estimation functions can use it.
 */
double
ineq_histogram_selectivity(PlannerInfo *root,
                           VariableStatData *vardata,
                           FmgrInfo *opproc, bool isgt, bool iseq,
                           Datum constval, Oid consttype)
{
    double      hist_selec;
    AttStatsSlot sslot;

    hist_selec = -1.0;

    /*
     * Someday, ANALYZE might store more than one histogram per rel/att,
     * corresponding to more than one possible sort ordering defined for the
     * column type.  However, to make that work we will need to figure out
     * which staop to search for --- it's not necessarily the one we have at
     * hand!  (For example, we might have a '<=' operator rather than the '<'
     * operator that will appear in staop.)  For now, assume that whatever
     * appears in pg_statistic is sorted the same way our operator sorts, or
     * the reverse way if isgt is true.
     */
    if (HeapTupleIsValid(vardata->statsTuple) &&
        statistic_proc_security_check(vardata, opproc->fn_oid) &&
        get_attstatsslot(&sslot, vardata->statsTuple,
                         STATISTIC_KIND_HISTOGRAM, InvalidOid,
                         ATTSTATSSLOT_VALUES))
    {
        if (sslot.nvalues > 1)
        {
            /*
             * Use binary search to find the desired location, namely the
             * right end of the histogram bin containing the comparison
             * value, which is the leftmost entry for which the comparison
             * operator succeeds (if isgt) or fails (if !isgt).  (If the
             * given operator isn't actually sort-compatible with the
             * histogram, you'll get garbage results ... but probably not any
             * more garbage-y than you would have from the old linear
             * search.)
             *
             * In this loop, we pay no attention to whether the operator iseq
             * or not; that detail will be mopped up below.  (We cannot tell,
             * anyway, whether the operator thinks the values are equal.)
             *
             * If the binary search accesses the first or last histogram
             * entry, we try to replace that endpoint with the true column
             * min or max as found by get_actual_variable_range().  This
             * ameliorates misestimates when the min or max is moving as a
             * result of changes since the last ANALYZE.  Note that this
             * could result in effectively including MCVs into the histogram
             * that weren't there before, but we don't try to correct for
             * that.
             */
            double      histfrac;
            int         lobound = 0;    /* first possible slot to search */
            int         hibound = sslot.nvalues;    /* last+1 slot to search */
            bool        have_end = false;

            /*
             * If there are only two histogram entries, we'll want up-to-date
             * values for both.  (If there are more than two, we need at most
             * one of them to be updated, so we deal with that within the
             * loop.)
             */
            if (sslot.nvalues == 2)
                have_end = get_actual_variable_range(root,
                                                     vardata,
                                                     sslot.staop,
                                                     &sslot.values[0],
                                                     &sslot.values[1]);

            while (lobound < hibound)
            {
                int         probe = (lobound + hibound) / 2;
                bool        ltcmp;

                /*
                 * If we find ourselves about to compare to the first or last
                 * histogram entry, first try to replace it with the actual
                 * current min or max (unless we already did so above).
                 */
                if (probe == 0 && sslot.nvalues > 2)
                    have_end = get_actual_variable_range(root,
                                                         vardata,
                                                         sslot.staop,
                                                         &sslot.values[0],
                                                         NULL);
                else if (probe == sslot.nvalues - 1 && sslot.nvalues > 2)
                    have_end = get_actual_variable_range(root,
                                                         vardata,
                                                         sslot.staop,
                                                         NULL,
                                                         &sslot.values[probe]);

                ltcmp = DatumGetBool(FunctionCall2Coll(opproc,
                                                       sslot.stacoll,
                                                       sslot.values[probe],
                                                       constval));
                if (isgt)
                    ltcmp = !ltcmp;
                if (ltcmp)
                    lobound = probe + 1;
                else
                    hibound = probe;
            }

            if (lobound <= 0)
            {
                /*
                 * Constant is below lower histogram boundary.  More
                 * precisely, we have found that no entry in the histogram
                 * satisfies the inequality clause (if !isgt) or they all do
                 * (if isgt).  We estimate that that's true of the entire
                 * table, so set histfrac to 0.0 (which we'll flip to 1.0
                 * below, if isgt).
                 */
                histfrac = 0.0;
            }
            else if (lobound >= sslot.nvalues)
            {
                /*
                 * Inverse case: constant is above upper histogram boundary.
                 */
                histfrac = 1.0;
            }
            else
            {
                /* We have values[i-1] <= constant <= values[i]. */
                int         i = lobound;
                double      eq_selec = 0;
                double      val,
                            high,
                            low;
                double      binfrac;

                /*
                 * In the cases where we'll need it below, obtain an estimate
                 * of the selectivity of "x = constval".  We use a calculation
                 * similar to what var_eq_const() does for a non-MCV constant,
                 * ie, estimate that all distinct non-MCV values occur equally
                 * often.  But multiplication by "1.0 - sumcommon - nullfrac"
                 * will be done by our caller, so we shouldn't do that here.
                 * Therefore we can't try to clamp the estimate by reference
                 * to the least common MCV; the result would be too small.
                 *
                 * Note: since this is effectively assuming that constval
                 * isn't an MCV, it's logically dubious if constval in fact is
                 * one.  But we have to apply *some* correction for equality,
                 * and anyway we cannot tell if constval is an MCV, since we
                 * don't have a suitable equality operator at hand.
                 */
                if (i == 1 || isgt == iseq)
                {
                    double      otherdistinct;
                    bool        isdefault;
                    AttStatsSlot mcvslot;

                    /* Get estimated number of distinct values */
                    otherdistinct = get_variable_numdistinct(vardata,
                                                             &isdefault);

                    /* Subtract off the number of known MCVs */
                    if (get_attstatsslot(&mcvslot, vardata->statsTuple,
                                         STATISTIC_KIND_MCV, InvalidOid,
                                         ATTSTATSSLOT_NUMBERS))
                    {
                        otherdistinct -= mcvslot.nnumbers;
                        free_attstatsslot(&mcvslot);
                    }

                    /* If result doesn't seem sane, leave eq_selec at 0 */
                    if (otherdistinct > 1)
                        eq_selec = 1.0 / otherdistinct;
                }

                /*
                 * Convert the constant and the two nearest bin boundary
                 * values to a uniform comparison scale, and do a linear
                 * interpolation within this bin.
                 */
                if (convert_to_scalar(constval, consttype, sslot.stacoll,
                                      &val,
                                      sslot.values[i - 1], sslot.values[i],
                                      vardata->vartype,
                                      &low, &high))
                {
                    if (high <= low)
                    {
                        /* cope if bin boundaries appear identical */
                        binfrac = 0.5;
                    }
                    else if (val <= low)
                        binfrac = 0.0;
                    else if (val >= high)
                        binfrac = 1.0;
                    else
                    {
                        binfrac = (val - low) / (high - low);

                        /*
                         * Watch out for the possibility that we got a NaN or
                         * Infinity from the division.  This can happen
                         * despite the previous checks, if for example "low"
                         * is -Infinity.
                         */
                        if (isnan(binfrac) ||
                            binfrac < 0.0 || binfrac > 1.0)
                            binfrac = 0.5;
                    }
                }
                else
                {
                    /*
                     * Ideally we'd produce an error here, on the grounds that
                     * the given operator shouldn't have scalarXXsel
                     * registered as its selectivity func unless we can deal
                     * with its operand types.  But currently, all manner of
                     * stuff is invoking scalarXXsel, so give a default
                     * estimate until that can be fixed.
                     */
                    binfrac = 0.5;
                }

                /*
                 * Now, compute the overall selectivity across the values
                 * represented by the histogram.  We have i-1 full bins and
                 * binfrac partial bin below the constant.
                 */
                histfrac = (double) (i - 1) + binfrac;
                histfrac /= (double) (sslot.nvalues - 1);

                /*
                 * At this point, histfrac is an estimate of the fraction of
                 * the population represented by the histogram that satisfies
                 * "x <= constval".  Somewhat remarkably, this statement is
                 * true regardless of which operator we were doing the probes
                 * with, so long as convert_to_scalar() delivers reasonable
                 * results.  If the probe constant is equal to some histogram
                 * entry, we would have considered the bin to the left of that
                 * entry if probing with "<" or ">=", or the bin to the right
                 * if probing with "<=" or ">"; but binfrac would have come
                 * out as 1.0 in the first case and 0.0 in the second, leading
                 * to the same histfrac in either case.  For probe constants
                 * between histogram entries, we find the same bin and get the
                 * same estimate with any operator.
                 *
                 * The fact that the estimate corresponds to "x <= constval"
                 * and not "x < constval" is because of the way that ANALYZE
                 * constructs the histogram: each entry is, effectively, the
                 * rightmost value in its sample bucket.  So selectivity
                 * values that are exact multiples of 1/(histogram_size-1)
                 * should be understood as estimates including a histogram
                 * entry plus everything to its left.
                 *
                 * However, that breaks down for the first histogram entry,
                 * which necessarily is the leftmost value in its sample
                 * bucket.  That means the first histogram bin is slightly
                 * narrower than the rest, by an amount equal to eq_selec.
                 * Another way to say that is that we want "x <= leftmost" to
                 * be estimated as eq_selec not zero.  So, if we're dealing
                 * with the first bin (i==1), rescale to make that true while
                 * adjusting the rest of that bin linearly.
                 */
                if (i == 1)
                    histfrac += eq_selec * (1.0 - binfrac);

                /*
                 * "x <= constval" is good if we want an estimate for "<=" or
                 * ">", but if we are estimating for "<" or ">=", we now need
                 * to decrease the estimate by eq_selec.
                 */
                if (isgt == iseq)
                    histfrac -= eq_selec;
            }

            /*
             * Now the estimate is finished for "<" and "<=" cases.  If we are
             * estimating for ">" or ">=", flip it.
             */
            hist_selec = isgt ? (1.0 - histfrac) : histfrac;

            /*
             * The histogram boundaries are only approximate to begin with,
             * and may well be out of date anyway.  Therefore, don't believe
             * extremely small or large selectivity estimates --- unless we
             * got actual current endpoint values from the table, in which
             * case just do the usual sanity clamp.  Somewhat arbitrarily, we
             * set the cutoff for other cases at a hundredth of the histogram
             * resolution.
             */
            if (have_end)
                CLAMP_PROBABILITY(hist_selec);
            else
            {
                double      cutoff = 0.01 / (double) (sslot.nvalues - 1);

                if (hist_selec < cutoff)
                    hist_selec = cutoff;
                else if (hist_selec > 1.0 - cutoff)
                    hist_selec = 1.0 - cutoff;
            }
        }

        free_attstatsslot(&sslot);
    }

    return hist_selec;
}
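
/*
 * Worked example of the interpolation above, with illustrative numbers
 * only: take a 101-entry histogram (100 bins) and a constant that lands in
 * bin i = 31 whose boundaries convert to low = 200 and high = 300, with
 * the constant itself converting to val = 275.  Then
 * binfrac = (275 - 200) / (300 - 200) = 0.75 and
 * histfrac = (31 - 1 + 0.75) / 100 = 0.3075, ie, about 30.75% of the
 * histogrammed population is estimated to satisfy "x <= constval".
 */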

/*
 * Common wrapper function for the selectivity estimators that simply
 * invoke scalarineqsel().
 */
static Datum
scalarineqsel_wrapper(PG_FUNCTION_ARGS, bool isgt, bool iseq)
{
    PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
    Oid         operator = PG_GETARG_OID(1);
    List       *args = (List *) PG_GETARG_POINTER(2);
    int         varRelid = PG_GETARG_INT32(3);
    VariableStatData vardata;
    Node       *other;
    bool        varonleft;
    Datum       constval;
    Oid         consttype;
    double      selec;

    /*
     * If expression is not variable op something or something op variable,
     * then punt and return a default estimate.
     */
    if (!get_restriction_variable(root, args, varRelid,
                                  &vardata, &other, &varonleft))
        PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);

    /*
     * Can't do anything useful if the something is not a constant, either.
     */
    if (!IsA(other, Const))
    {
        ReleaseVariableStats(vardata);
        PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
    }

    /*
     * If the constant is NULL, assume operator is strict and return zero, ie,
     * operator will never return TRUE.
     */
    if (((Const *) other)->constisnull)
    {
        ReleaseVariableStats(vardata);
        PG_RETURN_FLOAT8(0.0);
    }
    constval = ((Const *) other)->constvalue;
    consttype = ((Const *) other)->consttype;

    /*
     * Force the var to be on the left to simplify logic in scalarineqsel.
     */
    if (!varonleft)
    {
        operator = get_commutator(operator);
        if (!operator)
        {
            /* Use default selectivity (should we raise an error instead?) */
            ReleaseVariableStats(vardata);
            PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
        }
        isgt = !isgt;
    }

    /* The rest of the work is done by scalarineqsel(). */
    selec = scalarineqsel(root, operator, isgt, iseq,
                          &vardata, constval, consttype);

    ReleaseVariableStats(vardata);

    PG_RETURN_FLOAT8((float8) selec);
}

/*
 * scalarltsel - Selectivity of "<" for scalars.
 */
Datum
scalarltsel(PG_FUNCTION_ARGS)
{
    return scalarineqsel_wrapper(fcinfo, false, false);
}

/*
 * scalarlesel - Selectivity of "<=" for scalars.
 */
Datum
scalarlesel(PG_FUNCTION_ARGS)
{
    return scalarineqsel_wrapper(fcinfo, false, true);
}

/*
 * scalargtsel - Selectivity of ">" for scalars.
 */
Datum
scalargtsel(PG_FUNCTION_ARGS)
{
    return scalarineqsel_wrapper(fcinfo, true, false);
}

/*
 * scalargesel - Selectivity of ">=" for scalars.
 */
Datum
scalargesel(PG_FUNCTION_ARGS)
{
    return scalarineqsel_wrapper(fcinfo, true, true);
}
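
/*
 * For quick reference, the four wrappers above map onto scalarineqsel's
 * flags as follows:
 *
 *    scalarltsel    "<"     isgt = false    iseq = false
 *    scalarlesel    "<="    isgt = false    iseq = true
 *    scalargtsel    ">"     isgt = true     iseq = false
 *    scalargesel    ">="    isgt = true     iseq = true
 */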

/*
 * boolvarsel - Selectivity of Boolean variable.
 *
 * This can actually be called on any boolean-valued expression.  If it
 * involves only Vars of the specified relation, and if there are statistics
 * about the Var or expression (the latter is possible if it's indexed) then
 * we'll produce a real estimate; otherwise it's just a default.
 */
Selectivity
boolvarsel(PlannerInfo *root, Node *arg, int varRelid)
{
    VariableStatData vardata;
    double      selec;

    examine_variable(root, arg, varRelid, &vardata);
    if (HeapTupleIsValid(vardata.statsTuple))
    {
        /*
         * A boolean variable V is equivalent to the clause V = 't', so we
         * compute the selectivity as if that is what we have.
         */
        selec = var_eq_const(&vardata, BooleanEqualOperator,
                             BoolGetDatum(true), false, true, false);
    }
    else
    {
        /* Otherwise, the default estimate is 0.5 */
        selec = 0.5;
    }
    ReleaseVariableStats(vardata);
    return selec;
}

/*
 * booltestsel - Selectivity of BooleanTest Node.
 */
Selectivity
booltestsel(PlannerInfo *root, BoolTestType booltesttype, Node *arg,
            int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
{
    VariableStatData vardata;
    double      selec;

    examine_variable(root, arg, varRelid, &vardata);

    if (HeapTupleIsValid(vardata.statsTuple))
    {
        Form_pg_statistic stats;
        double      freq_null;
        AttStatsSlot sslot;

        stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
        freq_null = stats->stanullfrac;

        if (get_attstatsslot(&sslot, vardata.statsTuple,
                             STATISTIC_KIND_MCV, InvalidOid,
                             ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS)
            && sslot.nnumbers > 0)
        {
            double      freq_true;
            double      freq_false;

            /*
             * Get first MCV frequency and derive frequency for true.
             */
            if (DatumGetBool(sslot.values[0]))
                freq_true = sslot.numbers[0];
            else
                freq_true = 1.0 - sslot.numbers[0] - freq_null;

            /*
             * Next derive frequency for false.  Then use these as appropriate
             * to derive frequency for each case.
             */
            freq_false = 1.0 - freq_true - freq_null;

            switch (booltesttype)
            {
                case IS_UNKNOWN:
                    /* select only NULL values */
                    selec = freq_null;
                    break;
                case IS_NOT_UNKNOWN:
                    /* select non-NULL values */
                    selec = 1.0 - freq_null;
                    break;
                case IS_TRUE:
                    /* select only TRUE values */
                    selec = freq_true;
                    break;
                case IS_NOT_TRUE:
                    /* select non-TRUE values */
                    selec = 1.0 - freq_true;
                    break;
                case IS_FALSE:
                    /* select only FALSE values */
                    selec = freq_false;
                    break;
                case IS_NOT_FALSE:
                    /* select non-FALSE values */
                    selec = 1.0 - freq_false;
                    break;
                default:
                    elog(ERROR, "unrecognized booltesttype: %d",
                         (int) booltesttype);
                    selec = 0.0;    /* Keep compiler quiet */
                    break;
            }

            free_attstatsslot(&sslot);
        }
        else
        {
            /*
             * No most-common-value info available.  Still have null fraction
             * information, so use it for IS [NOT] UNKNOWN.  Otherwise adjust
             * for null fraction and assume a 50-50 split of TRUE and FALSE.
             */
            switch (booltesttype)
            {
                case IS_UNKNOWN:
                    /* select only NULL values */
                    selec = freq_null;
                    break;
                case IS_NOT_UNKNOWN:
                    /* select non-NULL values */
                    selec = 1.0 - freq_null;
                    break;
                case IS_TRUE:
                case IS_FALSE:
                    /* Assume we select half of the non-NULL values */
                    selec = (1.0 - freq_null) / 2.0;
                    break;
                case IS_NOT_TRUE:
                case IS_NOT_FALSE:
                    /* Assume we select NULLs plus half of the non-NULLs */
                    /* equiv. to freq_null + (1.0 - freq_null) / 2.0 */
                    selec = (freq_null + 1.0) / 2.0;
                    break;
                default:
                    elog(ERROR, "unrecognized booltesttype: %d",
                         (int) booltesttype);
                    selec = 0.0;    /* Keep compiler quiet */
                    break;
            }
        }
    }
    else
    {
        /*
         * If we can't get variable statistics for the argument, perhaps
         * clause_selectivity can do something with it.  We ignore the
         * possibility of a NULL value when using clause_selectivity, and
         * just assume the value is either TRUE or FALSE.
         */
        switch (booltesttype)
        {
            case IS_UNKNOWN:
                selec = DEFAULT_UNK_SEL;
                break;
            case IS_NOT_UNKNOWN:
                selec = DEFAULT_NOT_UNK_SEL;
                break;
            case IS_TRUE:
            case IS_NOT_FALSE:
                selec = (double) clause_selectivity(root, arg,
                                                    varRelid,
                                                    jointype, sjinfo);
                break;
            case IS_FALSE:
            case IS_NOT_TRUE:
                selec = 1.0 - (double) clause_selectivity(root, arg,
                                                          varRelid,
                                                          jointype, sjinfo);
                break;
            default:
                elog(ERROR, "unrecognized booltesttype: %d",
                     (int) booltesttype);
                selec = 0.0;    /* Keep compiler quiet */
                break;
        }
    }

    ReleaseVariableStats(vardata);

    /* result should be in range, but make sure... */
    CLAMP_PROBABILITY(selec);

    return (Selectivity) selec;
}
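
/*
 * Worked example of the MCV branch in booltestsel, with illustrative
 * numbers only: if stanullfrac = 0.10 and the first MCV entry is
 * (false, 0.54), then freq_true = 1.0 - 0.54 - 0.10 = 0.36 and
 * freq_false = 1.0 - 0.36 - 0.10 = 0.54, so IS NOT TRUE is estimated as
 * 1.0 - 0.36 = 0.64, ie, the FALSE rows plus the NULL rows.
 */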
1448
1449/*
1450 * nulltestsel - Selectivity of NullTest Node.
1451 */
1452Selectivity
1453nulltestsel(PlannerInfo *root, NullTestType nulltesttype, Node *arg,
1454 int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
1455{
1456 VariableStatData vardata;
1457 double selec;
1458
1459 examine_variable(root, arg, varRelid, &vardata);
1460
1461 if (HeapTupleIsValid(vardata.statsTuple))
1462 {
1463 Form_pg_statistic stats;
1464 double freq_null;
1465
1466 stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
1467 freq_null = stats->stanullfrac;
1468
1469 switch (nulltesttype)
1470 {
1471 case IS_NULL:
1472
1473 /*
1474 * Use freq_null directly.
1475 */
1476 selec = freq_null;
1477 break;
1478 case IS_NOT_NULL:
1479
1480 /*
1481 * Select not unknown (not null) values. Calculate from
1482 * freq_null.
1483 */
1484 selec = 1.0 - freq_null;
1485 break;
1486 default:
1487 elog(ERROR, "unrecognized nulltesttype: %d",
1488 (int) nulltesttype);
1489 return (Selectivity) 0; /* keep compiler quiet */
1490 }
1491 }
1492 else if (vardata.var && IsA(vardata.var, Var) &&
1493 ((Var *) vardata.var)->varattno < 0)
1494 {
1495 /*
1496 * There are no stats for system columns, but we know they are never
1497 * NULL.
1498 */
1499 selec = (nulltesttype == IS_NULL) ? 0.0 : 1.0;
1500 }
1501 else
1502 {
1503 /*
1504 * No ANALYZE stats available, so make a guess
1505 */
1506 switch (nulltesttype)
1507 {
1508 case IS_NULL:
1509 selec = DEFAULT_UNK_SEL;
1510 break;
1511 case IS_NOT_NULL:
1512 selec = DEFAULT_NOT_UNK_SEL;
1513 break;
1514 default:
1515 elog(ERROR, "unrecognized nulltesttype: %d",
1516 (int) nulltesttype);
1517 return (Selectivity) 0; /* keep compiler quiet */
1518 }
1519 }
1520
1521 ReleaseVariableStats(vardata);
1522
1523 /* result should be in range, but make sure... */
1524 CLAMP_PROBABILITY(selec);
1525
1526 return (Selectivity) selec;
1527}
1528
1529/*
1530 * strip_array_coercion - strip binary-compatible relabeling from an array expr
1531 *
1532 * For array values, the parser normally generates ArrayCoerceExpr conversions,
1533 * but it seems possible that RelabelType might show up. Also, the planner
1534 * is not currently tense about collapsing stacked ArrayCoerceExpr nodes,
1535 * so we need to be ready to deal with more than one level.
1536 */
1537static Node *
1538strip_array_coercion(Node *node)
1539{
1540 for (;;)
1541 {
1542 if (node && IsA(node, ArrayCoerceExpr))
1543 {
1544 ArrayCoerceExpr *acoerce = (ArrayCoerceExpr *) node;
1545
1546 /*
1547 * If the per-element expression is just a RelabelType on top of
1548 * CaseTestExpr, then we know it's a binary-compatible relabeling.
1549 */
1550 if (IsA(acoerce->elemexpr, RelabelType) &&
1551 IsA(((RelabelType *) acoerce->elemexpr)->arg, CaseTestExpr))
1552 node = (Node *) acoerce->arg;
1553 else
1554 break;
1555 }
1556 else if (node && IsA(node, RelabelType))
1557 {
1558 /* We don't really expect this case, but may as well cope */
1559 node = (Node *) ((RelabelType *) node)->arg;
1560 }
1561 else
1562 break;
1563 }
1564 return node;
1565}
1566
1567/*
1568 * scalararraysel - Selectivity of ScalarArrayOpExpr Node.
1569 */
1570Selectivity
1571scalararraysel(PlannerInfo *root,
1572 ScalarArrayOpExpr *clause,
1573 bool is_join_clause,
1574 int varRelid,
1575 JoinType jointype,
1576 SpecialJoinInfo *sjinfo)
1577{
1578 Oid operator = clause->opno;
1579 bool useOr = clause->useOr;
1580 bool isEquality = false;
1581 bool isInequality = false;
1582 Node *leftop;
1583 Node *rightop;
1584 Oid nominal_element_type;
1585 Oid nominal_element_collation;
1586 TypeCacheEntry *typentry;
1587 RegProcedure oprsel;
1588 FmgrInfo oprselproc;
1589 Selectivity s1;
1590 Selectivity s1disjoint;
1591
1592 /* First, deconstruct the expression */
1593 Assert(list_length(clause->args) == 2);
1594 leftop = (Node *) linitial(clause->args);
1595 rightop = (Node *) lsecond(clause->args);
1596
1597 /* aggressively reduce both sides to constants */
1598 leftop = estimate_expression_value(root, leftop);
1599 rightop = estimate_expression_value(root, rightop);
1600
1601 /* get nominal (after relabeling) element type of rightop */
1602 nominal_element_type = get_base_element_type(exprType(rightop));
1603 if (!OidIsValid(nominal_element_type))
1604 return (Selectivity) 0.5; /* probably shouldn't happen */
1605 /* get nominal collation, too, for generating constants */
1606 nominal_element_collation = exprCollation(rightop);
1607
1608 /* look through any binary-compatible relabeling of rightop */
1609 rightop = strip_array_coercion(rightop);
1610
1611 /*
1612 * Detect whether the operator is the default equality or inequality
1613 * operator of the array element type.
1614 */
1615 typentry = lookup_type_cache(nominal_element_type, TYPECACHE_EQ_OPR);
1616 if (OidIsValid(typentry->eq_opr))
1617 {
1618 if (operator == typentry->eq_opr)
1619 isEquality = true;
1620 else if (get_negator(operator) == typentry->eq_opr)
1621 isInequality = true;
1622 }
1623
1624 /*
1625 * If it is equality or inequality, we might be able to estimate this as a
1626 * form of array containment; for instance "const = ANY(column)" can be
1627 * treated as "ARRAY[const] <@ column". scalararraysel_containment tries
1628 * that, and returns the selectivity estimate if successful, or -1 if not.
1629 */
1630 if ((isEquality || isInequality) && !is_join_clause)
1631 {
1632 s1 = scalararraysel_containment(root, leftop, rightop,
1633 nominal_element_type,
1634 isEquality, useOr, varRelid);
1635 if (s1 >= 0.0)
1636 return s1;
1637 }
1638
1639 /*
1640 * Look up the underlying operator's selectivity estimator. Punt if it
1641 * hasn't got one.
1642 */
1643 if (is_join_clause)
1644 oprsel = get_oprjoin(operator);
1645 else
1646 oprsel = get_oprrest(operator);
1647 if (!oprsel)
1648 return (Selectivity) 0.5;
1649 fmgr_info(oprsel, &oprselproc);
1650
1651 /*
1652 * In the array-containment check above, we must only believe that an
1653 * operator is equality or inequality if it is the default btree equality
1654 * operator (or its negator) for the element type, since those are the
1655 * operators that array containment will use. But in what follows, we can
1656 * be a little laxer, and also believe that any operators using eqsel() or
1657 * neqsel() as selectivity estimator act like equality or inequality.
1658 */
1659 if (oprsel == F_EQSEL || oprsel == F_EQJOINSEL)
1660 isEquality = true;
1661 else if (oprsel == F_NEQSEL || oprsel == F_NEQJOINSEL)
1662 isInequality = true;
1663
1664 /*
1665 * We consider three cases:
1666 *
1667 * 1. rightop is an Array constant: deconstruct the array, apply the
1668 * operator's selectivity function for each array element, and merge the
1669 * results in the same way that clausesel.c does for AND/OR combinations.
1670 *
1671 * 2. rightop is an ARRAY[] construct: apply the operator's selectivity
1672 * function for each element of the ARRAY[] construct, and merge.
1673 *
1674 * 3. otherwise, make a guess ...
1675 */
1676 if (rightop && IsA(rightop, Const))
1677 {
1678 Datum arraydatum = ((Const *) rightop)->constvalue;
1679 bool arrayisnull = ((Const *) rightop)->constisnull;
1680 ArrayType *arrayval;
1681 int16 elmlen;
1682 bool elmbyval;
1683 char elmalign;
1684 int num_elems;
1685 Datum *elem_values;
1686 bool *elem_nulls;
1687 int i;
1688
1689 if (arrayisnull) /* qual can't succeed if null array */
1690 return (Selectivity) 0.0;
1691 arrayval = DatumGetArrayTypeP(arraydatum);
1692 get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
1693 &elmlen, &elmbyval, &elmalign);
1694 deconstruct_array(arrayval,
1695 ARR_ELEMTYPE(arrayval),
1696 elmlen, elmbyval, elmalign,
1697 &elem_values, &elem_nulls, &num_elems);
1698
1699 /*
1700 * For generic operators, we assume the probability of success is
1701 * independent for each array element. But for "= ANY" or "<> ALL",
1702 * if the array elements are distinct (which'd typically be the case)
1703 * then the probabilities are disjoint, and we should just sum them.
1704 *
1705 * If we were being really tense we would try to confirm that the
1706 * elements are all distinct, but that would be expensive and it
1707 * doesn't seem to be worth the cycles; it would amount to penalizing
1708 * well-written queries in favor of poorly-written ones. However, we
1709 * do protect ourselves a little bit by checking whether the
1710 * disjointness assumption leads to an impossible (out of range)
1711 * probability; if so, we fall back to the normal calculation.
1712 */
1713 s1 = s1disjoint = (useOr ? 0.0 : 1.0);
1714
1715 for (i = 0; i < num_elems; i++)
1716 {
1717 List *args;
1718 Selectivity s2;
1719
1720 args = list_make2(leftop,
1721 makeConst(nominal_element_type,
1722 -1,
1723 nominal_element_collation,
1724 elmlen,
1725 elem_values[i],
1726 elem_nulls[i],
1727 elmbyval));
1728 if (is_join_clause)
1729 s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
1730 clause->inputcollid,
1731 PointerGetDatum(root),
1732 ObjectIdGetDatum(operator),
1733 PointerGetDatum(args),
1734 Int16GetDatum(jointype),
1735 PointerGetDatum(sjinfo)));
1736 else
1737 s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
1738 clause->inputcollid,
1739 PointerGetDatum(root),
1740 ObjectIdGetDatum(operator),
1741 PointerGetDatum(args),
1742 Int32GetDatum(varRelid)));
1743
1744 if (useOr)
1745 {
1746 s1 = s1 + s2 - s1 * s2;
1747 if (isEquality)
1748 s1disjoint += s2;
1749 }
1750 else
1751 {
1752 s1 = s1 * s2;
1753 if (isInequality)
1754 s1disjoint += s2 - 1.0;
1755 }
1756 }
1757
1758 /* accept disjoint-probability estimate if in range */
1759 if ((useOr ? isEquality : isInequality) &&
1760 s1disjoint >= 0.0 && s1disjoint <= 1.0)
1761 s1 = s1disjoint;
1762 }
1763 else if (rightop && IsA(rightop, ArrayExpr) &&
1764 !((ArrayExpr *) rightop)->multidims)
1765 {
1766 ArrayExpr *arrayexpr = (ArrayExpr *) rightop;
1767 int16 elmlen;
1768 bool elmbyval;
1769 ListCell *l;
1770
1771 get_typlenbyval(arrayexpr->element_typeid,
1772 &elmlen, &elmbyval);
1773
1774 /*
1775 * We use the assumption of disjoint probabilities here too, although
1776 * the odds of equal array elements are rather higher if the elements
1777 * are not all constants (which they won't be, else constant folding
1778 * would have reduced the ArrayExpr to a Const). In this path it's
1779 * critical to have the sanity check on the s1disjoint estimate.
1780 */
1781 s1 = s1disjoint = (useOr ? 0.0 : 1.0);
1782
1783 foreach(l, arrayexpr->elements)
1784 {
1785 Node *elem = (Node *) lfirst(l);
1786 List *args;
1787 Selectivity s2;
1788
1789 /*
1790 * Theoretically, if elem isn't of nominal_element_type we should
1791 * insert a RelabelType, but it seems unlikely that any operator
1792 * estimation function would really care ...
1793 */
1794 args = list_make2(leftop, elem);
1795 if (is_join_clause)
1796 s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
1797 clause->inputcollid,
1798 PointerGetDatum(root),
1799 ObjectIdGetDatum(operator),
1800 PointerGetDatum(args),
1801 Int16GetDatum(jointype),
1802 PointerGetDatum(sjinfo)));
1803 else
1804 s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
1805 clause->inputcollid,
1806 PointerGetDatum(root),
1807 ObjectIdGetDatum(operator),
1808 PointerGetDatum(args),
1809 Int32GetDatum(varRelid)));
1810
1811 if (useOr)
1812 {
1813 s1 = s1 + s2 - s1 * s2;
1814 if (isEquality)
1815 s1disjoint += s2;
1816 }
1817 else
1818 {
1819 s1 = s1 * s2;
1820 if (isInequality)
1821 s1disjoint += s2 - 1.0;
1822 }
1823 }
1824
1825 /* accept disjoint-probability estimate if in range */
1826 if ((useOr ? isEquality : isInequality) &&
1827 s1disjoint >= 0.0 && s1disjoint <= 1.0)
1828 s1 = s1disjoint;
1829 }
1830 else
1831 {
1832 CaseTestExpr *dummyexpr;
1833 List *args;
1834 Selectivity s2;
1835 int i;
1836
1837 /*
1838 * We need a dummy rightop to pass to the operator selectivity
1839 * routine. It can be pretty much anything that doesn't look like a
1840 * constant; CaseTestExpr is a convenient choice.
1841 */
1842 dummyexpr = makeNode(CaseTestExpr);
1843 dummyexpr->typeId = nominal_element_type;
1844 dummyexpr->typeMod = -1;
1845 dummyexpr->collation = clause->inputcollid;
1846 args = list_make2(leftop, dummyexpr);
1847 if (is_join_clause)
1848 s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
1849 clause->inputcollid,
1850 PointerGetDatum(root),
1851 ObjectIdGetDatum(operator),
1852 PointerGetDatum(args),
1853 Int16GetDatum(jointype),
1854 PointerGetDatum(sjinfo)));
1855 else
1856 s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
1857 clause->inputcollid,
1858 PointerGetDatum(root),
1859 ObjectIdGetDatum(operator),
1860 PointerGetDatum(args),
1861 Int32GetDatum(varRelid)));
1862 s1 = useOr ? 0.0 : 1.0;
1863
1864 /*
1865 * Arbitrarily assume 10 elements in the eventual array value (see
1866 * also estimate_array_length). We don't risk an assumption of
1867 * disjoint probabilities here.
1868 */
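/*
 * For instance (illustrative figure only), with useOr and s2 = 0.05 this
 * loop yields s1 = 1 - (1 - 0.05)^10 ~= 0.40.
 */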
1869 for (i = 0; i < 10; i++)
1870 {
1871 if (useOr)
1872 s1 = s1 + s2 - s1 * s2;
1873 else
1874 s1 = s1 * s2;
1875 }
1876 }
1877
1878 /* result should be in range, but make sure... */
1879 CLAMP_PROBABILITY(s1);
1880
1881 return s1;
1882}
1883
1884/*
1885 * Estimate number of elements in the array yielded by an expression.
1886 *
1887 * It's important that this agree with scalararraysel.
1888 */
1889int
1890estimate_array_length(Node *arrayexpr)
1891{
1892 /* look through any binary-compatible relabeling of arrayexpr */
1893 arrayexpr = strip_array_coercion(arrayexpr);
1894
1895 if (arrayexpr && IsA(arrayexpr, Const))
1896 {
1897 Datum arraydatum = ((Const *) arrayexpr)->constvalue;
1898 bool arrayisnull = ((Const *) arrayexpr)->constisnull;
1899 ArrayType *arrayval;
1900
1901 if (arrayisnull)
1902 return 0;
1903 arrayval = DatumGetArrayTypeP(arraydatum);
1904 return ArrayGetNItems(ARR_NDIM(arrayval), ARR_DIMS(arrayval));
1905 }
1906 else if (arrayexpr && IsA(arrayexpr, ArrayExpr) &&
1907 !((ArrayExpr *) arrayexpr)->multidims)
1908 {
1909 return list_length(((ArrayExpr *) arrayexpr)->elements);
1910 }
1911 else
1912 {
1913 /* default guess --- see also scalararraysel */
1914 return 10;
1915 }
1916}
1917
1918/*
1919 * rowcomparesel - Selectivity of RowCompareExpr Node.
1920 *
1921 * We estimate RowCompare selectivity by considering just the first (high
1922 * order) columns, which makes it equivalent to an ordinary OpExpr. While
1923 * this estimate could be refined by considering additional columns, it
1924 * seems unlikely that we could do a lot better without multi-column
1925 * statistics.
1926 */
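/*
 * For example (illustrative), the clause "(a, b) < (10, 20)" is estimated
 * here exactly as if it were the simple clause "a < 10".
 */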
1927Selectivity
1928rowcomparesel(PlannerInfo *root,
1929 RowCompareExpr *clause,
1930 int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
1931{
1932 Selectivity s1;
1933 Oid opno = linitial_oid(clause->opnos);
1934 Oid inputcollid = linitial_oid(clause->inputcollids);
1935 List *opargs;
1936 bool is_join_clause;
1937
1938 /* Build equivalent arg list for single operator */
1939 opargs = list_make2(linitial(clause->largs), linitial(clause->rargs));
1940
1941 /*
1942 * Decide if it's a join clause. This should match clausesel.c's
1943 * treat_as_join_clause(), except that we intentionally consider only the
1944 * leading columns and not the rest of the clause.
1945 */
1946 if (varRelid != 0)
1947 {
1948 /*
1949 * Caller is forcing restriction mode (eg, because we are examining an
1950 * inner indexscan qual).
1951 */
1952 is_join_clause = false;
1953 }
1954 else if (sjinfo == NULL)
1955 {
1956 /*
1957 * It must be a restriction clause, since it's being evaluated at a
1958 * scan node.
1959 */
1960 is_join_clause = false;
1961 }
1962 else
1963 {
1964 /*
1965 * Otherwise, it's a join if there's more than one relation used.
1966 */
1967 is_join_clause = (NumRelids((Node *) opargs) > 1);
1968 }
1969
1970 if (is_join_clause)
1971 {
1972 /* Estimate selectivity for a join clause. */
1973 s1 = join_selectivity(root, opno,
1974 opargs,
1975 inputcollid,
1976 jointype,
1977 sjinfo);
1978 }
1979 else
1980 {
1981 /* Estimate selectivity for a restriction clause. */
1982 s1 = restriction_selectivity(root, opno,
1983 opargs,
1984 inputcollid,
1985 varRelid);
1986 }
1987
1988 return s1;
1989}
1990
1991/*
1992 * eqjoinsel - Join selectivity of "="
1993 */
1994Datum
1995eqjoinsel(PG_FUNCTION_ARGS)
1996{
1997 PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
1998 Oid operator = PG_GETARG_OID(1);
1999 List *args = (List *) PG_GETARG_POINTER(2);
2000
2001#ifdef NOT_USED
2002 JoinType jointype = (JoinType) PG_GETARG_INT16(3);
2003#endif
2004 SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
2005 double selec;
2006 double selec_inner;
2007 VariableStatData vardata1;
2008 VariableStatData vardata2;
2009 double nd1;
2010 double nd2;
2011 bool isdefault1;
2012 bool isdefault2;
2013 Oid opfuncoid;
2014 AttStatsSlot sslot1;
2015 AttStatsSlot sslot2;
2016 Form_pg_statistic stats1 = NULL;
2017 Form_pg_statistic stats2 = NULL;
2018 bool have_mcvs1 = false;
2019 bool have_mcvs2 = false;
2020 bool join_is_reversed;
2021 RelOptInfo *inner_rel;
2022
2023 get_join_variables(root, args, sjinfo,
2024 &vardata1, &vardata2, &join_is_reversed);
2025
2026 nd1 = get_variable_numdistinct(&vardata1, &isdefault1);
2027 nd2 = get_variable_numdistinct(&vardata2, &isdefault2);
2028
2029 opfuncoid = get_opcode(operator);
2030
2031 memset(&sslot1, 0, sizeof(sslot1));
2032 memset(&sslot2, 0, sizeof(sslot2));
2033
2034 if (HeapTupleIsValid(vardata1.statsTuple))
2035 {
2036 /* note we allow use of nullfrac regardless of security check */
2037 stats1 = (Form_pg_statistic) GETSTRUCT(vardata1.statsTuple);
2038 if (statistic_proc_security_check(&vardata1, opfuncoid))
2039 have_mcvs1 = get_attstatsslot(&sslot1, vardata1.statsTuple,
2040 STATISTIC_KIND_MCV, InvalidOid,
2041 ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
2042 }
2043
2044 if (HeapTupleIsValid(vardata2.statsTuple))
2045 {
2046 /* note we allow use of nullfrac regardless of security check */
2047 stats2 = (Form_pg_statistic) GETSTRUCT(vardata2.statsTuple);
2048 if (statistic_proc_security_check(&vardata2, opfuncoid))
2049 have_mcvs2 = get_attstatsslot(&sslot2, vardata2.statsTuple,
2050 STATISTIC_KIND_MCV, InvalidOid,
2051 ATTSTATSSLOT_VALUES | ATTSTATSSLOT_NUMBERS);
2052 }
2053
2054 /* We need to compute the inner-join selectivity in all cases */
2055 selec_inner = eqjoinsel_inner(opfuncoid,
2056 &vardata1, &vardata2,
2057 nd1, nd2,
2058 isdefault1, isdefault2,
2059 &sslot1, &sslot2,
2060 stats1, stats2,
2061 have_mcvs1, have_mcvs2);
2062
2063 switch (sjinfo->jointype)
2064 {
2065 case JOIN_INNER:
2066 case JOIN_LEFT:
2067 case JOIN_FULL:
2068 selec = selec_inner;
2069 break;
2070 case JOIN_SEMI:
2071 case JOIN_ANTI:
2072
2073 /*
2074 * Look up the join's inner relation. min_righthand is sufficient
2075 * information because neither SEMI nor ANTI joins permit any
2076 * reassociation into or out of their RHS, so the righthand will
2077 * always be exactly that set of rels.
2078 */
2079 inner_rel = find_join_input_rel(root, sjinfo->min_righthand);
2080
2081 if (!join_is_reversed)
2082 selec = eqjoinsel_semi(opfuncoid,
2083 &vardata1, &vardata2,
2084 nd1, nd2,
2085 isdefault1, isdefault2,
2086 &sslot1, &sslot2,
2087 stats1, stats2,
2088 have_mcvs1, have_mcvs2,
2089 inner_rel);
2090 else
2091 {
2092 Oid commop = get_commutator(operator);
2093 Oid commopfuncoid = OidIsValid(commop) ? get_opcode(commop) : InvalidOid;
2094
2095 selec = eqjoinsel_semi(commopfuncoid,
2096 &vardata2, &vardata1,
2097 nd2, nd1,
2098 isdefault2, isdefault1,
2099 &sslot2, &sslot1,
2100 stats2, stats1,
2101 have_mcvs2, have_mcvs1,
2102 inner_rel);
2103 }
2104
2105 /*
2106 * We should never estimate the output of a semijoin to be more
2107 * rows than we estimate for an inner join with the same input
2108 * rels and join condition; it's obviously impossible for that to
2109 * happen. The former estimate is N1 * Ssemi while the latter is
2110 * N1 * N2 * Sinner, so we may clamp Ssemi <= N2 * Sinner. Doing
2111 * this is worthwhile because of the shakier estimation rules we
2112 * use in eqjoinsel_semi, particularly in cases where it has to
2113 * punt entirely.
2114 */
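/*
 * Illustrative numbers: with N2 = 100 inner rows and Sinner = 0.001,
 * the clamp limits Ssemi to 100 * 0.001 = 0.1; an outer rel of N1 = 1000
 * rows then yields at most 1000 * 0.1 = 100 semijoin output rows,
 * matching the 1000 * 100 * 0.001 = 100 rows the corresponding inner
 * join would produce.
 */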
2115 selec = Min(selec, inner_rel->rows * selec_inner);
2116 break;
2117 default:
2118 /* other values not expected here */
2119 elog(ERROR, "unrecognized join type: %d",
2120 (int) sjinfo->jointype);
2121 selec = 0; /* keep compiler quiet */
2122 break;
2123 }
2124
2125 free_attstatsslot(&sslot1);
2126 free_attstatsslot(&sslot2);
2127
2128 ReleaseVariableStats(vardata1);
2129 ReleaseVariableStats(vardata2);
2130
2131 CLAMP_PROBABILITY(selec);
2132
2133 PG_RETURN_FLOAT8((float8) selec);
2134}
2135
2136/*
2137 * eqjoinsel_inner --- eqjoinsel for normal inner join
2138 *
2139 * We also use this for LEFT/FULL outer joins; it's not presently clear
2140 * that it's worth trying to distinguish them here.
2141 */
2142static double
2143eqjoinsel_inner(Oid opfuncoid,
2144 VariableStatData *vardata1, VariableStatData *vardata2,
2145 double nd1, double nd2,
2146 bool isdefault1, bool isdefault2,
2147 AttStatsSlot *sslot1, AttStatsSlot *sslot2,
2148 Form_pg_statistic stats1, Form_pg_statistic stats2,
2149 bool have_mcvs1, bool have_mcvs2)
2150{
2151 double selec;
2152
2153 if (have_mcvs1 && have_mcvs2)
2154 {
2155 /*
2156 * We have most-common-value lists for both relations. Run through
2157 * the lists to see which MCVs actually join to each other with the
2158 * given operator. This allows us to determine the exact join
2159 * selectivity for the portion of the relations represented by the MCV
2160 * lists. We still have to estimate for the remaining population, but
2161 * in a skewed distribution this gives us a big leg up in accuracy.
2162 * For motivation see the analysis in Y. Ioannidis and S.
2163 * Christodoulakis, "On the propagation of errors in the size of join
2164 * results", Technical Report 1018, Computer Science Dept., University
2165 * of Wisconsin, Madison, March 1991 (available from ftp.cs.wisc.edu).
2166 */
2167 FmgrInfo eqproc;
2168 bool *hasmatch1;
2169 bool *hasmatch2;
2170 double nullfrac1 = stats1->stanullfrac;
2171 double nullfrac2 = stats2->stanullfrac;
2172 double matchprodfreq,
2173 matchfreq1,
2174 matchfreq2,
2175 unmatchfreq1,
2176 unmatchfreq2,
2177 otherfreq1,
2178 otherfreq2,
2179 totalsel1,
2180 totalsel2;
2181 int i,
2182 nmatches;
2183
2184 fmgr_info(opfuncoid, &eqproc);
2185 hasmatch1 = (bool *) palloc0(sslot1->nvalues * sizeof(bool));
2186 hasmatch2 = (bool *) palloc0(sslot2->nvalues * sizeof(bool));
2187
2188 /*
2189 * Note we assume that each MCV will match at most one member of the
2190 * other MCV list. If the operator isn't really equality, there could
2191 * be multiple matches --- but we don't look for them, both for speed
2192 * and because the math wouldn't add up...
2193 */
2194 matchprodfreq = 0.0;
2195 nmatches = 0;
2196 for (i = 0; i < sslot1->nvalues; i++)
2197 {
2198 int j;
2199
2200 for (j = 0; j < sslot2->nvalues; j++)
2201 {
2202 if (hasmatch2[j])
2203 continue;
2204 if (DatumGetBool(FunctionCall2Coll(&eqproc,
2205 sslot1->stacoll,
2206 sslot1->values[i],
2207 sslot2->values[j])))
2208 {
2209 hasmatch1[i] = hasmatch2[j] = true;
2210 matchprodfreq += sslot1->numbers[i] * sslot2->numbers[j];
2211 nmatches++;
2212 break;
2213 }
2214 }
2215 }
2216 CLAMP_PROBABILITY(matchprodfreq);
2217 /* Sum up frequencies of matched and unmatched MCVs */
2218 matchfreq1 = unmatchfreq1 = 0.0;
2219 for (i = 0; i < sslot1->nvalues; i++)
2220 {
2221 if (hasmatch1[i])
2222 matchfreq1 += sslot1->numbers[i];
2223 else
2224 unmatchfreq1 += sslot1->numbers[i];
2225 }
2226 CLAMP_PROBABILITY(matchfreq1);
2227 CLAMP_PROBABILITY(unmatchfreq1);
2228 matchfreq2 = unmatchfreq2 = 0.0;
2229 for (i = 0; i < sslot2->nvalues; i++)
2230 {
2231 if (hasmatch2[i])
2232 matchfreq2 += sslot2->numbers[i];
2233 else
2234 unmatchfreq2 += sslot2->numbers[i];
2235 }
2236 CLAMP_PROBABILITY(matchfreq2);
2237 CLAMP_PROBABILITY(unmatchfreq2);
2238 pfree(hasmatch1);
2239 pfree(hasmatch2);
2240
2241 /*
2242 * Compute total frequency of non-null values that are not in the MCV
2243 * lists.
2244 */
2245 otherfreq1 = 1.0 - nullfrac1 - matchfreq1 - unmatchfreq1;
2246 otherfreq2 = 1.0 - nullfrac2 - matchfreq2 - unmatchfreq2;
2247 CLAMP_PROBABILITY(otherfreq1);
2248 CLAMP_PROBABILITY(otherfreq2);
2249
2250 /*
2251 * We can estimate the total selectivity from the point of view of
2252 * relation 1 as: the known selectivity for matched MCVs, plus
2253 * unmatched MCVs that are assumed to match against random members of
2254 * relation 2's non-MCV population, plus non-MCV values that are
2255 * assumed to match against random members of relation 2's unmatched
2256 * MCVs plus non-MCV values.
2257 */
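/*
 * A hypothetical example of the computation below: suppose matchprodfreq
 * = 0.02, unmatchfreq1 = 0.2, otherfreq1 = 0.5, unmatchfreq2 = 0.35,
 * otherfreq2 = 0.4, with nd2 = 1000, sslot2->nvalues = 100 and nmatches
 * = 40.  Then totalsel1 = 0.02 + 0.2 * 0.4 / 900 +
 * 0.5 * (0.4 + 0.35) / 960 ~= 0.0205.
 */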
2258 totalsel1 = matchprodfreq;
2259 if (nd2 > sslot2->nvalues)
2260 totalsel1 += unmatchfreq1 * otherfreq2 / (nd2 - sslot2->nvalues);
2261 if (nd2 > nmatches)
2262 totalsel1 += otherfreq1 * (otherfreq2 + unmatchfreq2) /
2263 (nd2 - nmatches);
2264 /* Same estimate from the point of view of relation 2. */
2265 totalsel2 = matchprodfreq;
2266 if (nd1 > sslot1->nvalues)
2267 totalsel2 += unmatchfreq2 * otherfreq1 / (nd1 - sslot1->nvalues);
2268 if (nd1 > nmatches)
2269 totalsel2 += otherfreq2 * (otherfreq1 + unmatchfreq1) /
2270 (nd1 - nmatches);
2271
2272 /*
2273 * Use the smaller of the two estimates. This can be justified in
2274 * essentially the same terms as given below for the no-stats case: to
2275 * a first approximation, we are estimating from the point of view of
2276 * the relation with smaller nd.
2277 */
2278 selec = (totalsel1 < totalsel2) ? totalsel1 : totalsel2;
2279 }
2280 else
2281 {
2282 /*
2283 * We do not have MCV lists for both sides. Estimate the join
2284 * selectivity as MIN(1/nd1,1/nd2)*(1-nullfrac1)*(1-nullfrac2). This
2285 * is plausible if we assume that the join operator is strict and the
2286 * non-null values are about equally distributed: a given non-null
2287 * tuple of rel1 will join to either zero or N2*(1-nullfrac2)/nd2 rows
2288 * of rel2, so total join rows are at most
2289 * N1*(1-nullfrac1)*N2*(1-nullfrac2)/nd2 giving a join selectivity of
2290 * not more than (1-nullfrac1)*(1-nullfrac2)/nd2. By the same logic it
2291 * is not more than (1-nullfrac1)*(1-nullfrac2)/nd1, so the expression
2292 * with MIN() is an upper bound. Using the MIN() means we estimate
2293 * from the point of view of the relation with smaller nd (since the
2294 * larger nd is determining the MIN). It is reasonable to assume that
2295 * most tuples in this rel will have join partners, so the bound is
2296 * probably reasonably tight and should be taken as-is.
2297 *
2298 * XXX Can we be smarter if we have an MCV list for just one side? It
2299 * seems that if we assume equal distribution for the other side, we
2300 * end up with the same answer anyway.
2301 */
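/*
 * For example (made-up numbers): with nd1 = 100, nd2 = 1000,
 * nullfrac1 = 0.1 and nullfrac2 = 0, the estimate is
 * 0.9 * 1.0 / 1000 = 0.0009.
 */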
2302 double nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
2303 double nullfrac2 = stats2 ? stats2->stanullfrac : 0.0;
2304
2305 selec = (1.0 - nullfrac1) * (1.0 - nullfrac2);
2306 if (nd1 > nd2)
2307 selec /= nd1;
2308 else
2309 selec /= nd2;
2310 }
2311
2312 return selec;
2313}
2314
2315/*
2316 * eqjoinsel_semi --- eqjoinsel for semi join
2317 *
2318 * (Also used for anti join, which we are supposed to estimate the same way.)
2319 * Caller has ensured that vardata1 is the LHS variable.
2320 * Unlike eqjoinsel_inner, we have to cope with opfuncoid being InvalidOid.
2321 */
2322static double
2323eqjoinsel_semi(Oid opfuncoid,
2324 VariableStatData *vardata1, VariableStatData *vardata2,
2325 double nd1, double nd2,
2326 bool isdefault1, bool isdefault2,
2327 AttStatsSlot *sslot1, AttStatsSlot *sslot2,
2328 Form_pg_statistic stats1, Form_pg_statistic stats2,
2329 bool have_mcvs1, bool have_mcvs2,
2330 RelOptInfo *inner_rel)
2331{
2332 double selec;
2333
2334 /*
2335 * We clamp nd2 to be not more than what we estimate the inner relation's
2336 * size to be. This is intuitively somewhat reasonable since obviously
2337 * there can't be more than that many distinct values coming from the
2338 * inner rel. The reason for the asymmetry (ie, that we don't clamp nd1
2339 * likewise) is that this is the only pathway by which restriction clauses
2340 * applied to the inner rel will affect the join result size estimate,
2341 * since set_joinrel_size_estimates will multiply SEMI/ANTI selectivity by
2342 * only the outer rel's size. If we clamped nd1 we'd be double-counting
2343 * the selectivity of outer-rel restrictions.
2344 *
2345 * We can apply this clamping both with respect to the base relation from
2346 * which the join variable comes (if there is just one), and to the
2347 * immediate inner input relation of the current join.
2348 *
2349 * If we clamp, we can treat nd2 as being a non-default estimate; it's not
2350 * great, maybe, but it didn't come out of nowhere either. This is most
2351 * helpful when the inner relation is empty and consequently has no stats.
2352 */
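/*
 * For instance (illustrative), if the stats claim nd2 = 10000 but
 * restriction clauses reduce the inner rel to an estimated 50 rows, we
 * proceed with nd2 = 50 and treat it as a non-default estimate.
 */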
2353 if (vardata2->rel)
2354 {
2355 if (nd2 >= vardata2->rel->rows)
2356 {
2357 nd2 = vardata2->rel->rows;
2358 isdefault2 = false;
2359 }
2360 }
2361 if (nd2 >= inner_rel->rows)
2362 {
2363 nd2 = inner_rel->rows;
2364 isdefault2 = false;
2365 }
2366
2367 if (have_mcvs1 && have_mcvs2 && OidIsValid(opfuncoid))
2368 {
2369 /*
2370 * We have most-common-value lists for both relations. Run through
2371 * the lists to see which MCVs actually join to each other with the
2372 * given operator. This allows us to determine the exact join
2373 * selectivity for the portion of the relations represented by the MCV
2374 * lists. We still have to estimate for the remaining population, but
2375 * in a skewed distribution this gives us a big leg up in accuracy.
2376 */
2377 FmgrInfo eqproc;
2378 bool *hasmatch1;
2379 bool *hasmatch2;
2380 double nullfrac1 = stats1->stanullfrac;
2381 double matchfreq1,
2382 uncertainfrac,
2383 uncertain;
2384 int i,
2385 nmatches,
2386 clamped_nvalues2;
2387
2388 /*
2389 * The clamping above could have resulted in nd2 being less than
 * sslot2->nvalues, in which case we assume that precisely the nd2
2391 * most common values in the relation will appear in the join input,
2392 * and so compare to only the first nd2 members of the MCV list. Of
2393 * course this is frequently wrong, but it's the best bet we can make.
2394 */
2395 clamped_nvalues2 = Min(sslot2->nvalues, nd2);
2396
2397 fmgr_info(opfuncoid, &eqproc);
2398 hasmatch1 = (bool *) palloc0(sslot1->nvalues * sizeof(bool));
2399 hasmatch2 = (bool *) palloc0(clamped_nvalues2 * sizeof(bool));
2400
2401 /*
2402 * Note we assume that each MCV will match at most one member of the
2403 * other MCV list. If the operator isn't really equality, there could
2404 * be multiple matches --- but we don't look for them, both for speed
2405 * and because the math wouldn't add up...
2406 */
2407 nmatches = 0;
2408 for (i = 0; i < sslot1->nvalues; i++)
2409 {
2410 int j;
2411
2412 for (j = 0; j < clamped_nvalues2; j++)
2413 {
2414 if (hasmatch2[j])
2415 continue;
2416 if (DatumGetBool(FunctionCall2Coll(&eqproc,
2417 sslot1->stacoll,
2418 sslot1->values[i],
2419 sslot2->values[j])))
2420 {
2421 hasmatch1[i] = hasmatch2[j] = true;
2422 nmatches++;
2423 break;
2424 }
2425 }
2426 }
2427 /* Sum up frequencies of matched MCVs */
2428 matchfreq1 = 0.0;
2429 for (i = 0; i < sslot1->nvalues; i++)
2430 {
2431 if (hasmatch1[i])
2432 matchfreq1 += sslot1->numbers[i];
2433 }
2434 CLAMP_PROBABILITY(matchfreq1);
2435 pfree(hasmatch1);
2436 pfree(hasmatch2);
2437
2438 /*
2439 * Now we need to estimate the fraction of relation 1 that has at
2440 * least one join partner. We know for certain that the matched MCVs
2441 * do, so that gives us a lower bound, but we're really in the dark
2442 * about everything else. Our crude approach is: if nd1 <= nd2 then
2443 * assume all non-null rel1 rows have join partners, else assume for
2444 * the uncertain rows that a fraction nd2/nd1 have join partners. We
2445 * can discount the known-matched MCVs from the distinct-values counts
2446 * before doing the division.
2447 *
2448 * Crude as the above is, it's completely useless if we don't have
2449 * reliable ndistinct values for both sides. Hence, if either nd1 or
2450 * nd2 is default, punt and assume half of the uncertain rows have
2451 * join partners.
2452 */
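/*
 * Worked example with invented figures: nd1 = 1000, nd2 = 200,
 * nmatches = 50, matchfreq1 = 0.3, nullfrac1 = 0.1.  Discounting the
 * matches gives uncertainfrac = (200 - 50) / (1000 - 50) ~= 0.158, and
 * selec = 0.3 + 0.158 * (1.0 - 0.3 - 0.1) ~= 0.395.
 */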
2453 if (!isdefault1 && !isdefault2)
2454 {
2455 nd1 -= nmatches;
2456 nd2 -= nmatches;
2457 if (nd1 <= nd2 || nd2 < 0)
2458 uncertainfrac = 1.0;
2459 else
2460 uncertainfrac = nd2 / nd1;
2461 }
2462 else
2463 uncertainfrac = 0.5;
2464 uncertain = 1.0 - matchfreq1 - nullfrac1;
2465 CLAMP_PROBABILITY(uncertain);
2466 selec = matchfreq1 + uncertainfrac * uncertain;
2467 }
2468 else
2469 {
2470 /*
2471 * Without MCV lists for both sides, we can only use the heuristic
2472 * about nd1 vs nd2.
2473 */
2474 double nullfrac1 = stats1 ? stats1->stanullfrac : 0.0;
2475
2476 if (!isdefault1 && !isdefault2)
2477 {
2478 if (nd1 <= nd2 || nd2 < 0)
2479 selec = 1.0 - nullfrac1;
2480 else
2481 selec = (nd2 / nd1) * (1.0 - nullfrac1);
2482 }
2483 else
2484 selec = 0.5 * (1.0 - nullfrac1);
2485 }
2486
2487 return selec;
2488}
2489
2490/*
2491 * neqjoinsel - Join selectivity of "!="
2492 */
2493Datum
2494neqjoinsel(PG_FUNCTION_ARGS)
2495{
2496 PlannerInfo *root = (PlannerInfo *) PG_GETARG_POINTER(0);
2497 Oid operator = PG_GETARG_OID(1);
2498 List *args = (List *) PG_GETARG_POINTER(2);
2499 JoinType jointype = (JoinType) PG_GETARG_INT16(3);
2500 SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) PG_GETARG_POINTER(4);
2501 float8 result;
2502
2503 if (jointype == JOIN_SEMI || jointype == JOIN_ANTI)
2504 {
2505 /*
2506 * For semi-joins, if there is more than one distinct value in the RHS
2507 * relation then every non-null LHS row must find a row to join since
2508 * it can only be equal to one of them. We'll assume that there is
2509 * always more than one distinct RHS value for the sake of stability,
2510 * though in theory we could have special cases for empty RHS
2511 * (selectivity = 0) and single-distinct-value RHS (selectivity =
2512 * fraction of LHS that has the same value as the single RHS value).
2513 *
2514 * For anti-joins, if we use the same assumption that there is more
2515 * than one distinct key in the RHS relation, then every non-null LHS
2516 * row must be suppressed by the anti-join.
2517 *
2518 * So either way, the selectivity estimate should be 1 - nullfrac.
2519 */
2520 VariableStatData leftvar;
2521 VariableStatData rightvar;
2522 bool reversed;
2523 HeapTuple statsTuple;
2524 double nullfrac;
2525
2526 get_join_variables(root, args, sjinfo, &leftvar, &rightvar, &reversed);
2527 statsTuple = reversed ? rightvar.statsTuple : leftvar.statsTuple;
2528 if (HeapTupleIsValid(statsTuple))
2529 nullfrac = ((Form_pg_statistic) GETSTRUCT(statsTuple))->stanullfrac;
2530 else
2531 nullfrac = 0.0;
2532 ReleaseVariableStats(leftvar);
2533 ReleaseVariableStats(rightvar);
2534
2535 result = 1.0 - nullfrac;
2536 }
2537 else
2538 {
2539 /*
2540 * We want 1 - eqjoinsel() where the equality operator is the one
2541 * associated with this != operator, that is, its negator.
2542 */
2543 Oid eqop = get_negator(operator);
2544
2545 if (eqop)
2546 {
2547 result = DatumGetFloat8(DirectFunctionCall5(eqjoinsel,
2548 PointerGetDatum(root),
2549 ObjectIdGetDatum(eqop),
2550 PointerGetDatum(args),
2551 Int16GetDatum(jointype),
2552 PointerGetDatum(sjinfo)));
2553 }
2554 else
2555 {
2556 /* Use default selectivity (should we raise an error instead?) */
2557 result = DEFAULT_EQ_SEL;
2558 }
2559 result = 1.0 - result;
2560 }
2561
2562 PG_RETURN_FLOAT8(result);
2563}
2564
2565/*
2566 * scalarltjoinsel - Join selectivity of "<" for scalars
2567 */
2568Datum
2569scalarltjoinsel(PG_FUNCTION_ARGS)
2570{
2571 PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
2572}
2573
2574/*
2575 * scalarlejoinsel - Join selectivity of "<=" for scalars
2576 */
2577Datum
2578scalarlejoinsel(PG_FUNCTION_ARGS)
2579{
2580 PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
2581}
2582
2583/*
2584 * scalargtjoinsel - Join selectivity of ">" for scalars
2585 */
2586Datum
2587scalargtjoinsel(PG_FUNCTION_ARGS)
2588{
2589 PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
2590}
2591
2592/*
2593 * scalargejoinsel - Join selectivity of ">=" for scalars
2594 */
2595Datum
2596scalargejoinsel(PG_FUNCTION_ARGS)
2597{
2598 PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
2599}
2600
2601
2602/*
2603 * mergejoinscansel - Scan selectivity of merge join.
2604 *
2605 * A merge join will stop as soon as it exhausts either input stream.
2606 * Therefore, if we can estimate the ranges of both input variables,
2607 * we can estimate how much of the input will actually be read. This
2608 * can have a considerable impact on the cost when using indexscans.
2609 *
2610 * Also, we can estimate how much of each input has to be read before the
2611 * first join pair is found, which will affect the join's startup time.
2612 *
2613 * clause should be a clause already known to be mergejoinable. opfamily,
2614 * strategy, and nulls_first specify the sort ordering being used.
2615 *
2616 * The outputs are:
2617 * *leftstart is set to the fraction of the left-hand variable expected
2618 * to be scanned before the first join pair is found (0 to 1).
2619 * *leftend is set to the fraction of the left-hand variable expected
2620 * to be scanned before the join terminates (0 to 1).
2621 * *rightstart, *rightend similarly for the right-hand variable.
2622 */
2623void
2624mergejoinscansel(PlannerInfo *root, Node *clause,
2625 Oid opfamily, int strategy, bool nulls_first,
2626 Selectivity *leftstart, Selectivity *leftend,
2627 Selectivity *rightstart, Selectivity *rightend)
2628{
2629 Node *left,
2630 *right;
2631 VariableStatData leftvar,
2632 rightvar;
2633 int op_strategy;
2634 Oid op_lefttype;
2635 Oid op_righttype;
2636 Oid opno,
2637 lsortop,
2638 rsortop,
2639 lstatop,
2640 rstatop,
2641 ltop,
2642 leop,
2643 revltop,
2644 revleop;
2645 bool isgt;
2646 Datum leftmin,
2647 leftmax,
2648 rightmin,
2649 rightmax;
2650 double selec;
2651
2652 /* Set default results if we can't figure anything out. */
2653 /* XXX should default "start" fraction be a bit more than 0? */
2654 *leftstart = *rightstart = 0.0;
2655 *leftend = *rightend = 1.0;
2656
2657 /* Deconstruct the merge clause */
2658 if (!is_opclause(clause))
2659 return; /* shouldn't happen */
2660 opno = ((OpExpr *) clause)->opno;
2661 left = get_leftop((Expr *) clause);
2662 right = get_rightop((Expr *) clause);
2663 if (!right)
2664 return; /* shouldn't happen */
2665
2666 /* Look for stats for the inputs */
2667 examine_variable(root, left, 0, &leftvar);
2668 examine_variable(root, right, 0, &rightvar);
2669
2670 /* Extract the operator's declared left/right datatypes */
2671 get_op_opfamily_properties(opno, opfamily, false,
2672 &op_strategy,
2673 &op_lefttype,
2674 &op_righttype);
2675 Assert(op_strategy == BTEqualStrategyNumber);
2676
2677 /*
2678 * Look up the various operators we need. If we don't find them all, it
2679 * probably means the opfamily is broken, but we just fail silently.
2680 *
2681 * Note: we expect that pg_statistic histograms will be sorted by the '<'
2682 * operator, regardless of which sort direction we are considering.
2683 */
2684 switch (strategy)
2685 {
2686 case BTLessStrategyNumber:
2687 isgt = false;
2688 if (op_lefttype == op_righttype)
2689 {
2690 /* easy case */
2691 ltop = get_opfamily_member(opfamily,
2692 op_lefttype, op_righttype,
2693 BTLessStrategyNumber);
2694 leop = get_opfamily_member(opfamily,
2695 op_lefttype, op_righttype,
2696 BTLessEqualStrategyNumber);
2697 lsortop = ltop;
2698 rsortop = ltop;
2699 lstatop = lsortop;
2700 rstatop = rsortop;
2701 revltop = ltop;
2702 revleop = leop;
2703 }
2704 else
2705 {
2706 ltop = get_opfamily_member(opfamily,
2707 op_lefttype, op_righttype,
2708 BTLessStrategyNumber);
2709 leop = get_opfamily_member(opfamily,
2710 op_lefttype, op_righttype,
2711 BTLessEqualStrategyNumber);
2712 lsortop = get_opfamily_member(opfamily,
2713 op_lefttype, op_lefttype,
2714 BTLessStrategyNumber);
2715 rsortop = get_opfamily_member(opfamily,
2716 op_righttype, op_righttype,
2717 BTLessStrategyNumber);
2718 lstatop = lsortop;
2719 rstatop = rsortop;
2720 revltop = get_opfamily_member(opfamily,
2721 op_righttype, op_lefttype,
2722 BTLessStrategyNumber);
2723 revleop = get_opfamily_member(opfamily,
2724 op_righttype, op_lefttype,
2725 BTLessEqualStrategyNumber);
2726 }
2727 break;
2728 case BTGreaterStrategyNumber:
2729 /* descending-order case */
2730 isgt = true;
2731 if (op_lefttype == op_righttype)
2732 {
2733 /* easy case */
2734 ltop = get_opfamily_member(opfamily,
2735 op_lefttype, op_righttype,
2736 BTGreaterStrategyNumber);
2737 leop = get_opfamily_member(opfamily,
2738 op_lefttype, op_righttype,
2739 BTGreaterEqualStrategyNumber);
2740 lsortop = ltop;
2741 rsortop = ltop;
2742 lstatop = get_opfamily_member(opfamily,
2743 op_lefttype, op_lefttype,
2744 BTLessStrategyNumber);
2745 rstatop = lstatop;
2746 revltop = ltop;
2747 revleop = leop;
2748 }
2749 else
2750 {
2751 ltop = get_opfamily_member(opfamily,
2752 op_lefttype, op_righttype,
2753 BTGreaterStrategyNumber);
2754 leop = get_opfamily_member(opfamily,
2755 op_lefttype, op_righttype,
2756 BTGreaterEqualStrategyNumber);
2757 lsortop = get_opfamily_member(opfamily,
2758 op_lefttype, op_lefttype,
2759 BTGreaterStrategyNumber);
2760 rsortop = get_opfamily_member(opfamily,
2761 op_righttype, op_righttype,
2762 BTGreaterStrategyNumber);
2763 lstatop = get_opfamily_member(opfamily,
2764 op_lefttype, op_lefttype,
2765 BTLessStrategyNumber);
2766 rstatop = get_opfamily_member(opfamily,
2767 op_righttype, op_righttype,
2768 BTLessStrategyNumber);
2769 revltop = get_opfamily_member(opfamily,
2770 op_righttype, op_lefttype,
2771 BTGreaterStrategyNumber);
2772 revleop = get_opfamily_member(opfamily,
2773 op_righttype, op_lefttype,
2774 BTGreaterEqualStrategyNumber);
2775 }
2776 break;
2777 default:
2778 goto fail; /* shouldn't get here */
2779 }
2780
2781 if (!OidIsValid(lsortop) ||
2782 !OidIsValid(rsortop) ||
2783 !OidIsValid(lstatop) ||
2784 !OidIsValid(rstatop) ||
2785 !OidIsValid(ltop) ||
2786 !OidIsValid(leop) ||
2787 !OidIsValid(revltop) ||
2788 !OidIsValid(revleop))
2789 goto fail; /* insufficient info in catalogs */
2790
2791 /* Try to get ranges of both inputs */
2792 if (!isgt)
2793 {
2794 if (!get_variable_range(root, &leftvar, lstatop,
2795 &leftmin, &leftmax))
2796 goto fail; /* no range available from stats */
2797 if (!get_variable_range(root, &rightvar, rstatop,
2798 &rightmin, &rightmax))
2799 goto fail; /* no range available from stats */
2800 }
2801 else
2802 {
2803 /* need to swap the max and min */
2804 if (!get_variable_range(root, &leftvar, lstatop,
2805 &leftmax, &leftmin))
2806 goto fail; /* no range available from stats */
2807 if (!get_variable_range(root, &rightvar, rstatop,
2808 &rightmax, &rightmin))
2809 goto fail; /* no range available from stats */
2810 }
2811
2812 /*
2813 * Now, the fraction of the left variable that will be scanned is the
2814 * fraction that's <= the right-side maximum value. But only believe
2815 * non-default estimates, else stick with our 1.0.
2816 */
2817 selec = scalarineqsel(root, leop, isgt, true, &leftvar,
2818 rightmax, op_righttype);
2819 if (selec != DEFAULT_INEQ_SEL)
2820 *leftend = selec;
2821
2822 /* And similarly for the right variable. */
2823 selec = scalarineqsel(root, revleop, isgt, true, &rightvar,
2824 leftmax, op_lefttype);
2825 if (selec != DEFAULT_INEQ_SEL)
2826 *rightend = selec;
2827
2828 /*
2829 * Only one of the two "end" fractions can really be less than 1.0;
2830 * believe the smaller estimate and reset the other one to exactly 1.0. If
2831 * we get exactly equal estimates (as can easily happen with self-joins),
2832 * believe neither.
2833 */
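/*
 * E.g. (illustrative), if *leftend = 0.7 and *rightend = 0.4, we keep the
 * right-side estimate and reset *leftend to 1.0.
 */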
2834 if (*leftend > *rightend)
2835 *leftend = 1.0;
2836 else if (*leftend < *rightend)
2837 *rightend = 1.0;
2838 else
2839 *leftend = *rightend = 1.0;
2840
2841 /*
2842 * Also, the fraction of the left variable that will be scanned before the
2843 * first join pair is found is the fraction that's < the right-side
2844 * minimum value. But only believe non-default estimates, else stick with
2845 * our own default.
2846 */
2847 selec = scalarineqsel(root, ltop, isgt, false, &leftvar,
2848 rightmin, op_righttype);
2849 if (selec != DEFAULT_INEQ_SEL)
2850 *leftstart = selec;
2851
2852 /* And similarly for the right variable. */
2853 selec = scalarineqsel(root, revltop, isgt, false, &rightvar,
2854 leftmin, op_lefttype);
2855 if (selec != DEFAULT_INEQ_SEL)
2856 *rightstart = selec;
2857
2858 /*
2859 * Only one of the two "start" fractions can really be more than zero;
2860 * believe the larger estimate and reset the other one to exactly 0.0. If
2861 * we get exactly equal estimates (as can easily happen with self-joins),
2862 * believe neither.
2863 */
2864 if (*leftstart < *rightstart)
2865 *leftstart = 0.0;
2866 else if (*leftstart > *rightstart)
2867 *rightstart = 0.0;
2868 else
2869 *leftstart = *rightstart = 0.0;
2870
2871 /*
2872 * If the sort order is nulls-first, we're going to have to skip over any
2873 * nulls too. These would not have been counted by scalarineqsel, and we
2874 * can safely add in this fraction regardless of whether we believe
2875 * scalarineqsel's results or not. But be sure to clamp the sum to 1.0!
2876 */
2877 if (nulls_first)
2878 {
2879 Form_pg_statistic stats;
2880
2881 if (HeapTupleIsValid(leftvar.statsTuple))
2882 {
2883 stats = (Form_pg_statistic) GETSTRUCT(leftvar.statsTuple);
2884 *leftstart += stats->stanullfrac;
2885 CLAMP_PROBABILITY(*leftstart);
2886 *leftend += stats->stanullfrac;
2887 CLAMP_PROBABILITY(*leftend);
2888 }
2889 if (HeapTupleIsValid(rightvar.statsTuple))
2890 {
2891 stats = (Form_pg_statistic) GETSTRUCT(rightvar.statsTuple);
2892 *rightstart += stats->stanullfrac;
2893 CLAMP_PROBABILITY(*rightstart);
2894 *rightend += stats->stanullfrac;
2895 CLAMP_PROBABILITY(*rightend);
2896 }
2897 }
2898
2899 /* Disbelieve start >= end, just in case that can happen */
2900 if (*leftstart >= *leftend)
2901 {
2902 *leftstart = 0.0;
2903 *leftend = 1.0;
2904 }
2905 if (*rightstart >= *rightend)
2906 {
2907 *rightstart = 0.0;
2908 *rightend = 1.0;
2909 }
2910
2911fail:
2912 ReleaseVariableStats(leftvar);
2913 ReleaseVariableStats(rightvar);
2914}
2915
2916
2917/*
2918 * Helper routine for estimate_num_groups: add an item to a list of
2919 * GroupVarInfos, but only if it's not known equal to any of the existing
2920 * entries.
2921 */
2922typedef struct
2923{
2924 Node *var; /* might be an expression, not just a Var */
2925 RelOptInfo *rel; /* relation it belongs to */
2926 double ndistinct; /* # distinct values */
2927} GroupVarInfo;
2928
2929static List *
2930add_unique_group_var(PlannerInfo *root, List *varinfos,
2931 Node *var, VariableStatData *vardata)
2932{
2933 GroupVarInfo *varinfo;
2934 double ndistinct;
2935 bool isdefault;
2936 ListCell *lc;
2937
2938 ndistinct = get_variable_numdistinct(vardata, &isdefault);
2939
2940 /* cannot use foreach here because of possible list_delete */
2941 lc = list_head(varinfos);
2942 while (lc)
2943 {
2944 varinfo = (GroupVarInfo *) lfirst(lc);
2945
2946 /* must advance lc before list_delete possibly pfree's it */
2947 lc = lnext(lc);
2948
2949 /* Drop exact duplicates */
2950 if (equal(var, varinfo->var))
2951 return varinfos;
2952
2953 /*
2954 * Drop known-equal vars, but only if they belong to different
2955 * relations (see comments for estimate_num_groups)
2956 */
2957 if (vardata->rel != varinfo->rel &&
2958 exprs_known_equal(root, var, varinfo->var))
2959 {
2960 if (varinfo->ndistinct <= ndistinct)
2961 {
2962 /* Keep older item, forget new one */
2963 return varinfos;
2964 }
2965 else
2966 {
2967 /* Delete the older item */
2968 varinfos = list_delete_ptr(varinfos, varinfo);
2969 }
2970 }
2971 }
2972
2973 varinfo = (GroupVarInfo *) palloc(sizeof(GroupVarInfo));
2974
2975 varinfo->var = var;
2976 varinfo->rel = vardata->rel;
2977 varinfo->ndistinct = ndistinct;
2978 varinfos = lappend(varinfos, varinfo);
2979 return varinfos;
2980}
2981
2982/*
2983 * estimate_num_groups - Estimate number of groups in a grouped query
2984 *
2985 * Given a query having a GROUP BY clause, estimate how many groups there
2986 * will be --- ie, the number of distinct combinations of the GROUP BY
2987 * expressions.
2988 *
2989 * This routine is also used to estimate the number of rows emitted by
2990 * a DISTINCT filtering step; that is an isomorphic problem. (Note:
2991 * actually, we only use it for DISTINCT when there's no grouping or
2992 * aggregation ahead of the DISTINCT.)
2993 *
2994 * Inputs:
2995 * root - the query
2996 * groupExprs - list of expressions being grouped by
2997 * input_rows - number of rows estimated to arrive at the group/unique
2998 * filter step
2999 * pgset - NULL, or a List** pointing to a grouping set to filter the
3000 * groupExprs against
3001 *
3002 * Given the lack of any cross-correlation statistics in the system, it's
3003 * impossible to do anything really trustworthy with GROUP BY conditions
3004 * involving multiple Vars. We should however avoid assuming the worst
3005 * case (all possible cross-product terms actually appear as groups) since
3006 * very often the grouped-by Vars are highly correlated. Our current approach
3007 * is as follows:
3008 * 1. Expressions yielding boolean are assumed to contribute two groups,
3009 * independently of their content, and are ignored in the subsequent
3010 * steps. This is mainly because tests like "col IS NULL" break the
3011 * heuristic used in step 2 especially badly.
3012 * 2. Reduce the given expressions to a list of unique Vars used. For
3013 * example, GROUP BY a, a + b is treated the same as GROUP BY a, b.
3014 * It is clearly correct not to count the same Var more than once.
3015 * It is also reasonable to treat f(x) the same as x: f() cannot
3016 * increase the number of distinct values (unless it is volatile,
3017 * which we consider unlikely for grouping), but it probably won't
3018 * reduce the number of distinct values much either.
3019 * As a special case, if a GROUP BY expression can be matched to an
3020 * expressional index for which we have statistics, then we treat the
3021 * whole expression as though it were just a Var.
3022 * 3. If the list contains Vars of different relations that are known equal
3023 * due to equivalence classes, then drop all but one of the Vars from each
3024 * known-equal set, keeping the one with smallest estimated # of values
3025 * (since the extra values of the others can't appear in joined rows).
3026 * Note the reason we only consider Vars of different relations is that
3027 * if we considered ones of the same rel, we'd be double-counting the
3028 * restriction selectivity of the equality in the next step.
3029 * 4. For Vars within a single source rel, we multiply together the numbers
3030 * of values, clamp to the number of rows in the rel (divided by 10 if
3031 * more than one Var), and then multiply by a factor based on the
3032 * selectivity of the restriction clauses for that rel. When there's
3033 * more than one Var, the initial product is probably too high (it's the
3034 * worst case) but clamping to a fraction of the rel's rows seems to be a
3035 * helpful heuristic for not letting the estimate get out of hand. (The
3036 * factor of 10 is derived from pre-Postgres-7.4 practice.) The factor
3037 * we multiply by to adjust for the restriction selectivity assumes that
3038 * the restriction clauses are independent of the grouping, which may not
3039 * be a valid assumption, but it's hard to do better.
3040 * 5. If there are Vars from multiple rels, we repeat step 4 for each such
3041 * rel, and multiply the results together.
3042 * Note that rels not containing grouped Vars are ignored completely, as are
3043 * join clauses. Such rels cannot increase the number of groups, and we
3044 * assume such clauses do not reduce the number either (somewhat bogus,
3045 * but we don't have the info to do better).
3046 */
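/*
 * A worked example of steps 4 and 5 (all numbers invented): GROUP BY a, b
 * where a and b come from the same 10000-row rel with 100 and 50 distinct
 * values respectively gives an initial product of 5000, clamped to
 * 10000 / 10 = 1000 because more than one Var is involved; that figure is
 * then scaled down further in the code below if restriction clauses
 * filter the rel.
 */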
3047double
3048estimate_num_groups(PlannerInfo *root, List *groupExprs, double input_rows,
3049 List **pgset)
3050{
3051 List *varinfos = NIL;
3052 double srf_multiplier = 1.0;
3053 double numdistinct;
3054 ListCell *l;
3055 int i;
3056
3057 /*
3058 * We don't ever want to return an estimate of zero groups, as that tends
3059 * to lead to division-by-zero and other unpleasantness. The input_rows
3060 * estimate is usually already at least 1, but clamp it just in case it
3061 * isn't.
3062 */
3063 input_rows = clamp_row_est(input_rows);
3064
3065 /*
3066 * If no grouping columns, there's exactly one group. (This can't happen
3067 * for normal cases with GROUP BY or DISTINCT, but it is possible for
3068 * corner cases with set operations.)
3069 */
3070 if (groupExprs == NIL || (pgset && list_length(*pgset) < 1))
3071 return 1.0;
3072
3073 /*
3074 * Count groups derived from boolean grouping expressions. For other
3075 * expressions, find the unique Vars used, treating an expression as a Var
3076 * if we can find stats for it. For each one, record the statistical
3077 * estimate of number of distinct values (total in its table, without
3078 * regard for filtering).
3079 */
3080 numdistinct = 1.0;
3081
3082 i = 0;
3083 foreach(l, groupExprs)
3084 {
3085 Node *groupexpr = (Node *) lfirst(l);
3086 double this_srf_multiplier;
3087 VariableStatData vardata;
3088 List *varshere;
3089 ListCell *l2;
3090
3091 /* is expression in this grouping set? */
3092 if (pgset && !list_member_int(*pgset, i++))
3093 continue;
3094
3095 /*
3096 * Set-returning functions in grouping columns are a bit problematic.
3097 * The code below will effectively ignore their SRF nature and come up
3098 * with a numdistinct estimate as though they were scalar functions.
3099 * We compensate by scaling up the end result by the largest SRF
3100 * rowcount estimate. (This will be an overestimate if the SRF
3101 * produces multiple copies of any output value, but it seems best to
3102 * assume the SRF's outputs are distinct. In any case, it's probably
3103 * pointless to worry too much about this without much better
3104 * estimates for SRF output rowcounts than we have today.)
3105 */
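/*
 * For example (illustrative), in GROUP BY x, generate_series(1, 10) the
 * SRF contributes a rowcount estimate of 10 (when the planner can
 * determine it), and the final numdistinct is scaled up by that factor.
 */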
3106 this_srf_multiplier = expression_returns_set_rows(root, groupexpr);
3107 if (srf_multiplier < this_srf_multiplier)
3108 srf_multiplier = this_srf_multiplier;
3109
3110 /* Short-circuit for expressions returning boolean */
3111 if (exprType(groupexpr) == BOOLOID)
3112 {
3113 numdistinct *= 2.0;
3114 continue;
3115 }
3116
3117 /*
3118 * If examine_variable is able to deduce anything about the GROUP BY
3119 * expression, treat it as a single variable even if it's really more
3120 * complicated.
3121 */
3122 examine_variable(root, groupexpr, 0, &vardata);
3123 if (HeapTupleIsValid(vardata.statsTuple) || vardata.isunique)
3124 {
3125 varinfos = add_unique_group_var(root, varinfos,
3126 groupexpr, &vardata);
3127 ReleaseVariableStats(vardata);
3128 continue;
3129 }
3130 ReleaseVariableStats(vardata);
3131
3132 /*
3133 * Else pull out the component Vars. Handle PlaceHolderVars by
3134 * recursing into their arguments (effectively assuming that the
3135 * PlaceHolderVar doesn't change the number of groups, which boils
3136 * down to ignoring the possible addition of nulls to the result set).
3137 */
3138 varshere = pull_var_clause(groupexpr,
3139 PVC_RECURSE_AGGREGATES |
3140 PVC_RECURSE_WINDOWFUNCS |
3141 PVC_RECURSE_PLACEHOLDERS);
3142
3143 /*
3144 * If we find any variable-free GROUP BY item, then either it is a
3145 * constant (and we can ignore it) or it contains a volatile function;
3146 * in the latter case we punt and assume that each input row will
3147 * yield a distinct group.
3148 */
3149 if (varshere == NIL)
3150 {
3151 if (contain_volatile_functions(groupexpr))
3152 return input_rows;
3153 continue;
3154 }
3155
3156 /*
3157 * Else add variables to varinfos list
3158 */
3159 foreach(l2, varshere)
3160 {
3161 Node *var = (Node *) lfirst(l2);
3162
3163 examine_variable(root, var, 0, &vardata);
3164 varinfos = add_unique_group_var(root, varinfos, var, &vardata);
3165 ReleaseVariableStats(vardata);
3166 }
3167 }
3168
3169 /*
3170 * If now no Vars, we must have an all-constant or all-boolean GROUP BY
3171 * list.
3172 */
3173 if (varinfos == NIL)
3174 {
3175 /* Apply SRF multiplier as we would do in the long path */
3176 numdistinct *= srf_multiplier;
3177 /* Round off */
3178 numdistinct = ceil(numdistinct);
3179 /* Guard against out-of-range answers */
3180 if (numdistinct > input_rows)
3181 numdistinct = input_rows;
3182 if (numdistinct < 1.0)
3183 numdistinct = 1.0;
3184 return numdistinct;
3185 }
3186
3187 /*
3188 * Group Vars by relation and estimate total numdistinct.
3189 *
3190 * For each iteration of the outer loop, we process the frontmost Var in
3191 * varinfos, plus all other Vars in the same relation. We remove these
3192 * Vars from the newvarinfos list for the next iteration. This is the
 * easiest way to group Vars of the same rel together.
3194 */
3195 do
3196 {
3197 GroupVarInfo *varinfo1 = (GroupVarInfo *) linitial(varinfos);
3198 RelOptInfo *rel = varinfo1->rel;
3199 double reldistinct = 1;
3200 double relmaxndistinct = reldistinct;
3201 int relvarcount = 0;
3202 List *newvarinfos = NIL;
3203 List *relvarinfos = NIL;
3204
3205 /*
3206 * Split the list of varinfos in two - one for the current rel, one
3207 * for remaining Vars on other rels.
3208 */
3209 relvarinfos = lcons(varinfo1, relvarinfos);
3210 for_each_cell(l, lnext(list_head(varinfos)))
3211 {
3212 GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
3213
3214 if (varinfo2->rel == varinfo1->rel)
3215 {
3216 /* varinfos on current rel */
3217 relvarinfos = lcons(varinfo2, relvarinfos);
3218 }
3219 else
3220 {
3221 /* not time to process varinfo2 yet */
3222 newvarinfos = lcons(varinfo2, newvarinfos);
3223 }
3224 }
3225
3226 /*
3227 * Get the numdistinct estimate for the Vars of this rel. We
3228 * iteratively search for multivariate n-distinct with maximum number
3229 * of vars; assuming that each var group is independent of the others,
3230 * we multiply them together. Any remaining relvarinfos after no more
3231 * multivariate matches are found are assumed independent too, so
3232 * their individual ndistinct estimates are multiplied also.
3233 *
3234 * While iterating, count how many separate numdistinct values we
3235 * apply. We apply a fudge factor below, but only if we multiplied
 * more than one such value.
3237 */
3238 while (relvarinfos)
3239 {
3240 double mvndistinct;
3241
3242 if (estimate_multivariate_ndistinct(root, rel, &relvarinfos,
3243 &mvndistinct))
3244 {
3245 reldistinct *= mvndistinct;
3246 if (relmaxndistinct < mvndistinct)
3247 relmaxndistinct = mvndistinct;
3248 relvarcount++;
3249 }
3250 else
3251 {
3252 foreach(l, relvarinfos)
3253 {
3254 GroupVarInfo *varinfo2 = (GroupVarInfo *) lfirst(l);
3255
3256 reldistinct *= varinfo2->ndistinct;
3257 if (relmaxndistinct < varinfo2->ndistinct)
3258 relmaxndistinct = varinfo2->ndistinct;
3259 relvarcount++;
3260 }
3261
3262 /* we're done with this relation */
3263 relvarinfos = NIL;
3264 }
3265 }
3266
3267 /*
3268 * Sanity check --- don't divide by zero if empty relation.
3269 */
3270 Assert(IS_SIMPLE_REL(rel));
3271 if (rel->tuples > 0)
3272 {
3273 /*
3274 * Clamp to size of rel, or size of rel / 10 if multiple Vars. The
3275 * fudge factor is because the Vars are probably correlated but we
3276 * don't know by how much. We should never clamp to less than the
3277 * largest ndistinct value for any of the Vars, though, since
3278 * there will surely be at least that many groups.
3279 */
3280 double clamp = rel->tuples;
3281
3282 if (relvarcount > 1)
3283 {
3284 clamp *= 0.1;
3285 if (clamp < relmaxndistinct)
3286 {
3287 clamp = relmaxndistinct;
3288 /* for sanity in case some ndistinct is too large: */
3289 if (clamp > rel->tuples)
3290 clamp = rel->tuples;
3291 }
3292 }
3293 if (reldistinct > clamp)
3294 reldistinct = clamp;
3295
3296 /*
3297 * Update the estimate based on the restriction selectivity,
3298 * guarding against division by zero when reldistinct is zero.
3299 * Also skip this if we know that we are returning all rows.
3300 */
3301 if (reldistinct > 0 && rel->rows < rel->tuples)
3302 {
3303 /*
3304 * Given a table containing N rows with n distinct values in a
3305 * uniform distribution, if we select p rows at random then
3306 * the expected number of distinct values selected is
3307 *
3308 * n * (1 - product((N-N/n-i)/(N-i), i=0..p-1))
3309 *
3310 * = n * (1 - (N-N/n)! / (N-N/n-p)! * (N-p)! / N!)
3311 *
3312 * See "Approximating block accesses in database
3313 * organizations", S. B. Yao, Communications of the ACM,
3314 * Volume 20 Issue 4, April 1977 Pages 260-261.
3315 *
3316 * Alternatively, re-arranging the terms from the factorials,
3317 * this may be written as
3318 *
3319 * n * (1 - product((N-p-i)/(N-i), i=0..N/n-1))
3320 *
3321 * This form of the formula is more efficient to compute in
3322 * the common case where p is larger than N/n. Additionally,
3323 * as pointed out by Dell'Era, if i << N for all terms in the
3324 * product, it can be approximated by
3325 *
3326 * n * (1 - ((N-p)/N)^(N/n))
3327 *
3328 * See "Expected distinct values when selecting from a bag
3329 * without replacement", Alberto Dell'Era,
3330 * http://www.adellera.it/investigations/distinct_balls/.
3331 *
3332 * The condition i << N is equivalent to n >> 1, so this is a
3333 * good approximation when the number of distinct values in
3334 * the table is large. It turns out that this formula also
3335 * works well even when n is small.
3336 */
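/*
 * Numeric illustration (invented figures): with N = rel->tuples =
 * 10000, p = rel->rows = 1000 and n = reldistinct = 500, the
 * approximation gives 500 * (1 - (9000/10000)^(10000/500)) =
 * 500 * (1 - 0.9^20) ~= 439 distinct values expected in the output.
 */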
3337 reldistinct *=
3338 (1 - pow((rel->tuples - rel->rows) / rel->tuples,
3339 rel->tuples / reldistinct));
3340 }
3341 reldistinct = clamp_row_est(reldistinct);
3342
3343 /*
3344 * Update estimate of total distinct groups.
3345 */
3346 numdistinct *= reldistinct;
3347 }
3348
3349 varinfos = newvarinfos;
3350 } while (varinfos != NIL);
3351
3352 /* Now we can account for the effects of any SRFs */
3353 numdistinct *= srf_multiplier;
3354
3355 /* Round off */
3356 numdistinct = ceil(numdistinct);
3357
3358 /* Guard against out-of-range answers */
3359 if (numdistinct > input_rows)
3360 numdistinct = input_rows;
3361 if (numdistinct < 1.0)
3362 numdistinct = 1.0;
3363
3364 return numdistinct;
3365}
3366
3367/*
3368 * Estimate hash bucket statistics when the specified expression is used
3369 * as a hash key for the given number of buckets.
3370 *
3371 * This attempts to determine two values:
3372 *
3373 * 1. The frequency of the most common value of the expression (returns
3374 * zero into *mcv_freq if we can't get that).
3375 *
3376 * 2. The "bucketsize fraction", ie, average number of entries in a bucket
3377 * divided by total tuples in relation.
3378 *
3379 * XXX This is really pretty bogus since we're effectively assuming that the
3380 * distribution of hash keys will be the same after applying restriction
3381 * clauses as it was in the underlying relation. However, we are not nearly
3382 * smart enough to figure out how the restrict clauses might change the
3383 * distribution, so this will have to do for now.
3384 *
3385 * We are passed the number of buckets the executor will use for the given
3386 * input relation. If the data were perfectly distributed, with the same
3387 * number of tuples going into each available bucket, then the bucketsize
3388 * fraction would be 1/nbuckets. But this happy state of affairs will occur
3389 * only if (a) there are at least nbuckets distinct data values, and (b)
3390 * we have a not-too-skewed data distribution. Otherwise the buckets will
3391 * be nonuniformly occupied. If the other relation in the join has a key
3392 * distribution similar to this one's, then the most-loaded buckets are
3393 * exactly those that will be probed most often. Therefore, the "average"
3394 * bucket size for costing purposes should really be taken as something close
3395 * to the "worst case" bucket size. We try to estimate this by adjusting the
3396 * fraction if there are too few distinct data values, and then scaling up
3397 * by the ratio of the most common value's frequency to the average frequency.
3398 *
3399 * If no statistics are available, use a default estimate of 0.1. This will
3400 * discourage use of a hash rather strongly if the inner relation is large,
3401 * which is what we want. We do not want to hash unless we know that the
3402 * inner rel is well-dispersed (or the alternatives seem much worse).
3403 *
3404 * The caller should also check that the mcv_freq is not so large that the
3405 * most common value would by itself require an impractically large bucket.
3406 * In a hash join, the executor can split buckets if they get too big, but
3407 * obviously that doesn't help for a bucket that contains many duplicates of
3408 * the same value.
3409 */
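/*
 * Sketch of the arithmetic below (illustrative numbers): with nbuckets =
 * 1024, ndistinct = 500 and no nulls, avgfreq = 1/500 = 0.002 and the
 * initial estfract is 1/500; if the MCV frequency is 0.02, the skew
 * adjustment scales estfract by 0.02 / 0.002 = 10, giving 0.02.
 */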
3410void
3411estimate_hash_bucket_stats(PlannerInfo *root, Node *hashkey, double nbuckets,
3412 Selectivity *mcv_freq,
3413 Selectivity *bucketsize_frac)
3414{
3415 VariableStatData vardata;
3416 double estfract,
3417 ndistinct,
3418 stanullfrac,
3419 avgfreq;
3420 bool isdefault;
3421 AttStatsSlot sslot;
3422
3423 examine_variable(root, hashkey, 0, &vardata);
3424
3425 /* Look up the frequency of the most common value, if available */
3426 *mcv_freq = 0.0;
3427
3428 if (HeapTupleIsValid(vardata.statsTuple))
3429 {
3430 if (get_attstatsslot(&sslot, vardata.statsTuple,
3431 STATISTIC_KIND_MCV, InvalidOid,
3432 ATTSTATSSLOT_NUMBERS))
3433 {
3434 /*
3435 * The first MCV stat is for the most common value.
3436 */
3437 if (sslot.nnumbers > 0)
3438 *mcv_freq = sslot.numbers[0];
3439 free_attstatsslot(&sslot);
3440 }
3441 }
3442
3443 /* Get number of distinct values */
3444 ndistinct = get_variable_numdistinct(&vardata, &isdefault);
3445
3446 /*
3447 * If ndistinct isn't real, punt. We normally return 0.1, but if the
3448 * mcv_freq is known to be even higher than that, use it instead.
3449 */
3450 if (isdefault)
3451 {
3452 *bucketsize_frac = (Selectivity) Max(0.1, *mcv_freq);
3453 ReleaseVariableStats(vardata);
3454 return;
3455 }
3456
3457 /* Get fraction that are null */
3458 if (HeapTupleIsValid(vardata.statsTuple))
3459 {
3460 Form_pg_statistic stats;
3461
3462 stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
3463 stanullfrac = stats->stanullfrac;
3464 }
3465 else
3466 stanullfrac = 0.0;
3467
3468 /* Compute avg freq of all distinct data values in raw relation */
3469 avgfreq = (1.0 - stanullfrac) / ndistinct;
3470
3471 /*
3472 * Adjust ndistinct to account for restriction clauses. Observe we are
3473 * assuming that the data distribution is affected uniformly by the
3474 * restriction clauses!
3475 *
3476 * XXX Possibly better way, but much more expensive: multiply by
3477 * selectivity of rel's restriction clauses that mention the target Var.
	 */
	if (vardata.rel && vardata.rel->tuples > 0)
	{
		ndistinct *= vardata.rel->rows / vardata.rel->tuples;
		ndistinct = clamp_row_est(ndistinct);
	}

	/*
	 * Initial estimate of bucketsize fraction is 1/nbuckets as long as the
	 * number of buckets is less than the expected number of distinct values;
	 * otherwise it is 1/ndistinct.
	 */
	if (ndistinct > nbuckets)
		estfract = 1.0 / nbuckets;
	else
		estfract = 1.0 / ndistinct;

	/*
	 * Adjust estimated bucketsize upward to account for skewed distribution.
	 */
	if (avgfreq > 0.0 && *mcv_freq > avgfreq)
		estfract *= *mcv_freq / avgfreq;

	/*
	 * Clamp bucketsize to sane range (the above adjustment could easily
	 * produce an out-of-range result). We set the lower bound a little above
	 * zero, since zero isn't a very sane result.
	 */
	if (estfract < 1.0e-6)
		estfract = 1.0e-6;
	else if (estfract > 1.0)
		estfract = 1.0;

	*bucketsize_frac = (Selectivity) estfract;

	ReleaseVariableStats(vardata);
}

/*
 * estimate_hashagg_tablesize
 *	  estimate the number of bytes that a hash aggregate hashtable will
 *	  require based on the agg_costs, path width and number of groups.
 *
 * We return the result as "double" to forestall any possible overflow
 * problem in the multiplication by dNumGroups.
 *
 * XXX this may be over-estimating the size now that hashagg knows to omit
 * unneeded columns from the hashtable. Also for mixed-mode grouping sets,
 * grouping columns not in the hashed set are counted here even though hashagg
 * won't store them. Is this a problem?
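 *
 * Illustrative arithmetic (assuming 8-byte MAXALIGN): a path width of 36
 * bytes contributes MAXALIGN(36) = 40 bytes per entry; the minimal tuple
 * header, any pass-by-ref transition space, and the per-entry hash overhead
 * are added on top, and the per-entry total is then multiplied by
 * dNumGroups.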
 */
double
estimate_hashagg_tablesize(Path *path, const AggClauseCosts *agg_costs,
						   double dNumGroups)
{
	Size		hashentrysize;

	/* Estimate per-hash-entry space at tuple width... */
	hashentrysize = MAXALIGN(path->pathtarget->width) +
		MAXALIGN(SizeofMinimalTupleHeader);

	/* plus space for pass-by-ref transition values... */
	hashentrysize += agg_costs->transitionSpace;
	/* plus the per-hash-entry overhead */
	hashentrysize += hash_agg_entry_size(agg_costs->numAggs);

	/*
	 * Note that this disregards the effect of fill-factor and growth policy
	 * of the hash table. That's probably ok, given that the default
	 * fill-factor is relatively high. It'd be hard to meaningfully factor in
	 * "double-in-size" growth policies here.
	 */
	return hashentrysize * dNumGroups;
}


/*-------------------------------------------------------------------------
 *
 * Support routines
 *
 *-------------------------------------------------------------------------
 */

/*
 * Find applicable ndistinct statistics for the given list of VarInfos (which
 * must all belong to the given rel), and update *ndistinct to the estimate of
 * the MVNDistinctItem that best matches. If a match is found, the matched
 * varinfos are removed from *varinfos.
 *
 * Varinfos that aren't for simple Vars are ignored.
 *
 * Return true if we're able to find a match, false otherwise.
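 *
 * For example (hypothetical): given GROUP BY a, b, c and an ndistinct
 * statistics object on (a, b), the (a, b) item supplies a combined estimate
 * for those two columns, while the varinfo for c is left in *varinfos for
 * the caller to handle separately.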
 */
static bool
estimate_multivariate_ndistinct(PlannerInfo *root, RelOptInfo *rel,
								List **varinfos, double *ndistinct)
{
	ListCell   *lc;
	Bitmapset  *attnums = NULL;
	int			nmatches;
	Oid			statOid = InvalidOid;
	MVNDistinct *stats;
	Bitmapset  *matched = NULL;

	/* bail out immediately if the table has no extended statistics */
	if (!rel->statlist)
		return false;

	/* Determine the attnums we're looking for */
	foreach(lc, *varinfos)
	{
		GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);

		Assert(varinfo->rel == rel);

		if (IsA(varinfo->var, Var))
		{
			attnums = bms_add_member(attnums,
									 ((Var *) varinfo->var)->varattno);
		}
	}

	/* look for the ndistinct statistics matching the most vars */
	nmatches = 1;				/* we require at least two matches */
	foreach(lc, rel->statlist)
	{
		StatisticExtInfo *info = (StatisticExtInfo *) lfirst(lc);
		Bitmapset  *shared;
		int			nshared;

		/* skip statistics of other kinds */
		if (info->kind != STATS_EXT_NDISTINCT)
			continue;

		/* compute attnums shared by the vars and the statistics object */
		shared = bms_intersect(info->keys, attnums);
		nshared = bms_num_members(shared);

		/*
		 * Does this statistics object match more columns than the currently
		 * best object? If so, use this one instead.
		 *
		 * XXX This should break ties using the name of the object, or
		 * something like that, to make the outcome stable.
		 */
		if (nshared > nmatches)
		{
			statOid = info->statOid;
			nmatches = nshared;
			matched = shared;
		}
	}

	/* No match? */
	if (statOid == InvalidOid)
		return false;
	Assert(nmatches > 1 && matched != NULL);

	stats = statext_ndistinct_load(statOid);

	/*
	 * If we have a match, search it for the specific item that matches (there
	 * must be one), and construct the output values.
	 */
	if (stats)
	{
		int			i;
		List	   *newlist = NIL;
		MVNDistinctItem *item = NULL;

		/* Find the specific item that exactly matches the combination */
		for (i = 0; i < stats->nitems; i++)
		{
			MVNDistinctItem *tmpitem = &stats->items[i];

			if (bms_subset_compare(tmpitem->attrs, matched) == BMS_EQUAL)
			{
				item = tmpitem;
				break;
			}
		}

		/* make sure we found an item */
		if (!item)
			elog(ERROR, "corrupt MVNDistinct entry");

		/* Form the output varinfo list, keeping only unmatched ones */
		foreach(lc, *varinfos)
		{
			GroupVarInfo *varinfo = (GroupVarInfo *) lfirst(lc);
			AttrNumber	attnum;

			if (!IsA(varinfo->var, Var))
			{
				newlist = lappend(newlist, varinfo);
				continue;
			}

			attnum = ((Var *) varinfo->var)->varattno;
			if (!bms_is_member(attnum, matched))
				newlist = lappend(newlist, varinfo);
		}

		*varinfos = newlist;
		*ndistinct = item->ndistinct;
		return true;
	}

	return false;
}

/*
 * convert_to_scalar
 *	  Convert non-NULL values of the indicated types to the comparison
 *	  scale needed by scalarineqsel().
 *	  Returns "true" if successful.
 *
 * XXX this routine is a hack: ideally we should look up the conversion
 * subroutines in pg_type.
 *
 * All numeric datatypes are simply converted to their equivalent
 * "double" values. (NUMERIC values that are outside the range of "double"
 * are clamped to +/- HUGE_VAL.)
 *
 * String datatypes are converted by convert_string_to_scalar(),
 * which is explained below. The reason why this routine deals with
 * three values at a time, not just one, is that we need it for strings.
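 * (Specifically, the common-prefix stripping and byte-range selection in
 * convert_string_to_scalar() must see the probe value and both histogram
 * bin bounds together so that all three are scaled consistently.)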
 *
 * The bytea datatype is just different enough from strings that it has
 * to be treated separately.
 *
 * The several datatypes representing absolute times are all converted
 * to Timestamp, an int64 microsecond count, which we then promote to
 * double. Note this will give correct results even for the "special"
 * values of Timestamp, since those are chosen to compare correctly;
 * see timestamp_cmp.
 *
 * The several datatypes representing relative times (intervals) are all
 * converted to measurements expressed in microseconds.
 */
static bool
convert_to_scalar(Datum value, Oid valuetypid, Oid collid, double *scaledvalue,
				  Datum lobound, Datum hibound, Oid boundstypid,
				  double *scaledlobound, double *scaledhibound)
{
	bool		failure = false;

	/*
	 * Both the valuetypid and the boundstypid should exactly match the
	 * declared input type(s) of the operator we are invoked for. However,
	 * extensions might try to use scalarineqsel as estimator for operators
	 * with input type(s) we don't handle here; in such cases, we want to
	 * return false, not fail. In any case, we mustn't assume that valuetypid
	 * and boundstypid are identical.
	 *
	 * XXX The histogram we are interpolating between points of could belong
	 * to a column that's only binary-compatible with the declared type. In
	 * essence we are assuming that the semantics of binary-compatible types
	 * are enough alike that we can use a histogram generated with one type's
	 * operators to estimate selectivity for the other's. This is outright
	 * wrong in some cases --- in particular signed versus unsigned
	 * interpretation could trip us up. But it's useful enough in the
	 * majority of cases that we do it anyway. Should think about more
	 * rigorous ways to do it.
	 */
	switch (valuetypid)
	{
			/*
			 * Built-in numeric types
			 */
		case BOOLOID:
		case INT2OID:
		case INT4OID:
		case INT8OID:
		case FLOAT4OID:
		case FLOAT8OID:
		case NUMERICOID:
		case OIDOID:
		case REGPROCOID:
		case REGPROCEDUREOID:
		case REGOPEROID:
		case REGOPERATOROID:
		case REGCLASSOID:
		case REGTYPEOID:
		case REGCONFIGOID:
		case REGDICTIONARYOID:
		case REGROLEOID:
		case REGNAMESPACEOID:
			*scaledvalue = convert_numeric_to_scalar(value, valuetypid,
													 &failure);
			*scaledlobound = convert_numeric_to_scalar(lobound, boundstypid,
													   &failure);
			*scaledhibound = convert_numeric_to_scalar(hibound, boundstypid,
													   &failure);
			return !failure;

			/*
			 * Built-in string types
			 */
		case CHAROID:
		case BPCHAROID:
		case VARCHAROID:
		case TEXTOID:
		case NAMEOID:
			{
				char	   *valstr = convert_string_datum(value, valuetypid,
														  collid, &failure);
				char	   *lostr = convert_string_datum(lobound, boundstypid,
														 collid, &failure);
				char	   *histr = convert_string_datum(hibound, boundstypid,
														 collid, &failure);

				/*
				 * Bail out if any of the values is not of string type. We
				 * might leak converted strings for the other value(s), but
				 * that's not worth troubling over.
				 */
				if (failure)
					return false;

				convert_string_to_scalar(valstr, scaledvalue,
										 lostr, scaledlobound,
										 histr, scaledhibound);
				pfree(valstr);
				pfree(lostr);
				pfree(histr);
				return true;
			}

			/*
			 * Built-in bytea type
			 */
		case BYTEAOID:
			{
				/* We only support bytea vs bytea comparison */
				if (boundstypid != BYTEAOID)
					return false;
				convert_bytea_to_scalar(value, scaledvalue,
										lobound, scaledlobound,
										hibound, scaledhibound);
				return true;
			}

			/*
			 * Built-in time types
			 */
		case TIMESTAMPOID:
		case TIMESTAMPTZOID:
		case DATEOID:
		case INTERVALOID:
		case TIMEOID:
		case TIMETZOID:
			*scaledvalue = convert_timevalue_to_scalar(value, valuetypid,
													   &failure);
			*scaledlobound = convert_timevalue_to_scalar(lobound, boundstypid,
														 &failure);
			*scaledhibound = convert_timevalue_to_scalar(hibound, boundstypid,
														 &failure);
			return !failure;

			/*
			 * Built-in network types
			 */
		case INETOID:
		case CIDROID:
		case MACADDROID:
		case MACADDR8OID:
			*scaledvalue = convert_network_to_scalar(value, valuetypid,
													 &failure);
			*scaledlobound = convert_network_to_scalar(lobound, boundstypid,
													   &failure);
			*scaledhibound = convert_network_to_scalar(hibound, boundstypid,
													   &failure);
			return !failure;
	}
	/* Don't know how to convert */
	*scaledvalue = *scaledlobound = *scaledhibound = 0;
	return false;
}

/*
 * Do convert_to_scalar()'s work for any numeric data type.
 *
 * On failure (e.g., unsupported typid), set *failure to true;
 * otherwise, that variable is not changed.
 */
static double
convert_numeric_to_scalar(Datum value, Oid typid, bool *failure)
{
	switch (typid)
	{
		case BOOLOID:
			return (double) DatumGetBool(value);
		case INT2OID:
			return (double) DatumGetInt16(value);
		case INT4OID:
			return (double) DatumGetInt32(value);
		case INT8OID:
			return (double) DatumGetInt64(value);
		case FLOAT4OID:
			return (double) DatumGetFloat4(value);
		case FLOAT8OID:
			return (double) DatumGetFloat8(value);
		case NUMERICOID:
			/* Note: out-of-range values will be clamped to +-HUGE_VAL */
			return (double)
				DatumGetFloat8(DirectFunctionCall1(numeric_float8_no_overflow,
												   value));
		case OIDOID:
		case REGPROCOID:
		case REGPROCEDUREOID:
		case REGOPEROID:
		case REGOPERATOROID:
		case REGCLASSOID:
		case REGTYPEOID:
		case REGCONFIGOID:
		case REGDICTIONARYOID:
		case REGROLEOID:
		case REGNAMESPACEOID:
			/* we can treat OIDs as integers... */
			return (double) DatumGetObjectId(value);
	}

	*failure = true;
	return 0;
}

/*
 * Do convert_to_scalar()'s work for any character-string data type.
 *
 * String datatypes are converted to a scale that ranges from 0 to 1,
 * where we visualize the bytes of the string as fractional digits.
 *
 * We do not want the base to be 256, however, since that tends to
 * generate inflated selectivity estimates; few databases will have
 * occurrences of all 256 possible byte values at each position.
 * Instead, use the smallest and largest byte values seen in the bounds
 * as the estimated range for each byte, after some fudging to deal with
 * the fact that we probably aren't going to see the full range that way.
 *
 * An additional refinement is that we discard any common prefix of the
 * three strings before computing the scaled values. This allows us to
 * "zoom in" when we encounter a narrow data range. An example is a phone
 * number database where all the values begin with the same area code.
 * (Actually, the bounds will be adjacent histogram-bin-boundary values,
 * so this is more likely to happen than you might think.)
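 *
 * Worked example (illustrative): with lobound = "madam", hibound = "mouse",
 * and value = "melon", all bytes seen are lower-case letters, so the range
 * is widened to 'a'..'z' (base 26). The common prefix "m" is stripped, and
 * "elon" then converts to roughly ('e'-'a')/26 + ('l'-'a')/26^2 + ... ~= 0.17.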
 */
static void
convert_string_to_scalar(char *value,
						 double *scaledvalue,
						 char *lobound,
						 double *scaledlobound,
						 char *hibound,
						 double *scaledhibound)
{
	int			rangelo,
				rangehi;
	char	   *sptr;

	rangelo = rangehi = (unsigned char) hibound[0];
	for (sptr = lobound; *sptr; sptr++)
	{
		if (rangelo > (unsigned char) *sptr)
			rangelo = (unsigned char) *sptr;
		if (rangehi < (unsigned char) *sptr)
			rangehi = (unsigned char) *sptr;
	}
	for (sptr = hibound; *sptr; sptr++)
	{
		if (rangelo > (unsigned char) *sptr)
			rangelo = (unsigned char) *sptr;
		if (rangehi < (unsigned char) *sptr)
			rangehi = (unsigned char) *sptr;
	}
	/* If range includes any upper-case ASCII chars, make it include all */
	if (rangelo <= 'Z' && rangehi >= 'A')
	{
		if (rangelo > 'A')
			rangelo = 'A';
		if (rangehi < 'Z')
			rangehi = 'Z';
	}
	/* Ditto lower-case */
	if (rangelo <= 'z' && rangehi >= 'a')
	{
		if (rangelo > 'a')
			rangelo = 'a';
		if (rangehi < 'z')
			rangehi = 'z';
	}
	/* Ditto digits */
	if (rangelo <= '9' && rangehi >= '0')
	{
		if (rangelo > '0')
			rangelo = '0';
		if (rangehi < '9')
			rangehi = '9';
	}
	/*
	 * If the range includes fewer than 10 distinct byte values, assume we
	 * have not got enough data, and widen it to the regular ASCII set.
	 */
	if (rangehi - rangelo < 9)
	{
		rangelo = ' ';
		rangehi = 127;
	}

	/*
	 * Now strip any common prefix of the three strings.
	 */
	while (*lobound)
	{
		if (*lobound != *hibound || *lobound != *value)
			break;
		lobound++, hibound++, value++;
	}

	/*
	 * Now we can do the conversions.
	 */
	*scaledvalue = convert_one_string_to_scalar(value, rangelo, rangehi);
	*scaledlobound = convert_one_string_to_scalar(lobound, rangelo, rangehi);
	*scaledhibound = convert_one_string_to_scalar(hibound, rangelo, rangehi);
}

static double
convert_one_string_to_scalar(char *value, int rangelo, int rangehi)
{
	int			slen = strlen(value);
	double		num,
				denom,
				base;

	if (slen <= 0)
		return 0.0;				/* empty string has scalar value 0 */

	/*
	 * There seems little point in considering more than a dozen bytes from
	 * the string. Since base is at least 10, that will give us nominal
	 * resolution of at least 12 decimal digits, which is surely far more
	 * precision than this estimation technique has got anyway (especially in
	 * non-C locales). Also, even with the maximum possible base of 256, this
	 * ensures denom cannot grow larger than 256^13 = 2.03e31, which will not
	 * overflow on any known machine.
	 */
	if (slen > 12)
		slen = 12;

	/* Convert initial characters to fraction */
	base = rangehi - rangelo + 1;
	num = 0.0;
	denom = base;
	while (slen-- > 0)
	{
		int			ch = (unsigned char) *value++;

		if (ch < rangelo)
			ch = rangelo - 1;
		else if (ch > rangehi)
			ch = rangehi + 1;
		num += ((double) (ch - rangelo)) / denom;
		denom *= base;
	}

	return num;
}

/*
 * Convert a string-type Datum into a palloc'd, null-terminated string.
 *
 * On failure (e.g., unsupported typid), set *failure to true;
 * otherwise, that variable is not changed. (We'll return NULL on failure.)
 *
 * When using a non-C locale, we must pass the string through strxfrm()
 * before continuing, so as to generate correct locale-specific results.
 */
static char *
convert_string_datum(Datum value, Oid typid, Oid collid, bool *failure)
{
	char	   *val;

	switch (typid)
	{
		case CHAROID:
			val = (char *) palloc(2);
			val[0] = DatumGetChar(value);
			val[1] = '\0';
			break;
		case BPCHAROID:
		case VARCHAROID:
		case TEXTOID:
			val = TextDatumGetCString(value);
			break;
		case NAMEOID:
			{
				NameData   *nm = (NameData *) DatumGetPointer(value);

				val = pstrdup(NameStr(*nm));
				break;
			}
		default:
			*failure = true;
			return NULL;
	}

	if (!lc_collate_is_c(collid))
	{
		char	   *xfrmstr;
		size_t		xfrmlen;
		size_t		xfrmlen2 PG_USED_FOR_ASSERTS_ONLY;

		/*
		 * XXX: We could guess at a suitable output buffer size and only call
		 * strxfrm twice if our guess is too small.
		 *
		 * XXX: strxfrm doesn't support UTF-8 encoding on Win32; it can return
		 * bogus data or set an error. This is not really a problem unless it
		 * crashes, since it will only cause an estimation error, nothing
		 * fatal.
		 */
#if _MSC_VER == 1400			/* VS.Net 2005 */

		/*
		 * Probe the needed length with a dummy output buffer, rather than
		 * NULL, to work around a VS.Net 2005 strxfrm bug; see
		 * http://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?FeedbackID=99694
		 */
		{
			char		x[1];

			xfrmlen = strxfrm(x, val, 0);
		}
#else
		xfrmlen = strxfrm(NULL, val, 0);
#endif
#ifdef WIN32

		/*
		 * On Windows, strxfrm returns INT_MAX when an error occurs. Instead
		 * of trying to allocate this much memory (and fail), just return the
		 * original string unmodified as if we were in the C locale.
		 */
		if (xfrmlen == INT_MAX)
			return val;
#endif
		xfrmstr = (char *) palloc(xfrmlen + 1);
		xfrmlen2 = strxfrm(xfrmstr, val, xfrmlen + 1);

		/*
		 * Some systems (e.g., glibc) can return a smaller value from the
		 * second call than the first; thus the Assert must be <= not ==.
		 */
		Assert(xfrmlen2 <= xfrmlen);
		pfree(val);
		val = xfrmstr;
	}

	return val;
}

/*
 * Do convert_to_scalar()'s work for any bytea data type.
 *
 * Very similar to convert_string_to_scalar except we can't assume
 * null-termination and therefore pass explicit lengths around.
 *
 * Also, assumptions about likely "normal" ranges of characters have been
 * removed - a data range of 0..255 is always used, for now. (Perhaps
 * someday we will add information about actual byte data range to
 * pg_statistic.)
 */
static void
convert_bytea_to_scalar(Datum value,
						double *scaledvalue,
						Datum lobound,
						double *scaledlobound,
						Datum hibound,
						double *scaledhibound)
{
	bytea	   *valuep = DatumGetByteaPP(value);
	bytea	   *loboundp = DatumGetByteaPP(lobound);
	bytea	   *hiboundp = DatumGetByteaPP(hibound);
	int			rangelo,
				rangehi,
				valuelen = VARSIZE_ANY_EXHDR(valuep),
				loboundlen = VARSIZE_ANY_EXHDR(loboundp),
				hiboundlen = VARSIZE_ANY_EXHDR(hiboundp),
				i,
				minlen;
	unsigned char *valstr = (unsigned char *) VARDATA_ANY(valuep);
	unsigned char *lostr = (unsigned char *) VARDATA_ANY(loboundp);
	unsigned char *histr = (unsigned char *) VARDATA_ANY(hiboundp);

	/*
	 * Assume bytea data is uniformly distributed across all byte values.
	 */
	rangelo = 0;
	rangehi = 255;

	/*
	 * Now strip any common prefix of the three strings.
	 */
	minlen = Min(Min(valuelen, loboundlen), hiboundlen);
	for (i = 0; i < minlen; i++)
	{
		if (*lostr != *histr || *lostr != *valstr)
			break;
		lostr++, histr++, valstr++;
		loboundlen--, hiboundlen--, valuelen--;
	}

	/*
	 * Now we can do the conversions.
	 */
	*scaledvalue = convert_one_bytea_to_scalar(valstr, valuelen, rangelo, rangehi);
	*scaledlobound = convert_one_bytea_to_scalar(lostr, loboundlen, rangelo, rangehi);
	*scaledhibound = convert_one_bytea_to_scalar(histr, hiboundlen, rangelo, rangehi);
}

static double
convert_one_bytea_to_scalar(unsigned char *value, int valuelen,
							int rangelo, int rangehi)
{
	double		num,
				denom,
				base;

	if (valuelen <= 0)
		return 0.0;				/* empty string has scalar value 0 */

	/*
	 * Since the base is 256, we need not consider more than about 10 bytes
	 * (even that many seems like overkill).
	 */
	if (valuelen > 10)
		valuelen = 10;

	/* Convert initial characters to fraction */
	base = rangehi - rangelo + 1;
	num = 0.0;
	denom = base;
	while (valuelen-- > 0)
	{
		int			ch = *value++;

		if (ch < rangelo)
			ch = rangelo - 1;
		else if (ch > rangehi)
			ch = rangehi + 1;
		num += ((double) (ch - rangelo)) / denom;
		denom *= base;
	}

	return num;
}

/*
 * Do convert_to_scalar()'s work for any timevalue data type.
 *
 * On failure (e.g., unsupported typid), set *failure to true;
 * otherwise, that variable is not changed.
 */
static double
convert_timevalue_to_scalar(Datum value, Oid typid, bool *failure)
{
	switch (typid)
	{
		case TIMESTAMPOID:
			return DatumGetTimestamp(value);
		case TIMESTAMPTZOID:
			return DatumGetTimestampTz(value);
		case DATEOID:
			return date2timestamp_no_overflow(DatumGetDateADT(value));
		case INTERVALOID:
			{
				Interval   *interval = DatumGetIntervalP(value);

				/*
				 * Convert the month part of Interval to days using assumed
				 * average month length of 365.25/12.0 days. Not too
				 * accurate, but plenty good enough for our purposes.
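				 *
				 * Illustrative arithmetic: '1 mon 1 day' becomes
				 * (30.4375 + 1) * USECS_PER_DAY ~= 2.72e12 usec.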
				 */
				return interval->time + interval->day * (double) USECS_PER_DAY +
					interval->month * ((DAYS_PER_YEAR / (double) MONTHS_PER_YEAR) * USECS_PER_DAY);
			}
		case TIMEOID:
			return DatumGetTimeADT(value);
		case TIMETZOID:
			{
				TimeTzADT  *timetz = DatumGetTimeTzADTP(value);

				/* use GMT-equivalent time */
				return (double) (timetz->time + (timetz->zone * 1000000.0));
			}
	}

	*failure = true;
	return 0;
}


/*
 * get_restriction_variable
 *	  Examine the args of a restriction clause to see if it's of the
 *	  form (variable op pseudoconstant) or (pseudoconstant op variable),
 *	  where "variable" could be either a Var or an expression in vars of a
 *	  single relation. If so, extract information about the variable,
 *	  and also indicate which side it was on and the other argument.
 *
 * Inputs:
 *	root: the planner info
 *	args: clause argument list
 *	varRelid: see specs for restriction selectivity functions
 *
 * Outputs: (these are valid only if true is returned)
 *	*vardata: gets information about variable (see examine_variable)
 *	*other: gets other clause argument, aggressively reduced to a constant
 *	*varonleft: set true if variable is on the left, false if on the right
 *
 * Returns true if a variable is identified, otherwise false.
 *
 * Note: if there are Vars on both sides of the clause, we must fail, because
 * callers are expecting that the other side will act like a pseudoconstant.
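 *
 * For example (illustrative): for WHERE t.x < 42, *vardata describes t.x,
 * *other is the Const 42, and *varonleft is set true; for 42 > t.x the
 * variable is found on the right instead.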
 */
bool
get_restriction_variable(PlannerInfo *root, List *args, int varRelid,
						 VariableStatData *vardata, Node **other,
						 bool *varonleft)
{
	Node	   *left,
			   *right;
	VariableStatData rdata;

	/* Fail if not a binary opclause (probably shouldn't happen) */
	if (list_length(args) != 2)
		return false;

	left = (Node *) linitial(args);
	right = (Node *) lsecond(args);

	/*
	 * Examine both sides. Note that when varRelid is nonzero, Vars of other
	 * relations will be treated as pseudoconstants.
	 */
	examine_variable(root, left, varRelid, vardata);
	examine_variable(root, right, varRelid, &rdata);

	/*
	 * If one side is a variable and the other not, we win.
	 */
	if (vardata->rel && rdata.rel == NULL)
	{
		*varonleft = true;
		*other = estimate_expression_value(root, rdata.var);
		/* Assume we need no ReleaseVariableStats(rdata) here */
		return true;
	}

	if (vardata->rel == NULL && rdata.rel)
	{
		*varonleft = false;
		*other = estimate_expression_value(root, vardata->var);
		/* Assume we need no ReleaseVariableStats(*vardata) here */
		*vardata = rdata;
		return true;
	}

	/* Oops, clause has wrong structure (probably var op var) */
	ReleaseVariableStats(*vardata);
	ReleaseVariableStats(rdata);

	return false;
}

/*
 * get_join_variables
 *	  Apply examine_variable() to each side of a join clause.
 *	  Also, attempt to identify whether the join clause has the same
 *	  or reversed sense compared to the SpecialJoinInfo.
 *
 * We consider the join clause "normal" if it is "lhs_var OP rhs_var",
 * or "reversed" if it is "rhs_var OP lhs_var". In complicated cases
 * where we can't tell for sure, we default to assuming it's normal.
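 *
 * For instance (illustrative), with t1 in sjinfo->syn_lefthand and t2 in
 * sjinfo->syn_righthand, the clause t1.a = t2.b is "normal" while
 * t2.b = t1.a is "reversed".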
 */
void
get_join_variables(PlannerInfo *root, List *args, SpecialJoinInfo *sjinfo,
				   VariableStatData *vardata1, VariableStatData *vardata2,
				   bool *join_is_reversed)
{
	Node	   *left,
			   *right;

	if (list_length(args) != 2)
		elog(ERROR, "join operator should take two arguments");

	left = (Node *) linitial(args);
	right = (Node *) lsecond(args);

	examine_variable(root, left, 0, vardata1);
	examine_variable(root, right, 0, vardata2);

	if (vardata1->rel &&
		bms_is_subset(vardata1->rel->relids, sjinfo->syn_righthand))
		*join_is_reversed = true;	/* var1 is on RHS */
	else if (vardata2->rel &&
			 bms_is_subset(vardata2->rel->relids, sjinfo->syn_lefthand))
		*join_is_reversed = true;	/* var2 is on LHS */
	else
		*join_is_reversed = false;
}

/*
 * examine_variable
 *	  Try to look up statistical data about an expression.
 *	  Fill in a VariableStatData struct to describe the expression.
 *
 * Inputs:
 *	root: the planner info
 *	node: the expression tree to examine
 *	varRelid: see specs for restriction selectivity functions
 *
 * Outputs: *vardata is filled as follows:
 *	var: the input expression (with any binary relabeling stripped, if
 *		it is or contains a variable; but otherwise the type is preserved)
 *	rel: RelOptInfo for relation containing variable; NULL if expression
 *		contains no Vars (NOTE this could point to a RelOptInfo of a
 *		subquery, not one in the current query).
 *	statsTuple: the pg_statistic entry for the variable, if one exists;
 *		otherwise NULL.
 *	freefunc: pointer to a function to release statsTuple with.
 *	vartype: exposed type of the expression; this should always match
 *		the declared input type of the operator we are estimating for.
 *	atttype, atttypmod: actual type/typmod of the "var" expression. This is
 *		commonly the same as the exposed type of the variable argument,
 *		but can be different in binary-compatible-type cases.
 *	isunique: true if we were able to match the var to a unique index or a
 *		single-column DISTINCT clause, implying its values are unique for
 *		this query. (Caution: this should be trusted for statistical
 *		purposes only, since we do not check indimmediate nor verify that
 *		the exact same definition of equality applies.)
 *	acl_ok: true if current user has permission to read the column(s)
 *		underlying the pg_statistic entry. This is consulted by
 *		statistic_proc_security_check().
 *
 * Caller is responsible for doing ReleaseVariableStats() before exiting.
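 *
 * For example (illustrative): given the expression lower(t.name) and an
 * expression index on lower(name), the code below can locate the
 * pg_statistic entry collected for that index column.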
 */
void
examine_variable(PlannerInfo *root, Node *node, int varRelid,
				 VariableStatData *vardata)
{
	Node	   *basenode;
	Relids		varnos;
	RelOptInfo *onerel;

	/* Make sure we don't return dangling pointers in vardata */
	MemSet(vardata, 0, sizeof(VariableStatData));

	/* Save the exposed type of the expression */
	vardata->vartype = exprType(node);

	/* Look inside any binary-compatible relabeling */

	if (IsA(node, RelabelType))
		basenode = (Node *) ((RelabelType *) node)->arg;
	else
		basenode = node;

	/* Fast path for a simple Var */

	if (IsA(basenode, Var) &&
		(varRelid == 0 || varRelid == ((Var *) basenode)->varno))
	{
		Var		   *var = (Var *) basenode;

		/* Set up result fields other than the stats tuple */
		vardata->var = basenode;	/* return Var without relabeling */
		vardata->rel = find_base_rel(root, var->varno);
		vardata->atttype = var->vartype;
		vardata->atttypmod = var->vartypmod;
		vardata->isunique = has_unique_index(vardata->rel, var->varattno);

		/* Try to locate some stats */
		examine_simple_variable(root, var, vardata);

		return;
	}

	/*
	 * Okay, it's a more complicated expression. Determine variable
	 * membership. Note that when varRelid isn't zero, only vars of that
	 * relation are considered "real" vars.
	 */
	varnos = pull_varnos(basenode);

	onerel = NULL;

	switch (bms_membership(varnos))
	{
		case BMS_EMPTY_SET:
			/* No Vars at all ... must be pseudo-constant clause */
			break;
		case BMS_SINGLETON:
			if (varRelid == 0 || bms_is_member(varRelid, varnos))
			{
				onerel = find_base_rel(root,
									   (varRelid ? varRelid : bms_singleton_member(varnos)));
				vardata->rel = onerel;
				node = basenode;	/* strip any relabeling */
			}
			/* else treat it as a constant */
			break;
		case BMS_MULTIPLE:
			if (varRelid == 0)
			{
				/* treat it as a variable of a join relation */
				vardata->rel = find_join_rel(root, varnos);
				node = basenode;	/* strip any relabeling */
			}
			else if (bms_is_member(varRelid, varnos))
			{
				/* ignore the vars belonging to other relations */
				vardata->rel = find_base_rel(root, varRelid);
				node = basenode;	/* strip any relabeling */
				/* note: no point in expressional-index search here */
			}
			/* else treat it as a constant */
			break;
	}

	bms_free(varnos);

	vardata->var = node;
	vardata->atttype = exprType(node);
	vardata->atttypmod = exprTypmod(node);

	if (onerel)
	{
		/*
		 * We have an expression in vars of a single relation. Try to match
		 * it to expressional index columns, in hopes of finding some
		 * statistics.
		 *
		 * Note that we consider all index columns including INCLUDE columns,
		 * since there could be stats for such columns. But the test for
		 * uniqueness needs to be warier.
		 *
		 * XXX it's conceivable that there are multiple matches with different
		 * index opfamilies; if so, we need to pick one that matches the
		 * operator we are estimating for. FIXME later.
		 */
		ListCell   *ilist;

		foreach(ilist, onerel->indexlist)
		{
			IndexOptInfo *index = (IndexOptInfo *) lfirst(ilist);
			ListCell   *indexpr_item;
			int			pos;

			indexpr_item = list_head(index->indexprs);
			if (indexpr_item == NULL)
				continue;		/* no expressions here... */

			for (pos = 0; pos < index->ncolumns; pos++)
			{
				if (index->indexkeys[pos] == 0)
				{
					Node	   *indexkey;

					if (indexpr_item == NULL)
						elog(ERROR, "too few entries in indexprs list");
					indexkey = (Node *) lfirst(indexpr_item);
					if (indexkey && IsA(indexkey, RelabelType))
						indexkey = (Node *) ((RelabelType *) indexkey)->arg;
					if (equal(node, indexkey))
					{
						/*
						 * Found a match ... is it a unique index? Tests here
						 * should match has_unique_index().
						 */
						if (index->unique &&
							index->nkeycolumns == 1 &&
							pos == 0 &&
							(index->indpred == NIL || index->predOK))
							vardata->isunique = true;

						/*
						 * Has it got stats? We only consider stats for
						 * non-partial indexes, since partial indexes probably
						 * don't reflect whole-relation statistics; the above
						 * check for uniqueness is the only info we take from
						 * a partial index.
						 *
						 * An index stats hook, however, must make its own
						 * decisions about what to do with partial indexes.
						 */
						if (get_index_stats_hook &&
							(*get_index_stats_hook) (root, index->indexoid,
													 pos + 1, vardata))
						{
							/*
							 * The hook took control of acquiring a stats
							 * tuple. If it did supply a tuple, it'd better
							 * have supplied a freefunc.
							 */
							if (HeapTupleIsValid(vardata->statsTuple) &&
								!vardata->freefunc)
								elog(ERROR, "no function provided to release variable stats with");
						}
						else if (index->indpred == NIL)
						{
							vardata->statsTuple =
								SearchSysCache3(STATRELATTINH,
												ObjectIdGetDatum(index->indexoid),
												Int16GetDatum(pos + 1),
												BoolGetDatum(false));
							vardata->freefunc = ReleaseSysCache;

							if (HeapTupleIsValid(vardata->statsTuple))
							{
								/* Get index's table for permission check */
								RangeTblEntry *rte;
								Oid			userid;

								rte = planner_rt_fetch(index->rel->relid, root);
								Assert(rte->rtekind == RTE_RELATION);

								/*
								 * Use checkAsUser if it's set, in case we're
								 * accessing the table via a view.
								 */
								userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();

								/*
								 * For simplicity, we insist on the whole
								 * table being selectable, rather than trying
								 * to identify which column(s) the index
								 * depends on. Also require all rows to be
								 * selectable --- there must be no
								 * securityQuals from security barrier views
								 * or RLS policies.
								 */
								vardata->acl_ok =
									rte->securityQuals == NIL &&
									(pg_class_aclcheck(rte->relid, userid,
													   ACL_SELECT) == ACLCHECK_OK);
							}
							else
							{
								/* suppress leakproofness checks later */
								vardata->acl_ok = true;
							}
						}
						if (vardata->statsTuple)
							break;
					}
					indexpr_item = lnext(indexpr_item);
				}
			}
			if (vardata->statsTuple)
				break;
		}
	}
}

/*
 * examine_simple_variable
 *	  Handle a simple Var for examine_variable
 *
 * This is split out as a subroutine so that we can recurse to deal with
 * Vars referencing subqueries.
 *
 * We already filled in all the fields of *vardata except for the stats tuple.
 */
static void
examine_simple_variable(PlannerInfo *root, Var *var,
						VariableStatData *vardata)
{
	RangeTblEntry *rte = root->simple_rte_array[var->varno];

	Assert(IsA(rte, RangeTblEntry));

	if (get_relation_stats_hook &&
		(*get_relation_stats_hook) (root, rte, var->varattno, vardata))
	{
		/*
		 * The hook took control of acquiring a stats tuple. If it did supply
		 * a tuple, it'd better have supplied a freefunc.
		 */
		if (HeapTupleIsValid(vardata->statsTuple) &&
			!vardata->freefunc)
			elog(ERROR, "no function provided to release variable stats with");
	}
	else if (rte->rtekind == RTE_RELATION)
	{
		/*
		 * Plain table or parent of an inheritance appendrel, so look up the
		 * column in pg_statistic
		 */
		vardata->statsTuple = SearchSysCache3(STATRELATTINH,
											  ObjectIdGetDatum(rte->relid),
											  Int16GetDatum(var->varattno),
											  BoolGetDatum(rte->inh));
		vardata->freefunc = ReleaseSysCache;

		if (HeapTupleIsValid(vardata->statsTuple))
		{
			Oid			userid;

			/*
			 * Check if user has permission to read this column. We require
			 * all rows to be accessible, so there must be no securityQuals
			 * from security barrier views or RLS policies. Use checkAsUser
			 * if it's set, in case we're accessing the table via a view.
			 */
			userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();

			vardata->acl_ok =
				rte->securityQuals == NIL &&
				((pg_class_aclcheck(rte->relid, userid,
									ACL_SELECT) == ACLCHECK_OK) ||
				 (pg_attribute_aclcheck(rte->relid, var->varattno, userid,
										ACL_SELECT) == ACLCHECK_OK));
		}
		else
		{
			/* suppress any possible leakproofness checks later */
			vardata->acl_ok = true;
		}
	}
	else if (rte->rtekind == RTE_SUBQUERY && !rte->inh)
	{
		/*
		 * Plain subquery (not one that was converted to an appendrel).
		 */
		Query	   *subquery = rte->subquery;
		RelOptInfo *rel;
		TargetEntry *ste;

		/*
		 * Punt if it's a whole-row var rather than a plain column reference.
		 */
		if (var->varattno == InvalidAttrNumber)
			return;

		/*
		 * Punt if subquery uses set operations or GROUP BY, as these will
		 * mash underlying columns' stats beyond recognition. (Set ops are
		 * particularly nasty; if we forged ahead, we would return stats
		 * relevant to only the leftmost subselect...) DISTINCT is also
		 * problematic, but we check that later because there is a possibility
		 * of learning something even with it.
		 */
		if (subquery->setOperations ||
			subquery->groupClause)
			return;

		/*
		 * OK, fetch RelOptInfo for subquery. Note that we don't change the
		 * rel returned in vardata, since caller expects it to be a rel of the
		 * caller's query level. Because we might already be recursing, we
		 * can't use that rel pointer either, but have to look up the Var's
		 * rel afresh.
		 */
		rel = find_base_rel(root, var->varno);

		/* If the subquery hasn't been planned yet, we have to punt */
		if (rel->subroot == NULL)
			return;
		Assert(IsA(rel->subroot, PlannerInfo));

		/*
		 * Switch our attention to the subquery as mangled by the planner. It
		 * was okay to look at the pre-planning version for the tests above,
		 * but now we need a Var that will refer to the subroot's live
		 * RelOptInfos. For instance, if any subquery pullup happened during
		 * planning, Vars in the targetlist might have gotten replaced, and we
		 * need to see the replacement expressions.
		 */
		subquery = rel->subroot->parse;
		Assert(IsA(subquery, Query));

		/* Get the subquery output expression referenced by the upper Var */
		ste = get_tle_by_resno(subquery->targetList, var->varattno);
		if (ste == NULL || ste->resjunk)
			elog(ERROR, "subquery %s does not have attribute %d",
				 rte->eref->aliasname, var->varattno);
		var = (Var *) ste->expr;

		/*
		 * If subquery uses DISTINCT, we can't make use of any stats for the
		 * variable ... but, if it's the only DISTINCT column, we are entitled
		 * to consider it unique. We do the test this way so that it works
		 * for cases involving DISTINCT ON.
		 */
		if (subquery->distinctClause)
		{
			if (list_length(subquery->distinctClause) == 1 &&
				targetIsInSortList(ste, InvalidOid, subquery->distinctClause))
				vardata->isunique = true;
			/* cannot go further */
			return;
		}

		/*
		 * If the sub-query originated from a view with the security_barrier
		 * attribute, we must not look at the variable's statistics, though it
		 * seems all right to notice the existence of a DISTINCT clause. So
		 * stop here.
		 *
		 * This is probably a harsher restriction than necessary; it's
		 * certainly OK for the selectivity estimator (which is a C function,
		 * and therefore omnipotent anyway) to look at the statistics. But
		 * many selectivity estimators will happily *invoke the operator
		 * function* to try to work out a good estimate - and that's not OK.
		 * So for now, don't dig down for stats.
		 */
		if (rte->security_barrier)
			return;

		/* Can only handle a simple Var of subquery's query level */
		if (var && IsA(var, Var) &&
			var->varlevelsup == 0)
		{
			/*
			 * OK, recurse into the subquery. Note that the original setting
			 * of vardata->isunique (which will surely be false) is left
			 * unchanged in this situation. That's what we want, since even
			 * if the underlying column is unique, the subquery may have
			 * joined to other tables in a way that creates duplicates.
			 */
			examine_simple_variable(rel->subroot, var, vardata);
		}
	}
	else
	{
		/*
		 * Otherwise, the Var comes from a FUNCTION, VALUES, or CTE RTE. (We
		 * won't see RTE_JOIN here because join alias Vars have already been
		 * flattened.) There's not much we can do with function outputs, but
		 * maybe someday try to be smarter about VALUES and/or CTEs.
		 */
	}
}

/*
 * Check whether it is permitted to call func_oid passing some of the
 * pg_statistic data in vardata. We allow this either if the user has SELECT
 * privileges on the table or column underlying the pg_statistic data or if
 * the function is marked leak-proof.
 */
bool
statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
{
	if (vardata->acl_ok)
		return true;

	if (!OidIsValid(func_oid))
		return false;

	if (get_func_leakproof(func_oid))
		return true;

	ereport(DEBUG2,
			(errmsg_internal("not using statistics because function \"%s\" is not leak-proof",
							 get_func_name(func_oid))));
	return false;
}

/*
 * get_variable_numdistinct
 *	  Estimate the number of distinct values of a variable.
 *
 * vardata: results of examine_variable
 * *isdefault: set to true if the result is a default rather than based on
 *	anything meaningful.
 *
 * NB: be careful to produce a positive integral result, since callers may
 * compare the result to exact integer counts, or might divide by it.
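 *
 * Illustrative example: under pg_statistic's convention that a negative
 * stadistinct is a fraction of the row count, stadistinct = -0.2 over
 * 10000 tuples yields an estimate of 2000 distinct values, while a
 * positive stadistinct such as 150 is used as-is.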
 */
double
get_variable_numdistinct(VariableStatData *vardata, bool *isdefault)
{
	double		stadistinct;
	double		stanullfrac = 0.0;
	double		ntuples;

	*isdefault = false;

	/*
	 * Determine the stadistinct value to use. There are cases where we can
	 * get an estimate even without a pg_statistic entry, or can get a better
	 * value than is in pg_statistic. Grab stanullfrac too if we can find it
	 * (otherwise, assume no nulls, for lack of any better idea).
	 */
	if (HeapTupleIsValid(vardata->statsTuple))
	{
		/* Use the pg_statistic entry */
		Form_pg_statistic stats;

		stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
		stadistinct = stats->stadistinct;
		stanullfrac = stats->stanullfrac;
	}
	else if (vardata->vartype == BOOLOID)
	{
		/*
		 * Special-case boolean columns: presumably, two distinct values.
		 *
		 * Are there any other datatypes we should wire in special estimates
		 * for?
		 */
		stadistinct = 2.0;
	}
	else if (vardata->rel && vardata->rel->rtekind == RTE_VALUES)
	{
		/*
		 * If the Var represents a column of a VALUES RTE, assume it's unique.
		 * This could of course be very wrong, but it should tend to be true
		 * in well-written queries. We could consider examining the VALUES'
		 * contents to get some real statistics; but that only works if the
		 * entries are all constants, and it would be pretty expensive anyway.
		 */
		stadistinct = -1.0;		/* unique (and all non null) */
	}
	else
	{
		/*
		 * We don't keep statistics for system columns, but in some cases we
		 * can infer distinctness anyway.
		 */
		if (vardata->var && IsA(vardata->var, Var))
		{
			switch (((Var *) vardata->var)->varattno)
			{
				case SelfItemPointerAttributeNumber:
					stadistinct = -1.0; /* unique (and all non null) */
					break;
				case TableOidAttributeNumber:
					stadistinct = 1.0;	/* only 1 value */
					break;
				default:
					stadistinct = 0.0;	/* means "unknown" */
					break;
			}
		}
		else
			stadistinct = 0.0;	/* means "unknown" */

		/*
		 * XXX consider using estimate_num_groups on expressions?
		 */
	}

	/*
	 * If there is a unique index or DISTINCT clause for the variable, assume
	 * it is unique no matter what pg_statistic says; the statistics could be
	 * out of date, or we might have found a partial unique index that proves
	 * the var is unique for this query. However, we'd better still believe
	 * the null-fraction statistic.
	 */
	if (vardata->isunique)
		stadistinct = -1.0 * (1.0 - stanullfrac);

	/*
	 * If we had an absolute estimate, use that.
	 */
	if (stadistinct > 0.0)
		return clamp_row_est(stadistinct);

	/*
	 * Otherwise we need to get the relation size; punt if not available.
	 */
	if (vardata->rel == NULL)
	{
		*isdefault = true;
		return DEFAULT_NUM_DISTINCT;
	}
	ntuples = vardata->rel->tuples;
	if (ntuples <= 0.0)
	{
		*isdefault = true;
		return DEFAULT_NUM_DISTINCT;
	}

	/*
	 * If we had a relative estimate, use that.
	 */
	if (stadistinct < 0.0)
		return clamp_row_est(-stadistinct * ntuples);

	/*
	 * With no data, estimate ndistinct = ntuples if the table is small, else
	 * use default. We use DEFAULT_NUM_DISTINCT as the cutoff for "small" so
	 * that the behavior isn't discontinuous.
	 */
	if (ntuples < DEFAULT_NUM_DISTINCT)
		return clamp_row_est(ntuples);

	*isdefault = true;
	return DEFAULT_NUM_DISTINCT;
}

/*
 * get_variable_range
 *		Estimate the minimum and maximum value of the specified variable.
 *		If successful, store values in *min and *max, and return true.
 *		If no data available, return false.
 *
 * sortop is the "<" comparison operator to use. This should generally
 * be "<" not ">", as only the former is likely to be found in pg_statistic.
 */
static bool
get_variable_range(PlannerInfo *root, VariableStatData *vardata, Oid sortop,
				   Datum *min, Datum *max)
{
	Datum		tmin = 0;
	Datum		tmax = 0;
	bool		have_data = false;
	int16		typLen;
	bool		typByVal;
	Oid			opfuncoid;
	AttStatsSlot sslot;
	int			i;

	/*
	 * XXX It's very tempting to try to use the actual column min and max, if
	 * we can get them relatively-cheaply with an index probe. However, since
	 * this function is called many times during join planning, that could
	 * have unpleasant effects on planning speed. Need more investigation
	 * before enabling this.
	 */
#ifdef NOT_USED
	if (get_actual_variable_range(root, vardata, sortop, min, max))
		return true;
#endif

	if (!HeapTupleIsValid(vardata->statsTuple))
	{
		/* no stats available, so default result */
		return false;
	}

	/*
	 * If we can't apply the sortop to the stats data, just fail. In
	 * principle, if there's a histogram and no MCVs, we could return the
	 * histogram endpoints without ever applying the sortop ... but it's
	 * probably not worth trying, because whatever the caller wants to do with
	 * the endpoints would likely fail the security check too.
	 */
	if (!statistic_proc_security_check(vardata,
									   (opfuncoid = get_opcode(sortop))))
		return false;

	get_typlenbyval(vardata->atttype, &typLen, &typByVal);

	/*
	 * If there is a histogram, grab the first and last values.
	 *
	 * If there is a histogram that is sorted with some other operator than
	 * the one we want, fail --- this suggests that there is data we can't
	 * use.
	 */
	if (get_attstatsslot(&sslot, vardata->statsTuple,
						 STATISTIC_KIND_HISTOGRAM, sortop,
						 ATTSTATSSLOT_VALUES))
	{
		if (sslot.nvalues > 0)
		{
			tmin = datumCopy(sslot.values[0], typByVal, typLen);
			tmax = datumCopy(sslot.values[sslot.nvalues - 1], typByVal, typLen);
			have_data = true;
		}
		free_attstatsslot(&sslot);
	}
	else if (get_attstatsslot(&sslot, vardata->statsTuple,
							  STATISTIC_KIND_HISTOGRAM, InvalidOid,
							  0))
	{
		free_attstatsslot(&sslot);
		return false;
	}

	/*
	 * If we have most-common-values info, look for extreme MCVs. This is
	 * needed even if we also have a histogram, since the histogram excludes
	 * the MCVs. However, usually the MCVs will not be the extreme values, so
	 * avoid unnecessary data copying.
	 */
	if (get_attstatsslot(&sslot, vardata->statsTuple,
						 STATISTIC_KIND_MCV, InvalidOid,
						 ATTSTATSSLOT_VALUES))
	{
		bool		tmin_is_mcv = false;
		bool		tmax_is_mcv = false;
		FmgrInfo	opproc;

		fmgr_info(opfuncoid, &opproc);

		for (i = 0; i < sslot.nvalues; i++)
		{
			if (!have_data)
			{
				tmin = tmax = sslot.values[i];
				tmin_is_mcv = tmax_is_mcv = have_data = true;
				continue;
			}
			if (DatumGetBool(FunctionCall2Coll(&opproc,
											   sslot.stacoll,
											   sslot.values[i], tmin)))
			{
				tmin = sslot.values[i];
				tmin_is_mcv = true;
			}
			if (DatumGetBool(FunctionCall2Coll(&opproc,
											   sslot.stacoll,
											   tmax, sslot.values[i])))
			{
				tmax = sslot.values[i];
				tmax_is_mcv = true;
			}
		}
		if (tmin_is_mcv)
			tmin = datumCopy(tmin, typByVal, typLen);
		if (tmax_is_mcv)
			tmax = datumCopy(tmax, typByVal, typLen);
		free_attstatsslot(&sslot);
	}

	*min = tmin;
	*max = tmax;
	return have_data;
}


/*
 * get_actual_variable_range
 *		Attempt to identify the current *actual* minimum and/or maximum
 *		of the specified variable, by looking for a suitable btree index
 *		and fetching its low and/or high values.
 *		If successful, store values in *min and *max, and return true.
 *		(Either pointer can be NULL if that endpoint isn't needed.)
 *		If no data available, return false.
 *
 * sortop is the "<" comparison operator to use.
 */
static bool
get_actual_variable_range(PlannerInfo *root, VariableStatData *vardata,
						  Oid sortop,
						  Datum *min, Datum *max)
{
	bool		have_data = false;
	RelOptInfo *rel = vardata->rel;
	RangeTblEntry *rte;
	ListCell   *lc;

	/* No hope if no relation or it doesn't have indexes */
	if (rel == NULL || rel->indexlist == NIL)
		return false;
	/* If it has indexes it must be a plain relation */
	rte = root->simple_rte_array[rel->relid];
	Assert(rte->rtekind == RTE_RELATION);

	/* Search through the indexes to see if any match our problem */
	foreach(lc, rel->indexlist)
	{
		IndexOptInfo *index = (IndexOptInfo *) lfirst(lc);
		ScanDirection indexscandir;

		/* Ignore non-btree indexes */
		if (index->relam != BTREE_AM_OID)
			continue;

		/*
		 * Ignore partial indexes --- we only want stats that cover the entire
		 * relation.
		 */
		if (index->indpred != NIL)
			continue;

		/*
		 * The index list might include hypothetical indexes inserted by a
		 * get_relation_info hook --- don't try to access them.
		 */
		if (index->hypothetical)
			continue;

		/*
		 * The first index column must match the desired variable and sort
		 * operator --- but we can use a descending-order index.
		 */
		if (!match_index_to_operand(vardata->var, 0, index))
			continue;
		switch (get_op_opfamily_strategy(sortop, index->sortopfamily[0]))
		{
			case BTLessStrategyNumber:
				if (index->reverse_sort[0])
					indexscandir = BackwardScanDirection;
				else
					indexscandir = ForwardScanDirection;
				break;
			case BTGreaterStrategyNumber:
				if (index->reverse_sort[0])
					indexscandir = ForwardScanDirection;
				else
					indexscandir = BackwardScanDirection;
				break;
			default:
				/* index doesn't match the sortop */
				continue;
		}

		/*
		 * Found a suitable index to extract data from. Set up some data that
		 * can be used by both invocations of get_actual_variable_endpoint.
		 */
		{
			MemoryContext tmpcontext;
			MemoryContext oldcontext;
			Relation	heapRel;
			Relation	indexRel;
			TupleTableSlot *slot;
			int16		typLen;
			bool		typByVal;
			ScanKeyData scankeys[1];

			/* Make sure any cruft gets recycled when we're done */
			tmpcontext = AllocSetContextCreate(CurrentMemoryContext,
											   "get_actual_variable_range workspace",
											   ALLOCSET_DEFAULT_SIZES);
			oldcontext = MemoryContextSwitchTo(tmpcontext);

			/*
			 * Open the table and index so we can read from them. We should
			 * already have some type of lock on each.
			 */
			heapRel = table_open(rte->relid, NoLock);
			indexRel = index_open(index->indexoid, NoLock);

			/* build some stuff needed for indexscan execution */
			slot = table_slot_create(heapRel, NULL);
			get_typlenbyval(vardata->atttype, &typLen, &typByVal);

			/* set up an IS NOT NULL scan key so that we ignore nulls */
			ScanKeyEntryInitialize(&scankeys[0],
								   SK_ISNULL | SK_SEARCHNOTNULL,
								   1,	/* index col to scan */
								   InvalidStrategy, /* no strategy */
								   InvalidOid,	/* no strategy subtype */
								   InvalidOid,	/* no collation */
								   InvalidOid,	/* no reg proc for this */
								   (Datum) 0);	/* constant */

			/* If min is requested ... */
			if (min)
			{
				have_data = get_actual_variable_endpoint(heapRel,
														 indexRel,
														 indexscandir,
														 scankeys,
														 typLen,
														 typByVal,
														 slot,
														 oldcontext,
														 min);
			}
			else
			{
				/* If min not requested, assume index is nonempty */
				have_data = true;
			}

			/* If max is requested, and we didn't find the index is empty */
			if (max && have_data)
			{
				/* scan in the opposite direction; all else is the same */
				have_data = get_actual_variable_endpoint(heapRel,
														 indexRel,
														 -indexscandir,
														 scankeys,
														 typLen,
														 typByVal,
														 slot,
														 oldcontext,
														 max);
			}

			/* Clean everything up */
			ExecDropSingleTupleTableSlot(slot);

			index_close(indexRel, NoLock);
			table_close(heapRel, NoLock);

			MemoryContextSwitchTo(oldcontext);
			MemoryContextDelete(tmpcontext);

			/* And we're done */
			break;
		}
	}

	return have_data;
}

/*
 * Get one endpoint datum (min or max depending on indexscandir) from the
 * specified index. Return true if successful, false if index is empty.
 * On success, endpoint value is stored to *endpointDatum (and copied into
 * outercontext).
 *
 * scankeys is a 1-element scankey array set up to reject nulls.
 * typLen/typByVal describe the datatype of the index's first column.
 * tableslot is a slot suitable to hold table tuples, in case we need
 * to probe the heap.
 * (We could compute these values locally, but that would mean computing them
 * twice when get_actual_variable_range needs both the min and the max.)
 */
static bool
get_actual_variable_endpoint(Relation heapRel,
							 Relation indexRel,
							 ScanDirection indexscandir,
							 ScanKey scankeys,
							 int16 typLen,
							 bool typByVal,
							 TupleTableSlot *tableslot,
							 MemoryContext outercontext,
							 Datum *endpointDatum)
{
	bool		have_data = false;
	SnapshotData SnapshotNonVacuumable;
	IndexScanDesc index_scan;
	Buffer		vmbuffer = InvalidBuffer;
	ItemPointer tid;
	Datum		values[INDEX_MAX_KEYS];
	bool		isnull[INDEX_MAX_KEYS];
	MemoryContext oldcontext;

	/*
	 * We use the index-only-scan machinery for this. With mostly-static
	 * tables that's a win because it avoids a heap visit. It's also a win
	 * for dynamic data, but the reason is less obvious; read on for details.
	 *
	 * In principle, we should scan the index with our current active
	 * snapshot, which is the best approximation we've got to what the query
	 * will see when executed. But that won't be exact if a new snap is taken
	 * before running the query, and it can be very expensive if a lot of
	 * recently-dead or uncommitted rows exist at the beginning or end of the
	 * index (because we'll laboriously fetch each one and reject it).
	 * Instead, we use SnapshotNonVacuumable. That will accept recently-dead
	 * and uncommitted rows as well as normal visible rows. On the other
	 * hand, it will reject known-dead rows, and thus not give a bogus answer
	 * when the extreme value has been deleted (unless the deletion was quite
	 * recent); that case motivates not using SnapshotAny here.
	 *
	 * A crucial point here is that SnapshotNonVacuumable, with
	 * RecentGlobalXmin as horizon, yields the inverse of the condition that
	 * the indexscan will use to decide that index entries are killable (see
	 * heap_hot_search_buffer()). Therefore, if the snapshot rejects a tuple
	 * (or more precisely, all tuples of a HOT chain) and we have to continue
	 * scanning past it, we know that the indexscan will mark that index entry
	 * killed. That means that the next get_actual_variable_endpoint() call
	 * will not have to re-consider that index entry. In this way we avoid
	 * repetitive work when this function is used a lot during planning.
	 *
	 * But using SnapshotNonVacuumable creates a hazard of its own. In a
	 * recently-created index, some index entries may point at "broken" HOT
	 * chains in which not all the tuple versions contain data matching the
	 * index entry. The live tuple version(s) certainly do match the index,
	 * but SnapshotNonVacuumable can accept recently-dead tuple versions that
	 * don't match. Hence, if we took data from the selected heap tuple, we
	 * might get a bogus answer that's not close to the index extremal value,
	 * or could even be NULL. We avoid this hazard because we take the data
	 * from the index entry not the heap.
	 */
	InitNonVacuumableSnapshot(SnapshotNonVacuumable, RecentGlobalXmin);

	index_scan = index_beginscan(heapRel, indexRel,
								 &SnapshotNonVacuumable,
								 1, 0);
	/* Set it up for index-only scan */
	index_scan->xs_want_itup = true;
	index_rescan(index_scan, scankeys, 1, NULL, 0);

	/* Fetch first/next tuple in specified direction */
	while ((tid = index_getnext_tid(index_scan, indexscandir)) != NULL)
	{
		if (!VM_ALL_VISIBLE(heapRel,
							ItemPointerGetBlockNumber(tid),
							&vmbuffer))
		{
			/* Rats, we have to visit the heap to check visibility */
5369 if (!index_fetch_heap(index_scan, tableslot))
5370 continue; /* no visible tuple, try next index entry */
5371
5372 /* We don't actually need the heap tuple for anything */
5373 ExecClearTuple(tableslot);
5374
5375 /*
5376 * We don't care whether there's more than one visible tuple in
5377 * the HOT chain; if any are visible, that's good enough.
5378 */
5379 }
5380
5381 /*
5382 * We expect that btree will return data in IndexTuple not HeapTuple
5383 * format. It's not lossy either.
5384 */
5385 if (!index_scan->xs_itup)
5386 elog(ERROR, "no data returned for index-only scan");
5387 if (index_scan->xs_recheck)
5388 elog(ERROR, "unexpected recheck indication from btree");
5389
5390 /* OK to deconstruct the index tuple */
5391 index_deform_tuple(index_scan->xs_itup,
5392 index_scan->xs_itupdesc,
5393 values, isnull);
5394
5395 /* Shouldn't have got a null, but be careful */
5396 if (isnull[0])
5397 elog(ERROR, "found unexpected null value in index \"%s\"",
5398 RelationGetRelationName(indexRel));
5399
5400 /* Copy the index column value out to caller's context */
5401 oldcontext = MemoryContextSwitchTo(outercontext);
5402 *endpointDatum = datumCopy(values[0], typByVal, typLen);
5403 MemoryContextSwitchTo(oldcontext);
5404 have_data = true;
5405 break;
5406 }
5407
5408 if (vmbuffer != InvalidBuffer)
5409 ReleaseBuffer(vmbuffer);
5410 index_endscan(index_scan);
5411
5412 return have_data;
5413}
5414
5415/*
5416 * find_join_input_rel
5417 * Look up the input relation for a join.
5418 *
5419 * We assume that the input relation's RelOptInfo must have been constructed
5420 * already.
5421 */
5422static RelOptInfo *
5423find_join_input_rel(PlannerInfo *root, Relids relids)
5424{
5425 RelOptInfo *rel = NULL;
5426
5427 switch (bms_membership(relids))
5428 {
5429 case BMS_EMPTY_SET:
5430 /* should not happen */
5431 break;
5432 case BMS_SINGLETON:
5433 rel = find_base_rel(root, bms_singleton_member(relids));
5434 break;
5435 case BMS_MULTIPLE:
5436 rel = find_join_rel(root, relids);
5437 break;
5438 }
5439
5440 if (rel == NULL)
5441 elog(ERROR, "could not find RelOptInfo for given relids");
5442
5443 return rel;
5444}
5445
5446
5447/*-------------------------------------------------------------------------
5448 *
5449 * Index cost estimation functions
5450 *
5451 *-------------------------------------------------------------------------
5452 */
5453
5454/*
5455 * Extract the actual indexquals (as RestrictInfos) from an IndexClause list
5456 */
5457List *
5458get_quals_from_indexclauses(List *indexclauses)
5459{
5460 List *result = NIL;
5461 ListCell *lc;
5462
5463 foreach(lc, indexclauses)
5464 {
5465 IndexClause *iclause = lfirst_node(IndexClause, lc);
5466 ListCell *lc2;
5467
5468 foreach(lc2, iclause->indexquals)
5469 {
5470 RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
5471
5472 result = lappend(result, rinfo);
5473 }
5474 }
5475 return result;
5476}
5477
5478/*
5479 * Compute the total evaluation cost of the comparison operands in a list
5480 * of index qual expressions. Since we know these will be evaluated just
5481 * once per scan, there's no need to distinguish startup from per-row cost.
5482 *
5483 * This can be used either on the result of get_quals_from_indexclauses(),
5484 * or directly on an indexorderbys list. In both cases, we expect that the
5485 * index key expression is on the left side of binary clauses.
5486 */
5487Cost
5488index_other_operands_eval_cost(PlannerInfo *root, List *indexquals)
5489{
5490 Cost qual_arg_cost = 0;
5491 ListCell *lc;
5492
5493 foreach(lc, indexquals)
5494 {
5495 Expr *clause = (Expr *) lfirst(lc);
5496 Node *other_operand;
5497 QualCost index_qual_cost;
5498
5499 /*
5500 * Index quals will have RestrictInfos, indexorderbys won't. Look
5501 * through RestrictInfo if present.
5502 */
5503 if (IsA(clause, RestrictInfo))
5504 clause = ((RestrictInfo *) clause)->clause;
5505
5506 if (IsA(clause, OpExpr))
5507 {
5508 OpExpr *op = (OpExpr *) clause;
5509
5510 other_operand = (Node *) lsecond(op->args);
5511 }
5512 else if (IsA(clause, RowCompareExpr))
5513 {
5514 RowCompareExpr *rc = (RowCompareExpr *) clause;
5515
5516 other_operand = (Node *) rc->rargs;
5517 }
5518 else if (IsA(clause, ScalarArrayOpExpr))
5519 {
5520 ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
5521
5522 other_operand = (Node *) lsecond(saop->args);
5523 }
5524 else if (IsA(clause, NullTest))
5525 {
5526 other_operand = NULL;
5527 }
5528 else
5529 {
5530 elog(ERROR, "unsupported indexqual type: %d",
5531 (int) nodeTag(clause));
5532 other_operand = NULL; /* keep compiler quiet */
5533 }
5534
5535 cost_qual_eval_node(&index_qual_cost, other_operand, root);
5536 qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
5537 }
5538 return qual_arg_cost;
5539}
5540
5541void
5542genericcostestimate(PlannerInfo *root,
5543 IndexPath *path,
5544 double loop_count,
5545 GenericCosts *costs)
5546{
5547 IndexOptInfo *index = path->indexinfo;
5548 List *indexQuals = get_quals_from_indexclauses(path->indexclauses);
5549 List *indexOrderBys = path->indexorderbys;
5550 Cost indexStartupCost;
5551 Cost indexTotalCost;
5552 Selectivity indexSelectivity;
5553 double indexCorrelation;
5554 double numIndexPages;
5555 double numIndexTuples;
5556 double spc_random_page_cost;
5557 double num_sa_scans;
5558 double num_outer_scans;
5559 double num_scans;
5560 double qual_op_cost;
5561 double qual_arg_cost;
5562 List *selectivityQuals;
5563 ListCell *l;
5564
5565 /*
5566 * If the index is partial, AND the index predicate with the explicitly
5567 * given indexquals to produce a more accurate idea of the index
5568 * selectivity.
5569 */
5570 selectivityQuals = add_predicate_to_index_quals(index, indexQuals);
5571
5572 /*
5573 * Check for ScalarArrayOpExpr index quals, and estimate the number of
5574 * index scans that will be performed.
5575 */
5576 num_sa_scans = 1;
5577 foreach(l, indexQuals)
5578 {
5579 RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
5580
5581 if (IsA(rinfo->clause, ScalarArrayOpExpr))
5582 {
5583 ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) rinfo->clause;
5584 int alength = estimate_array_length(lsecond(saop->args));
5585
5586 if (alength > 1)
5587 num_sa_scans *= alength;
5588 }
5589 }
5590
5591 /* Estimate the fraction of main-table tuples that will be visited */
5592 indexSelectivity = clauselist_selectivity(root, selectivityQuals,
5593 index->rel->relid,
5594 JOIN_INNER,
5595 NULL);
5596
5597 /*
5598 * If caller didn't give us an estimate, estimate the number of index
5599 * tuples that will be visited. We do it in this rather peculiar-looking
5600 * way in order to get the right answer for partial indexes.
5601 */
5602 numIndexTuples = costs->numIndexTuples;
5603 if (numIndexTuples <= 0.0)
5604 {
5605 numIndexTuples = indexSelectivity * index->rel->tuples;
5606
5607 /*
5608 * The above calculation counts all the tuples visited across all
5609 * scans induced by ScalarArrayOpExpr nodes. We want to consider the
5610 * average per-indexscan number, so adjust. This is a handy place to
5611 * round to integer, too. (If caller supplied tuple estimate, it's
5612 * responsible for handling these considerations.)
5613 */
5614 numIndexTuples = rint(numIndexTuples / num_sa_scans);
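
        /*
         * For example (illustrative values): with indexSelectivity = 0.005
         * on a 200,000-tuple relation, 1000 index tuples are visited in
         * total; if a 4-element ScalarArrayOpExpr gives num_sa_scans = 4,
         * we record rint(1000 / 4) = 250 tuples per scan.
         */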
    }

    /*
     * We can bound the number of tuples by the index size in any case.  Also,
     * always estimate at least one tuple is touched, even when
     * indexSelectivity estimate is tiny.
     */
    if (numIndexTuples > index->tuples)
        numIndexTuples = index->tuples;
    if (numIndexTuples < 1.0)
        numIndexTuples = 1.0;

    /*
     * Estimate the number of index pages that will be retrieved.
     *
     * We use the simplistic method of taking a pro-rata fraction of the total
     * number of index pages.  In effect, this counts only leaf pages and not
     * any overhead such as index metapage or upper tree levels.
     *
     * In practice access to upper index levels is often nearly free because
     * those tend to stay in cache under load; moreover, the cost involved is
     * highly dependent on index type.  We therefore ignore such costs here
     * and leave it to the caller to add a suitable charge if needed.
     */
    if (index->pages > 1 && index->tuples > 1)
        numIndexPages = ceil(numIndexTuples * index->pages / index->tuples);
    else
        numIndexPages = 1.0;
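
    /*
     * For example (illustrative values): visiting 100 of an index's 10,000
     * tuples with index->pages = 50 charges ceil(100 * 50 / 10000) = 1 leaf
     * page.
     */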

    /* fetch estimated page cost for tablespace containing index */
    get_tablespace_page_costs(index->reltablespace,
                              &spc_random_page_cost,
                              NULL);

    /*
     * Now compute the disk access costs.
     *
     * The above calculations are all per-index-scan.  However, if we are in a
     * nestloop inner scan, we can expect the scan to be repeated (with
     * different search keys) for each row of the outer relation.  Likewise,
     * ScalarArrayOpExpr quals result in multiple index scans.  This creates
     * the potential for cache effects to reduce the number of disk page
     * fetches needed.  We want to estimate the average per-scan I/O cost in
     * the presence of caching.
     *
     * We use the Mackert-Lohman formula (see costsize.c for details) to
     * estimate the total number of page fetches that occur.  While this
     * wasn't what it was designed for, it seems a reasonable model anyway.
     * Note that we are counting pages not tuples anymore, so we take N = T =
     * index size, as if there were one "tuple" per page.
     */
    num_outer_scans = loop_count;
    num_scans = num_sa_scans * num_outer_scans;

    if (num_scans > 1)
    {
        double      pages_fetched;

        /* total page fetches ignoring cache effects */
        pages_fetched = numIndexPages * num_scans;

        /* use Mackert and Lohman formula to adjust for cache effects */
        pages_fetched = index_pages_fetched(pages_fetched,
                                            index->pages,
                                            (double) index->pages,
                                            root);

        /*
         * Now compute the total disk access cost, and then report a pro-rated
         * share for each outer scan.  (Don't pro-rate for ScalarArrayOpExpr,
         * since that's internal to the indexscan.)
         */
        indexTotalCost = (pages_fetched * spc_random_page_cost)
            / num_outer_scans;
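
        /*
         * For example (illustrative values): with loop_count = 10 and
         * numIndexPages = 5, we start from 50 raw page fetches, let the
         * Mackert-Lohman model discount repeat visits to cached pages, and
         * then divide the discounted total by the 10 outer scans to get a
         * per-scan cost.
         */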
    }
    else
    {
        /*
         * For a single index scan, we just charge spc_random_page_cost per
         * page touched.
         */
        indexTotalCost = numIndexPages * spc_random_page_cost;
    }

    /*
     * CPU cost: any complex expressions in the indexquals will need to be
     * evaluated once at the start of the scan to reduce them to runtime keys
     * to pass to the index AM (see nodeIndexscan.c).  We model the per-tuple
     * CPU costs as cpu_index_tuple_cost plus one cpu_operator_cost per
     * indexqual operator.  Because we have numIndexTuples as a per-scan
     * number, we have to multiply by num_sa_scans to get the correct result
     * for ScalarArrayOpExpr cases.  Similarly add in costs for any index
     * ORDER BY expressions.
     *
     * Note: this neglects the possible costs of rechecking lossy operators.
     * Detecting that that might be needed seems more expensive than it's
     * worth, though, considering all the other inaccuracies here ...
     */
    qual_arg_cost = index_other_operands_eval_cost(root, indexQuals) +
        index_other_operands_eval_cost(root, indexOrderBys);
    qual_op_cost = cpu_operator_cost *
        (list_length(indexQuals) + list_length(indexOrderBys));

    indexStartupCost = qual_arg_cost;
    indexTotalCost += qual_arg_cost;
    indexTotalCost += numIndexTuples * num_sa_scans * (cpu_index_tuple_cost + qual_op_cost);
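
    /*
     * For example (illustrative values): a scan visiting 100 index tuples
     * under two indexquals and no SA scans adds
     * 100 * (cpu_index_tuple_cost + 2 * cpu_operator_cost) to the total.
     */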

    /*
     * Generic assumption about index correlation: there isn't any.
     */
    indexCorrelation = 0.0;

    /*
     * Return everything to caller.
     */
    costs->indexStartupCost = indexStartupCost;
    costs->indexTotalCost = indexTotalCost;
    costs->indexSelectivity = indexSelectivity;
    costs->indexCorrelation = indexCorrelation;
    costs->numIndexPages = numIndexPages;
    costs->numIndexTuples = numIndexTuples;
    costs->spc_random_page_cost = spc_random_page_cost;
    costs->num_sa_scans = num_sa_scans;
}

/*
 * If the index is partial, add its predicate to the given qual list.
 *
 * ANDing the index predicate with the explicitly given indexquals produces
 * a more accurate idea of the index's selectivity.  However, we need to be
 * careful not to insert redundant clauses, because clauselist_selectivity()
 * is easily fooled into computing a too-low selectivity estimate.  Our
 * approach is to add only the predicate clause(s) that cannot be proven to
 * be implied by the given indexquals.  This successfully handles cases such
 * as a qual "x = 42" used with a partial index "WHERE x >= 40 AND x < 50".
 * There are many other cases where we won't detect redundancy, leading to a
 * too-low selectivity estimate, which will bias the system in favor of using
 * partial indexes where possible.  That is not necessarily bad though.
 *
 * Note that indexQuals contains RestrictInfo nodes while the indpred
 * does not, so the output list will be mixed.  This is OK for both
 * predicate_implied_by() and clauselist_selectivity(), but might be
 * problematic if the result were passed to other things.
 */
List *
add_predicate_to_index_quals(IndexOptInfo *index, List *indexQuals)
{
    List       *predExtraQuals = NIL;
    ListCell   *lc;

    if (index->indpred == NIL)
        return indexQuals;

    foreach(lc, index->indpred)
    {
        Node       *predQual = (Node *) lfirst(lc);
        List       *oneQual = list_make1(predQual);

        if (!predicate_implied_by(oneQual, indexQuals, false))
            predExtraQuals = list_concat(predExtraQuals, oneQual);
    }
    /* list_concat avoids modifying the passed-in indexQuals list */
    return list_concat(predExtraQuals, indexQuals);
}


void
btcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
               Cost *indexStartupCost, Cost *indexTotalCost,
               Selectivity *indexSelectivity, double *indexCorrelation,
               double *indexPages)
{
    IndexOptInfo *index = path->indexinfo;
    GenericCosts costs;
    Oid         relid;
    AttrNumber  colnum;
    VariableStatData vardata;
    double      numIndexTuples;
    Cost        descentCost;
    List       *indexBoundQuals;
    int         indexcol;
    bool        eqQualHere;
    bool        found_saop;
    bool        found_is_null_op;
    double      num_sa_scans;
    ListCell   *lc;

    /*
     * For a btree scan, only leading '=' quals plus inequality quals for the
     * immediately next attribute contribute to index selectivity (these are
     * the "boundary quals" that determine the starting and stopping points of
     * the index scan).  Additional quals can suppress visits to the heap, so
     * it's OK to count them in indexSelectivity, but they should not count
     * for estimating numIndexTuples.  So we must examine the given indexquals
     * to find out which ones count as boundary quals.  We rely on the
     * knowledge that they are given in index column order.
     *
     * For a RowCompareExpr, we consider only the first column, just as
     * rowcomparesel() does.
     *
     * If there's a ScalarArrayOpExpr in the quals, we'll actually perform N
     * index scans not one, but the ScalarArrayOpExpr's operator can be
     * considered to act the same as it normally does.
     */
    indexBoundQuals = NIL;
    indexcol = 0;
    eqQualHere = false;
    found_saop = false;
    found_is_null_op = false;
    num_sa_scans = 1;
    foreach(lc, path->indexclauses)
    {
        IndexClause *iclause = lfirst_node(IndexClause, lc);
        ListCell   *lc2;

        if (indexcol != iclause->indexcol)
        {
            /* Beginning of a new column's quals */
            if (!eqQualHere)
                break;          /* done if no '=' qual for indexcol */
            eqQualHere = false;
            indexcol++;
            if (indexcol != iclause->indexcol)
                break;          /* no quals at all for indexcol */
        }

        /* Examine each indexqual associated with this index clause */
        foreach(lc2, iclause->indexquals)
        {
            RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
            Expr       *clause = rinfo->clause;
            Oid         clause_op = InvalidOid;
            int         op_strategy;

            if (IsA(clause, OpExpr))
            {
                OpExpr     *op = (OpExpr *) clause;

                clause_op = op->opno;
            }
            else if (IsA(clause, RowCompareExpr))
            {
                RowCompareExpr *rc = (RowCompareExpr *) clause;

                clause_op = linitial_oid(rc->opnos);
            }
            else if (IsA(clause, ScalarArrayOpExpr))
            {
                ScalarArrayOpExpr *saop = (ScalarArrayOpExpr *) clause;
                Node       *other_operand = (Node *) lsecond(saop->args);
                int         alength = estimate_array_length(other_operand);

                clause_op = saop->opno;
                found_saop = true;
                /* count number of SA scans induced by indexBoundQuals only */
                if (alength > 1)
                    num_sa_scans *= alength;
            }
            else if (IsA(clause, NullTest))
            {
                NullTest   *nt = (NullTest *) clause;

                if (nt->nulltesttype == IS_NULL)
                {
                    found_is_null_op = true;
                    /* IS NULL is like = for selectivity purposes */
                    eqQualHere = true;
                }
            }
            else
                elog(ERROR, "unsupported indexqual type: %d",
                     (int) nodeTag(clause));

            /* check for equality operator */
            if (OidIsValid(clause_op))
            {
                op_strategy = get_op_opfamily_strategy(clause_op,
                                                       index->opfamily[indexcol]);
                Assert(op_strategy != 0);   /* not a member of opfamily?? */
                if (op_strategy == BTEqualStrategyNumber)
                    eqQualHere = true;
            }

            indexBoundQuals = lappend(indexBoundQuals, rinfo);
        }
    }

    /*
     * If index is unique and we found an '=' clause for each column, we can
     * just assume numIndexTuples = 1 and skip the expensive
     * clauselist_selectivity calculations.  However, a ScalarArrayOp or
     * NullTest invalidates that theory, even though it sets eqQualHere.
     */
    if (index->unique &&
        indexcol == index->nkeycolumns - 1 &&
        eqQualHere &&
        !found_saop &&
        !found_is_null_op)
        numIndexTuples = 1.0;
    else
    {
        List       *selectivityQuals;
        Selectivity btreeSelectivity;

        /*
         * If the index is partial, AND the index predicate with the
         * index-bound quals to produce a more accurate idea of the number of
         * rows covered by the bound conditions.
         */
        selectivityQuals = add_predicate_to_index_quals(index, indexBoundQuals);

        btreeSelectivity = clauselist_selectivity(root, selectivityQuals,
                                                  index->rel->relid,
                                                  JOIN_INNER,
                                                  NULL);
        numIndexTuples = btreeSelectivity * index->rel->tuples;

        /*
         * As in genericcostestimate(), we have to adjust for any
         * ScalarArrayOpExpr quals included in indexBoundQuals, and then round
         * to integer.
         */
        numIndexTuples = rint(numIndexTuples / num_sa_scans);
    }

    /*
     * Now do generic index cost estimation.
     */
    MemSet(&costs, 0, sizeof(costs));
    costs.numIndexTuples = numIndexTuples;

    genericcostestimate(root, path, loop_count, &costs);

    /*
     * Add a CPU-cost component to represent the costs of initial btree
     * descent.  We don't charge any I/O cost for touching upper btree levels,
     * since they tend to stay in cache, but we still have to do about log2(N)
     * comparisons to descend a btree of N leaf tuples.  We charge one
     * cpu_operator_cost per comparison.
     *
     * If there are ScalarArrayOpExprs, charge this once per SA scan.  The
     * ones after the first one are not startup cost so far as the overall
     * plan is concerned, so add them only to "total" cost.
     */
    if (index->tuples > 1)      /* avoid computing log(0) */
    {
        descentCost = ceil(log(index->tuples) / log(2.0)) * cpu_operator_cost;
        costs.indexStartupCost += descentCost;
        costs.indexTotalCost += costs.num_sa_scans * descentCost;
    }
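
    /*
     * For example (illustrative value): an index of about one million leaf
     * tuples yields ceil(log2(1e6)) = 20 comparisons, i.e. a descent charge
     * of 20 * cpu_operator_cost.
     */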

    /*
     * Even though we're not charging I/O cost for touching upper btree pages,
     * it's still reasonable to charge some CPU cost per page descended
     * through.  Moreover, if we had no such charge at all, bloated indexes
     * would appear to have the same search cost as unbloated ones, at least
     * in cases where only a single leaf page is expected to be visited.  This
     * cost is somewhat arbitrarily set at 50x cpu_operator_cost per page
     * touched.  The number of such pages is btree tree height plus one (ie,
     * we charge for the leaf page too).  As above, charge once per SA scan.
     */
    descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
    costs.indexStartupCost += descentCost;
    costs.indexTotalCost += costs.num_sa_scans * descentCost;
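
    /*
     * For example (illustrative value): a btree of tree_height = 2 is
     * charged (2 + 1) * 50 * cpu_operator_cost = 150 * cpu_operator_cost
     * per descent.
     */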

    /*
     * If we can get an estimate of the first column's ordering correlation C
     * from pg_statistic, estimate the index correlation as C for a
     * single-column index, or C * 0.75 for multiple columns.  (The idea here
     * is that multiple columns dilute the importance of the first column's
     * ordering, but don't negate it entirely.  Before 8.0 we divided the
     * correlation by the number of columns, but that seems too strong.)
     */
    MemSet(&vardata, 0, sizeof(vardata));

    if (index->indexkeys[0] != 0)
    {
        /* Simple variable --- look to stats for the underlying table */
        RangeTblEntry *rte = planner_rt_fetch(index->rel->relid, root);

        Assert(rte->rtekind == RTE_RELATION);
        relid = rte->relid;
        Assert(relid != InvalidOid);
        colnum = index->indexkeys[0];

        if (get_relation_stats_hook &&
            (*get_relation_stats_hook) (root, rte, colnum, &vardata))
        {
            /*
             * The hook took control of acquiring a stats tuple.  If it did
             * supply a tuple, it'd better have supplied a freefunc.
             */
            if (HeapTupleIsValid(vardata.statsTuple) &&
                !vardata.freefunc)
                elog(ERROR, "no function provided to release variable stats with");
        }
        else
        {
            vardata.statsTuple = SearchSysCache3(STATRELATTINH,
                                                 ObjectIdGetDatum(relid),
                                                 Int16GetDatum(colnum),
                                                 BoolGetDatum(rte->inh));
            vardata.freefunc = ReleaseSysCache;
        }
    }
    else
    {
        /* Expression --- maybe there are stats for the index itself */
        relid = index->indexoid;
        colnum = 1;

        if (get_index_stats_hook &&
            (*get_index_stats_hook) (root, relid, colnum, &vardata))
        {
            /*
             * The hook took control of acquiring a stats tuple.  If it did
             * supply a tuple, it'd better have supplied a freefunc.
             */
            if (HeapTupleIsValid(vardata.statsTuple) &&
                !vardata.freefunc)
                elog(ERROR, "no function provided to release variable stats with");
        }
        else
        {
            vardata.statsTuple = SearchSysCache3(STATRELATTINH,
                                                 ObjectIdGetDatum(relid),
                                                 Int16GetDatum(colnum),
                                                 BoolGetDatum(false));
            vardata.freefunc = ReleaseSysCache;
        }
    }

    if (HeapTupleIsValid(vardata.statsTuple))
    {
        Oid         sortop;
        AttStatsSlot sslot;

        sortop = get_opfamily_member(index->opfamily[0],
                                     index->opcintype[0],
                                     index->opcintype[0],
                                     BTLessStrategyNumber);
        if (OidIsValid(sortop) &&
            get_attstatsslot(&sslot, vardata.statsTuple,
                             STATISTIC_KIND_CORRELATION, sortop,
                             ATTSTATSSLOT_NUMBERS))
        {
            double      varCorrelation;

            Assert(sslot.nnumbers == 1);
            varCorrelation = sslot.numbers[0];

            if (index->reverse_sort[0])
                varCorrelation = -varCorrelation;

            if (index->nkeycolumns > 1)
                costs.indexCorrelation = varCorrelation * 0.75;
            else
                costs.indexCorrelation = varCorrelation;

            free_attstatsslot(&sslot);
        }
    }

    ReleaseVariableStats(vardata);

    *indexStartupCost = costs.indexStartupCost;
    *indexTotalCost = costs.indexTotalCost;
    *indexSelectivity = costs.indexSelectivity;
    *indexCorrelation = costs.indexCorrelation;
    *indexPages = costs.numIndexPages;
}

void
hashcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
                 Cost *indexStartupCost, Cost *indexTotalCost,
                 Selectivity *indexSelectivity, double *indexCorrelation,
                 double *indexPages)
{
    GenericCosts costs;

    MemSet(&costs, 0, sizeof(costs));

    genericcostestimate(root, path, loop_count, &costs);

    /*
     * A hash index has no descent costs as such, since the index AM can go
     * directly to the target bucket after computing the hash value.  There
     * are a couple of other hash-specific costs that we could conceivably add
     * here, though:
     *
     * Ideally we'd charge spc_random_page_cost for each page in the target
     * bucket, not just the numIndexPages pages that genericcostestimate
     * thought we'd visit.  However in most cases we don't know which bucket
     * that will be.  There's no point in considering the average bucket size
     * because the hash AM makes sure that's always one page.
     *
     * Likewise, we could consider charging some CPU for each index tuple in
     * the bucket, if we knew how many there were.  But the per-tuple cost is
     * just a hash value comparison, not a general datatype-dependent
     * comparison, so any such charge ought to be quite a bit less than
     * cpu_operator_cost; which makes it probably not worth worrying about.
     *
     * A bigger issue is that chance hash-value collisions will result in
     * wasted probes into the heap.  We don't currently attempt to model this
     * cost on the grounds that it's rare, but maybe it's not rare enough.
     * (Any fix for this ought to consider the generic lossy-operator problem,
     * though; it's not entirely hash-specific.)
     */

    *indexStartupCost = costs.indexStartupCost;
    *indexTotalCost = costs.indexTotalCost;
    *indexSelectivity = costs.indexSelectivity;
    *indexCorrelation = costs.indexCorrelation;
    *indexPages = costs.numIndexPages;
}

void
gistcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
                 Cost *indexStartupCost, Cost *indexTotalCost,
                 Selectivity *indexSelectivity, double *indexCorrelation,
                 double *indexPages)
{
    IndexOptInfo *index = path->indexinfo;
    GenericCosts costs;
    Cost        descentCost;

    MemSet(&costs, 0, sizeof(costs));

    genericcostestimate(root, path, loop_count, &costs);

    /*
     * We model index descent costs similarly to those for btree, but to do
     * that we first need an idea of the tree height.  We somewhat arbitrarily
     * assume that the fanout is 100, meaning the tree height is at most
     * log100(index->pages).
     *
     * Although this computation isn't really expensive enough to require
     * caching, we might as well use index->tree_height to cache it.
     */
    if (index->tree_height < 0) /* unknown? */
    {
        if (index->pages > 1)   /* avoid computing log(0) */
            index->tree_height = (int) (log(index->pages) / log(100.0));
        else
            index->tree_height = 0;
    }
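
    /*
     * For example (illustrative value): a 10,000-page index gives
     * tree_height = (int) (log(10000) / log(100)) = 2.
     */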

    /*
     * Add a CPU-cost component to represent the costs of initial descent.  We
     * just use log(N) here not log2(N) since the branching factor isn't
     * necessarily two anyway.  As for btree, charge once per SA scan.
     */
    if (index->tuples > 1)      /* avoid computing log(0) */
    {
        descentCost = ceil(log(index->tuples)) * cpu_operator_cost;
        costs.indexStartupCost += descentCost;
        costs.indexTotalCost += costs.num_sa_scans * descentCost;
    }

    /*
     * Likewise add a per-page charge, calculated the same as for btrees.
     */
    descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
    costs.indexStartupCost += descentCost;
    costs.indexTotalCost += costs.num_sa_scans * descentCost;

    *indexStartupCost = costs.indexStartupCost;
    *indexTotalCost = costs.indexTotalCost;
    *indexSelectivity = costs.indexSelectivity;
    *indexCorrelation = costs.indexCorrelation;
    *indexPages = costs.numIndexPages;
}

void
spgcostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
                Cost *indexStartupCost, Cost *indexTotalCost,
                Selectivity *indexSelectivity, double *indexCorrelation,
                double *indexPages)
{
    IndexOptInfo *index = path->indexinfo;
    GenericCosts costs;
    Cost        descentCost;

    MemSet(&costs, 0, sizeof(costs));

    genericcostestimate(root, path, loop_count, &costs);

    /*
     * We model index descent costs similarly to those for btree, but to do
     * that we first need an idea of the tree height.  We somewhat arbitrarily
     * assume that the fanout is 100, meaning the tree height is at most
     * log100(index->pages).
     *
     * Although this computation isn't really expensive enough to require
     * caching, we might as well use index->tree_height to cache it.
     */
    if (index->tree_height < 0) /* unknown? */
    {
        if (index->pages > 1)   /* avoid computing log(0) */
            index->tree_height = (int) (log(index->pages) / log(100.0));
        else
            index->tree_height = 0;
    }

    /*
     * Add a CPU-cost component to represent the costs of initial descent.  We
     * just use log(N) here not log2(N) since the branching factor isn't
     * necessarily two anyway.  As for btree, charge once per SA scan.
     */
    if (index->tuples > 1)      /* avoid computing log(0) */
    {
        descentCost = ceil(log(index->tuples)) * cpu_operator_cost;
        costs.indexStartupCost += descentCost;
        costs.indexTotalCost += costs.num_sa_scans * descentCost;
    }

    /*
     * Likewise add a per-page charge, calculated the same as for btrees.
     */
    descentCost = (index->tree_height + 1) * 50.0 * cpu_operator_cost;
    costs.indexStartupCost += descentCost;
    costs.indexTotalCost += costs.num_sa_scans * descentCost;

    *indexStartupCost = costs.indexStartupCost;
    *indexTotalCost = costs.indexTotalCost;
    *indexSelectivity = costs.indexSelectivity;
    *indexCorrelation = costs.indexCorrelation;
    *indexPages = costs.numIndexPages;
}


/*
 * Support routines for gincostestimate
 */

typedef struct
{
    bool        haveFullScan;
    double      partialEntries;
    double      exactEntries;
    double      searchEntries;
    double      arrayScans;
} GinQualCounts;

/*
 * Estimate the number of index terms that need to be searched for while
 * testing the given GIN query, and increment the counts in *counts
 * appropriately.  If the query is unsatisfiable, return false.
 */
static bool
gincost_pattern(IndexOptInfo *index, int indexcol,
                Oid clause_op, Datum query,
                GinQualCounts *counts)
{
    Oid         extractProcOid;
    Oid         collation;
    int         strategy_op;
    Oid         lefttype,
                righttype;
    int32       nentries = 0;
    bool       *partial_matches = NULL;
    Pointer    *extra_data = NULL;
    bool       *nullFlags = NULL;
    int32       searchMode = GIN_SEARCH_MODE_DEFAULT;
    int32       i;

    Assert(indexcol < index->nkeycolumns);

    /*
     * Get the operator's strategy number and declared input data types within
     * the index opfamily.  (We don't need the latter, but we use
     * get_op_opfamily_properties because it will throw error if it fails to
     * find a matching pg_amop entry.)
     */
    get_op_opfamily_properties(clause_op, index->opfamily[indexcol], false,
                               &strategy_op, &lefttype, &righttype);

    /*
     * GIN always uses the "default" support functions, which are those with
     * lefttype == righttype == the opclass' opcintype (see
     * IndexSupportInitialize in relcache.c).
     */
    extractProcOid = get_opfamily_proc(index->opfamily[indexcol],
                                       index->opcintype[indexcol],
                                       index->opcintype[indexcol],
                                       GIN_EXTRACTQUERY_PROC);

    if (!OidIsValid(extractProcOid))
    {
        /* should not happen; throw same error as index_getprocinfo */
        elog(ERROR, "missing support function %d for attribute %d of index \"%s\"",
             GIN_EXTRACTQUERY_PROC, indexcol + 1,
             get_rel_name(index->indexoid));
    }

    /*
     * Choose collation to pass to extractProc (should match initGinState).
     */
    if (OidIsValid(index->indexcollations[indexcol]))
        collation = index->indexcollations[indexcol];
    else
        collation = DEFAULT_COLLATION_OID;

    OidFunctionCall7Coll(extractProcOid,
                         collation,
                         query,
                         PointerGetDatum(&nentries),
                         UInt16GetDatum(strategy_op),
                         PointerGetDatum(&partial_matches),
                         PointerGetDatum(&extra_data),
                         PointerGetDatum(&nullFlags),
                         PointerGetDatum(&searchMode));

    if (nentries <= 0 && searchMode == GIN_SEARCH_MODE_DEFAULT)
    {
        /* No match is possible */
        return false;
    }

    for (i = 0; i < nentries; i++)
    {
        /*
         * For a partial match we don't have any information with which to
         * estimate the number of matched entries in the index, so we just
         * estimate it as 100.
         */
        if (partial_matches && partial_matches[i])
            counts->partialEntries += 100;
        else
            counts->exactEntries++;

        counts->searchEntries++;
    }

    if (searchMode == GIN_SEARCH_MODE_INCLUDE_EMPTY)
    {
        /* Treat "include empty" like an exact-match item */
        counts->exactEntries++;
        counts->searchEntries++;
    }
    else if (searchMode != GIN_SEARCH_MODE_DEFAULT)
    {
        /* It's GIN_SEARCH_MODE_ALL */
        counts->haveFullScan = true;
    }

    return true;
}

/*
 * Estimate the number of index terms that need to be searched for while
 * testing the given GIN index clause, and increment the counts in *counts
 * appropriately.  If the query is unsatisfiable, return false.
 */
static bool
gincost_opexpr(PlannerInfo *root,
               IndexOptInfo *index,
               int indexcol,
               OpExpr *clause,
               GinQualCounts *counts)
{
    Oid         clause_op = clause->opno;
    Node       *operand = (Node *) lsecond(clause->args);

    /* aggressively reduce to a constant, and look through relabeling */
    operand = estimate_expression_value(root, operand);

    if (IsA(operand, RelabelType))
        operand = (Node *) ((RelabelType *) operand)->arg;

    /*
     * It's impossible to call the extractQuery method with an unknown
     * operand.  So unless the operand is a Const we can't do much; just
     * assume there will be one ordinary search entry from the operand at
     * runtime.
     */
    if (!IsA(operand, Const))
    {
        counts->exactEntries++;
        counts->searchEntries++;
        return true;
    }

    /* If Const is null, there can be no matches */
    if (((Const *) operand)->constisnull)
        return false;

    /* Otherwise, apply extractQuery and get the actual term counts */
    return gincost_pattern(index, indexcol, clause_op,
                           ((Const *) operand)->constvalue,
                           counts);
}

/*
 * Estimate the number of index terms that need to be searched for while
 * testing the given GIN index clause, and increment the counts in *counts
 * appropriately.  If the query is unsatisfiable, return false.
 *
 * A ScalarArrayOpExpr will give rise to N separate indexscans at runtime,
 * each of which involves one value from the RHS array, plus all the
 * non-array quals (if any).  To model this, we average the counts across
 * the RHS elements, and add the averages to the counts in *counts (which
 * correspond to per-indexscan costs).  We also multiply counts->arrayScans
 * by N, causing gincostestimate to scale up its estimates accordingly.
 */
static bool
gincost_scalararrayopexpr(PlannerInfo *root,
                          IndexOptInfo *index,
                          int indexcol,
                          ScalarArrayOpExpr *clause,
                          double numIndexEntries,
                          GinQualCounts *counts)
{
    Oid         clause_op = clause->opno;
    Node       *rightop = (Node *) lsecond(clause->args);
    ArrayType  *arrayval;
    int16       elmlen;
    bool        elmbyval;
    char        elmalign;
    int         numElems;
    Datum      *elemValues;
    bool       *elemNulls;
    GinQualCounts arraycounts;
    int         numPossible = 0;
    int         i;

    Assert(clause->useOr);

    /* aggressively reduce to a constant, and look through relabeling */
    rightop = estimate_expression_value(root, rightop);

    if (IsA(rightop, RelabelType))
        rightop = (Node *) ((RelabelType *) rightop)->arg;

    /*
     * It's impossible to call the extractQuery method with an unknown
     * operand.  So unless the operand is a Const we can't do much; just
     * assume there will be one ordinary search entry from each array entry
     * at runtime, and fall back on a probably-bad estimate of the number of
     * array entries.
     */
    if (!IsA(rightop, Const))
    {
        counts->exactEntries++;
        counts->searchEntries++;
        counts->arrayScans *= estimate_array_length(rightop);
        return true;
    }

    /* If Const is null, there can be no matches */
    if (((Const *) rightop)->constisnull)
        return false;

    /* Otherwise, extract the array elements and iterate over them */
    arrayval = DatumGetArrayTypeP(((Const *) rightop)->constvalue);
    get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
                         &elmlen, &elmbyval, &elmalign);
    deconstruct_array(arrayval,
                      ARR_ELEMTYPE(arrayval),
                      elmlen, elmbyval, elmalign,
                      &elemValues, &elemNulls, &numElems);

    memset(&arraycounts, 0, sizeof(arraycounts));

    for (i = 0; i < numElems; i++)
    {
        GinQualCounts elemcounts;

        /* NULL can't match anything, so ignore, as the executor will */
        if (elemNulls[i])
            continue;

        /* Otherwise, apply extractQuery and get the actual term counts */
        memset(&elemcounts, 0, sizeof(elemcounts));

        if (gincost_pattern(index, indexcol, clause_op, elemValues[i],
                            &elemcounts))
        {
            /* We ignore array elements that are unsatisfiable patterns */
            numPossible++;

            if (elemcounts.haveFullScan)
            {
                /*
                 * Full index scan will be required.  We treat this as if
                 * every key in the index had been listed in the query; is
                 * that reasonable?
                 */
                elemcounts.partialEntries = 0;
                elemcounts.exactEntries = numIndexEntries;
                elemcounts.searchEntries = numIndexEntries;
            }
            arraycounts.partialEntries += elemcounts.partialEntries;
            arraycounts.exactEntries += elemcounts.exactEntries;
            arraycounts.searchEntries += elemcounts.searchEntries;
        }
    }

    if (numPossible == 0)
    {
        /* No satisfiable patterns in the array */
        return false;
    }

    /*
     * Now add the averages to the global counts.  This will give us an
     * estimate of the average number of terms searched for in each indexscan,
     * including contributions from both array and non-array quals.
     */
    counts->partialEntries += arraycounts.partialEntries / numPossible;
    counts->exactEntries += arraycounts.exactEntries / numPossible;
    counts->searchEntries += arraycounts.searchEntries / numPossible;

    counts->arrayScans *= numPossible;
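
    /*
     * For example (illustrative values): a 3-element array whose elements
     * are all satisfiable and contribute 6 search entries in total adds
     * 6 / 3 = 2 searchEntries to the per-scan counts, while arrayScans is
     * multiplied by 3.
     */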

    return true;
}

/*
 * GIN has search behavior completely different from other index types
 */
void
gincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
                Cost *indexStartupCost, Cost *indexTotalCost,
                Selectivity *indexSelectivity, double *indexCorrelation,
                double *indexPages)
{
    IndexOptInfo *index = path->indexinfo;
    List       *indexQuals = get_quals_from_indexclauses(path->indexclauses);
    List       *selectivityQuals;
    double      numPages = index->pages,
                numTuples = index->tuples;
    double      numEntryPages,
                numDataPages,
                numPendingPages,
                numEntries;
    GinQualCounts counts;
    bool        matchPossible;
    double      partialScale;
    double      entryPagesFetched,
                dataPagesFetched,
                dataPagesFetchedBySel;
    double      qual_op_cost,
                qual_arg_cost,
                spc_random_page_cost,
                outer_scans;
    Relation    indexRel;
    GinStatsData ginStats;
    ListCell   *lc;

    /*
     * Obtain statistical information from the meta page, if possible.  Else
     * set ginStats to zeroes, and we'll cope below.
     */
    if (!index->hypothetical)
    {
        /* Lock should have already been obtained in plancat.c */
        indexRel = index_open(index->indexoid, NoLock);
        ginGetStats(indexRel, &ginStats);
        index_close(indexRel, NoLock);
    }
    else
    {
        memset(&ginStats, 0, sizeof(ginStats));
    }

    /*
     * Assuming we got valid (nonzero) stats at all, nPendingPages can be
     * trusted, but the other fields are data as of the last VACUUM.  We can
     * scale them up to account for growth since then, but that method only
     * goes so far; in the worst case, the stats might be for a completely
     * empty index, and scaling them will produce pretty bogus numbers.
     * Somewhat arbitrarily, set the cutoff for doing scaling at 4X growth; if
     * it's grown more than that, fall back to estimating things only from the
     * assumed-accurate index size.  But we'll trust nPendingPages in any case
     * so long as it's not clearly insane, ie, more than the index size.
     */
    if (ginStats.nPendingPages < numPages)
        numPendingPages = ginStats.nPendingPages;
    else
        numPendingPages = 0;

    if (numPages > 0 && ginStats.nTotalPages <= numPages &&
        ginStats.nTotalPages > numPages / 4 &&
        ginStats.nEntryPages > 0 && ginStats.nEntries > 0)
    {
        /*
         * OK, the stats seem close enough to sane to be trusted.  But we
         * still need to scale them by the ratio numPages / nTotalPages to
         * account for growth since the last VACUUM.
         */
        double      scale = numPages / ginStats.nTotalPages;

        numEntryPages = ceil(ginStats.nEntryPages * scale);
        numDataPages = ceil(ginStats.nDataPages * scale);
        numEntries = ceil(ginStats.nEntries * scale);
        /* ensure we didn't round up too much */
        numEntryPages = Min(numEntryPages, numPages - numPendingPages);
        numDataPages = Min(numDataPages,
                           numPages - numPendingPages - numEntryPages);
    }
    else
    {
        /*
         * We might get here because it's a hypothetical index, or an index
         * created pre-9.1 and never vacuumed since upgrading (in which case
         * its stats would read as zeroes), or just because it's grown too
         * much since the last VACUUM for us to put our faith in scaling.
         *
         * Invent some plausible internal statistics based on the index page
         * count (and clamp that to at least 10 pages, just in case).  We
         * estimate that 90% of the index is entry pages, and the rest is data
         * pages.  Estimate 100 entries per entry page; this is rather bogus
         * since it'll depend on the size of the keys, but it's more robust
         * than trying to predict the number of entries per heap tuple.
         */
        numPages = Max(numPages, 10);
        numEntryPages = floor((numPages - numPendingPages) * 0.90);
        numDataPages = numPages - numPendingPages - numEntryPages;
        numEntries = floor(numEntryPages * 100);
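
        /*
         * For example (illustrative values): a 1000-page index with no
         * pending pages is assumed to hold 900 entry pages, 100 data pages,
         * and 90,000 entries.
         */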
    }

    /* In an empty index, numEntries could be zero.  Avoid divide-by-zero */
    if (numEntries < 1)
        numEntries = 1;

    /*
     * If the index is partial, AND the index predicate with the index-bound
     * quals to produce a more accurate idea of the number of rows covered by
     * the bound conditions.
     */
    selectivityQuals = add_predicate_to_index_quals(index, indexQuals);

    /* Estimate the fraction of main-table tuples that will be visited */
    *indexSelectivity = clauselist_selectivity(root, selectivityQuals,
                                               index->rel->relid,
                                               JOIN_INNER,
                                               NULL);

    /* fetch estimated page cost for tablespace containing index */
    get_tablespace_page_costs(index->reltablespace,
                              &spc_random_page_cost,
                              NULL);

    /*
     * Generic assumption about index correlation: there isn't any.
     */
    *indexCorrelation = 0.0;

    /*
     * Examine quals to estimate number of search entries & partial matches
     */
    memset(&counts, 0, sizeof(counts));
    counts.arrayScans = 1;
    matchPossible = true;

    foreach(lc, path->indexclauses)
    {
        IndexClause *iclause = lfirst_node(IndexClause, lc);
        ListCell   *lc2;

        foreach(lc2, iclause->indexquals)
        {
            RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
            Expr       *clause = rinfo->clause;

            if (IsA(clause, OpExpr))
            {
                matchPossible = gincost_opexpr(root,
                                               index,
                                               iclause->indexcol,
                                               (OpExpr *) clause,
                                               &counts);
                if (!matchPossible)
                    break;
            }
            else if (IsA(clause, ScalarArrayOpExpr))
            {
                matchPossible = gincost_scalararrayopexpr(root,
                                                          index,
                                                          iclause->indexcol,
                                                          (ScalarArrayOpExpr *) clause,
                                                          numEntries,
                                                          &counts);
                if (!matchPossible)
                    break;
            }
            else
            {
                /* shouldn't be anything else for a GIN index */
                elog(ERROR, "unsupported GIN indexqual type: %d",
                     (int) nodeTag(clause));
            }
        }
    }

    /* Fall out if there were any provably-unsatisfiable quals */
    if (!matchPossible)
    {
        *indexStartupCost = 0;
        *indexTotalCost = 0;
        *indexSelectivity = 0;
        return;
    }

    if (counts.haveFullScan || indexQuals == NIL)
    {
        /*
         * Full index scan will be required.  We treat this as if every key in
         * the index had been listed in the query; is that reasonable?
         */
        counts.partialEntries = 0;
        counts.exactEntries = numEntries;
        counts.searchEntries = numEntries;
    }

    /* Will we have more than one iteration of a nestloop scan? */
    outer_scans = loop_count;

    /*
     * Compute the cost to begin the scan.  First of all, account for the
     * pending list.
     */
    entryPagesFetched = numPendingPages;

    /*
     * Estimate the number of entry pages read.  We need to perform
     * counts.searchEntries searches, and we charge each search about
     * numEntryPages^0.15 pages.  A power function is used because the
     * number of tuples on leaf pages is usually much greater, so a plain
     * logarithmic descent model would not fit well.  This includes all
     * searches in the entry tree, including the search for the first entry
     * in the partial match algorithm.
     */
    entryPagesFetched += ceil(counts.searchEntries * rint(pow(numEntryPages, 0.15)));
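
    /*
     * For example (illustrative values): with numEntryPages = 1000,
     * rint(pow(1000, 0.15)) = 3 pages per search, so 4 search entries add
     * ceil(4 * 3) = 12 entry pages.
     */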

    /*
     * Add an estimate of the entry pages read by the partial match
     * algorithm.  It's a scan over the leaf pages of the entry tree.  We
     * don't have any useful stats here, so estimate it as a proportion of
     * the entry pages.  Because counts.partialEntries is really pretty bogus
     * (see code above), it's possible that it exceeds numEntries; clamp the
     * proportion to ensure sanity.
     */
    partialScale = counts.partialEntries / numEntries;
    partialScale = Min(partialScale, 1.0);

    entryPagesFetched += ceil(numEntryPages * partialScale);

    /*
     * The partial match algorithm reads all data pages before doing the
     * actual scan, so it's a startup cost.  Again, we don't have any useful
     * stats here, so estimate it as a proportion.
     */
    dataPagesFetched = ceil(numDataPages * partialScale);

    /*
     * Calculate cache effects if more than one scan due to nestloops or array
     * quals.  The result is pro-rated per nestloop scan, but the array qual
     * factor shouldn't be pro-rated (compare genericcostestimate).
     */
    if (outer_scans > 1 || counts.arrayScans > 1)
    {
        entryPagesFetched *= outer_scans * counts.arrayScans;
        entryPagesFetched = index_pages_fetched(entryPagesFetched,
                                                (BlockNumber) numEntryPages,
                                                numEntryPages, root);
        entryPagesFetched /= outer_scans;
        dataPagesFetched *= outer_scans * counts.arrayScans;
        dataPagesFetched = index_pages_fetched(dataPagesFetched,
                                               (BlockNumber) numDataPages,
                                               numDataPages, root);
        dataPagesFetched /= outer_scans;
    }

    /*
     * Here we use random page cost because logically-close pages could be far
     * apart on disk.
     */
    *indexStartupCost = (entryPagesFetched + dataPagesFetched) * spc_random_page_cost;

    /*
     * Now compute the number of data pages fetched during the scan.
     *
     * We assume every entry has the same number of items, and that there is
     * no overlap between them.  (XXX: tsvector and array opclasses collect
     * statistics on the frequency of individual keys; it would be nice to
     * use those here.)
     */
    dataPagesFetched = ceil(numDataPages * counts.exactEntries / numEntries);

    /*
     * If there is a lot of overlap among the entries, in particular if one of
     * the entries is very frequent, the above calculation can grossly
     * underestimate.  As a simple cross-check, calculate a lower bound based
     * on the overall selectivity of the quals.  At a minimum, we must read
     * one item pointer for each matching entry.
     *
     * The width of each item pointer varies, based on the level of
     * compression.  We don't have statistics on that, but an average of
     * around 3 bytes per item is fairly typical.
     */
    dataPagesFetchedBySel = ceil(*indexSelectivity *
                                 (numTuples / (BLCKSZ / 3)));
    if (dataPagesFetchedBySel > dataPagesFetched)
        dataPagesFetched = dataPagesFetchedBySel;
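
    /*
     * For example (illustrative values): with the default BLCKSZ of 8192,
     * BLCKSZ / 3 = 2730 item pointers fit per page, so a selectivity of
     * 0.01 over 1,000,000 tuples yields a lower bound of
     * ceil(0.01 * 1000000 / 2730) = 4 data pages.
     */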
6815
6816 /* Account for cache effects, the same as above */
6817 if (outer_scans > 1 || counts.arrayScans > 1)
6818 {
6819 dataPagesFetched *= outer_scans * counts.arrayScans;
6820 dataPagesFetched = index_pages_fetched(dataPagesFetched,
6821 (BlockNumber) numDataPages,
6822 numDataPages, root);
6823 dataPagesFetched /= outer_scans;
6824 }
6825
6826 /* And apply random_page_cost as the cost per page */
6827 *indexTotalCost = *indexStartupCost +
6828 dataPagesFetched * spc_random_page_cost;
6829
6830 /*
6831 * Add on index qual eval costs, much as in genericcostestimate. But we
6832 * can disregard indexorderbys, since GIN doesn't support those.
6833 */
6834 qual_arg_cost = index_other_operands_eval_cost(root, indexQuals);
6835 qual_op_cost = cpu_operator_cost * list_length(indexQuals);
6836
6837 *indexStartupCost += qual_arg_cost;
6838 *indexTotalCost += qual_arg_cost;
6839 *indexTotalCost += (numTuples * *indexSelectivity) * (cpu_index_tuple_cost + qual_op_cost);
6840 *indexPages = dataPagesFetched;
6841}
6842
6843/*
6844 * BRIN has search behavior completely different from other index types
6845 */
6846void
6847brincostestimate(PlannerInfo *root, IndexPath *path, double loop_count,
6848 Cost *indexStartupCost, Cost *indexTotalCost,
6849 Selectivity *indexSelectivity, double *indexCorrelation,
6850 double *indexPages)
6851{
6852 IndexOptInfo *index = path->indexinfo;
6853 List *indexQuals = get_quals_from_indexclauses(path->indexclauses);
6854 double numPages = index->pages;
6855 RelOptInfo *baserel = index->rel;
6856 RangeTblEntry *rte = planner_rt_fetch(baserel->relid, root);
6857 Cost spc_seq_page_cost;
6858 Cost spc_random_page_cost;
6859 double qual_arg_cost;
6860 double qualSelectivity;
6861 BrinStatsData statsData;
6862 double indexRanges;
6863 double minimalRanges;
6864 double estimatedRanges;
6865 double selec;
6866 Relation indexRel;
6867 ListCell *l;
6868 VariableStatData vardata;
6869
6870 Assert(rte->rtekind == RTE_RELATION);
6871
6872 /* fetch estimated page cost for the tablespace containing the index */
6873 get_tablespace_page_costs(index->reltablespace,
6874 &spc_random_page_cost,
6875 &spc_seq_page_cost);
6876
6877 /*
6878 * Obtain some data from the index itself. A lock should have already
6879 * been obtained on the index in plancat.c.
6880 */
6881 indexRel = index_open(index->indexoid, NoLock);
6882 brinGetStats(indexRel, &statsData);
6883 index_close(indexRel, NoLock);
6884
6885 /*
6886 * Compute index correlation
6887 *
6888 * Because we can use all index quals equally when scanning, we can use
6889 * the largest correlation (in absolute value) among columns used by the
6890 * query. Start at zero, the worst possible case. If we cannot find any
6891 * correlation statistics, we will keep it as 0.
6892 */
6893 *indexCorrelation = 0;
6894
	foreach(l, path->indexclauses)
	{
		IndexClause *iclause = lfirst_node(IndexClause, l);
		AttrNumber	attnum = index->indexkeys[iclause->indexcol];

		/* attempt to look up stats in the relation for this index column */
		if (attnum != 0)
		{
			/* Simple variable -- look to stats for the underlying table */
			if (get_relation_stats_hook &&
				(*get_relation_stats_hook) (root, rte, attnum, &vardata))
			{
				/*
				 * The hook took control of acquiring a stats tuple. If it
				 * did supply a tuple, it'd better have supplied a freefunc.
				 */
				if (HeapTupleIsValid(vardata.statsTuple) && !vardata.freefunc)
					elog(ERROR,
						 "no function provided to release variable stats with");
			}
			else
			{
				vardata.statsTuple =
					SearchSysCache3(STATRELATTINH,
									ObjectIdGetDatum(rte->relid),
									Int16GetDatum(attnum),
									BoolGetDatum(false));
				vardata.freefunc = ReleaseSysCache;
			}
		}
		else
		{
			/*
			 * Looks like we've found an expression column in the index.
			 * Let's see if there are any stats for it.
			 */

			/* convert the 0-based index column number to a 1-based attnum */
			attnum = iclause->indexcol + 1;

			if (get_index_stats_hook &&
				(*get_index_stats_hook) (root, index->indexoid, attnum, &vardata))
			{
				/*
				 * The hook took control of acquiring a stats tuple. If it
				 * did supply a tuple, it'd better have supplied a freefunc.
				 */
				if (HeapTupleIsValid(vardata.statsTuple) &&
					!vardata.freefunc)
					elog(ERROR, "no function provided to release variable stats with");
			}
			else
			{
				vardata.statsTuple = SearchSysCache3(STATRELATTINH,
													 ObjectIdGetDatum(index->indexoid),
													 Int16GetDatum(attnum),
													 BoolGetDatum(false));
				vardata.freefunc = ReleaseSysCache;
			}
		}

		if (HeapTupleIsValid(vardata.statsTuple))
		{
			AttStatsSlot sslot;

			if (get_attstatsslot(&sslot, vardata.statsTuple,
								 STATISTIC_KIND_CORRELATION, InvalidOid,
								 ATTSTATSSLOT_NUMBERS))
			{
				double		varCorrelation = 0.0;

				if (sslot.nnumbers > 0)
					varCorrelation = Abs(sslot.numbers[0]);

				if (varCorrelation > *indexCorrelation)
					*indexCorrelation = varCorrelation;

				free_attstatsslot(&sslot);
			}
		}

		ReleaseVariableStats(vardata);
	}

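	/*
	 * Estimate the fraction of heap rows selected by the index quals; this
	 * sets a lower bound on the number of ranges we must visit.
	 */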
	qualSelectivity = clauselist_selectivity(root, indexQuals,
											 baserel->relid,
											 JOIN_INNER, NULL);

	/* work out the actual number of ranges in the index */
	indexRanges = Max(ceil((double) baserel->pages / statsData.pagesPerRange),
					  1.0);
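	/*
	 * For instance (illustrative numbers only): a 1000-page heap with
	 * pagesPerRange = 128 yields ceil(1000 / 128) = 8 ranges.
	 */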

	/*
	 * Now calculate the minimum number of ranges we could possibly match if
	 * the rows were in perfect order in the table's heap.
	 */
	minimalRanges = ceil(indexRanges * qualSelectivity);

	/*
	 * Now estimate the number of ranges that we'll touch by using the
	 * indexCorrelation from the stats. Careful not to divide by zero (note
	 * we're using the absolute value of the correlation).
	 */
	if (*indexCorrelation < 1.0e-10)
		estimatedRanges = indexRanges;
	else
		estimatedRanges = Min(minimalRanges / *indexCorrelation, indexRanges);
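	/*
	 * For instance (illustrative numbers only): with 100 ranges, a qual
	 * selectivity of 0.1 (so minimalRanges = 10) and |correlation| = 0.5,
	 * we estimate Min(10 / 0.5, 100) = 20 ranges visited.
	 */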

	/* we expect to visit this portion of the table */
	selec = estimatedRanges / indexRanges;

	CLAMP_PROBABILITY(selec);

	*indexSelectivity = selec;

	/*
	 * Compute the index qual costs, much as in genericcostestimate, to add
	 * to the index costs. We can disregard indexorderbys, since BRIN
	 * doesn't support those.
	 */
	qual_arg_cost = index_other_operands_eval_cost(root, indexQuals);

	/*
	 * Compute the startup cost as the cost to read the whole revmap
	 * sequentially, including the cost to execute the index quals.
	 */
	*indexStartupCost =
		spc_seq_page_cost * statsData.revmapNumPages * loop_count;
	*indexStartupCost += qual_arg_cost;

	/*
	 * Reading a BRIN index may involve some back and forth over the regular
	 * pages, since the revmap can point to them out of sequential order;
	 * charge for reading the rest of the index in random order.
	 */
	*indexTotalCost = *indexStartupCost +
		spc_random_page_cost * (numPages - statsData.revmapNumPages) * loop_count;
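	/*
	 * For instance (illustrative numbers only): with seq/random page costs
	 * of 1.0/4.0, one revmap page, nine index pages in total, loop_count = 1
	 * and zero qual_arg_cost, startup is 1.0 and total is 1.0 + 4.0 * 8 =
	 * 33.0.
	 */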

	/*
	 * Charge a small amount per range tuple that we expect to match. This
	 * is meant to reflect the cost of manipulating the bitmap: the BRIN
	 * scan sets a bit for each page in a matching range, so we must
	 * multiply the charge by the number of pages per range.
	 */
	*indexTotalCost += 0.1 * cpu_operator_cost * estimatedRanges *
		statsData.pagesPerRange;
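	/*
	 * For instance (illustrative numbers only): with the default
	 * cpu_operator_cost of 0.0025, estimatedRanges = 20 and pagesPerRange =
	 * 128, this adds 0.1 * 0.0025 * 20 * 128 = 0.64 to the total cost.
	 */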

	*indexPages = index->pages;
}
