1/*-------------------------------------------------------------------------
2 *
3 * clausesel.c
4 * Routines to compute clause selectivities
5 *
6 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/optimizer/path/clausesel.c
12 *
13 *-------------------------------------------------------------------------
14 */
15#include "postgres.h"
16
17#include "nodes/makefuncs.h"
18#include "nodes/nodeFuncs.h"
19#include "optimizer/clauses.h"
20#include "optimizer/cost.h"
21#include "optimizer/optimizer.h"
22#include "optimizer/pathnode.h"
23#include "optimizer/plancat.h"
24#include "utils/fmgroids.h"
25#include "utils/lsyscache.h"
26#include "utils/selfuncs.h"
27#include "statistics/statistics.h"
28
29
30/*
31 * Data structure for accumulating info about possible range-query
32 * clause pairs in clauselist_selectivity.
33 */
34typedef struct RangeQueryClause
35{
36 struct RangeQueryClause *next; /* next in linked list */
37 Node *var; /* The common variable of the clauses */
38 bool have_lobound; /* found a low-bound clause yet? */
39 bool have_hibound; /* found a high-bound clause yet? */
40 Selectivity lobound; /* Selectivity of a var > something clause */
41 Selectivity hibound; /* Selectivity of a var < something clause */
42} RangeQueryClause;
43
44static void addRangeClause(RangeQueryClause **rqlist, Node *clause,
45 bool varonleft, bool isLTsel, Selectivity s2);
46static RelOptInfo *find_single_rel_for_clauses(PlannerInfo *root,
47 List *clauses);
48
49/****************************************************************************
50 * ROUTINES TO COMPUTE SELECTIVITIES
51 ****************************************************************************/
52
53/*
54 * clauselist_selectivity -
55 * Compute the selectivity of an implicitly-ANDed list of boolean
56 * expression clauses. The list can be empty, in which case 1.0
57 * must be returned. List elements may be either RestrictInfos
58 * or bare expression clauses --- the former is preferred since
59 * it allows caching of results.
60 *
61 * See clause_selectivity() for the meaning of the additional parameters.
62 *
63 * The basic approach is to apply extended statistics first, on as many
64 * clauses as possible, in order to capture cross-column dependencies etc.
65 * The remaining clauses are then estimated using regular statistics tracked
66 * for individual columns. This is done by simply passing the clauses to
67 * clauselist_selectivity_simple.
68 */
69Selectivity
70clauselist_selectivity(PlannerInfo *root,
71 List *clauses,
72 int varRelid,
73 JoinType jointype,
74 SpecialJoinInfo *sjinfo)
75{
76 Selectivity s1 = 1.0;
77 RelOptInfo *rel;
78 Bitmapset *estimatedclauses = NULL;
79
80 /*
81 * Determine if these clauses reference a single relation. If so, and if
82 * it has extended statistics, try to apply those.
83 */
84 rel = find_single_rel_for_clauses(root, clauses);
85 if (rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
86 {
87 /*
88 * Estimate as many clauses as possible using extended statistics.
89 *
90 * 'estimatedclauses' tracks the 0-based list position index of
91 * clauses that we've estimated using extended statistics, and that
92 * should be ignored.
93 */
94 s1 *= statext_clauselist_selectivity(root, clauses, varRelid,
95 jointype, sjinfo, rel,
96 &estimatedclauses);
97 }
98
99 /*
100 * Apply normal selectivity estimates for the remaining clauses, passing
101 * 'estimatedclauses' so that it skips already estimated ones.
102 */
103 return s1 * clauselist_selectivity_simple(root, clauses, varRelid,
104 jointype, sjinfo,
105 estimatedclauses);
106}
107
108/*
109 * clauselist_selectivity_simple -
110 * Compute the selectivity of an implicitly-ANDed list of boolean
111 * expression clauses. The list can be empty, in which case 1.0
112 * must be returned. List elements may be either RestrictInfos
113 * or bare expression clauses --- the former is preferred since
114 * it allows caching of results. The estimatedclauses bitmap tracks
115 * clauses that have already been estimated by other means.
116 *
117 * See clause_selectivity() for the meaning of the additional parameters.
118 *
119 * Our basic approach is to take the product of the selectivities of the
120 * subclauses. However, that's only right if the subclauses have independent
121 * probabilities, and in reality they are often NOT independent. So,
122 * we want to be smarter where we can.
123 *
124 * We also recognize "range queries", such as "x > 34 AND x < 42". Clauses
125 * are recognized as possible range query components if they are restriction
126 * opclauses whose operators have scalarltsel or a related function as their
127 * restriction selectivity estimator. We pair up clauses of this form that
128 * refer to the same variable. An unpairable clause of this kind is simply
129 * multiplied into the selectivity product in the normal way. But when we
130 * find a pair, we know that the selectivities represent the relative
131 * positions of the low and high bounds within the column's range, so instead
132 * of figuring the selectivity as hisel * losel, we can figure it as hisel +
133 * losel - 1. (To visualize this, see that hisel is the fraction of the range
134 * below the high bound, while losel is the fraction above the low bound; so
135 * hisel can be interpreted directly as a 0..1 value but we need to convert
136 * losel to 1-losel before interpreting it as a value. Then the available
137 * range is 1-losel to hisel. However, this calculation double-excludes
138 * nulls, so really we need hisel + losel + null_frac - 1.)
139 *
140 * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation
141 * and instead use DEFAULT_RANGE_INEQ_SEL. The same applies if the equation
142 * yields an impossible (negative) result.
143 *
144 * A free side-effect is that we can recognize redundant inequalities such
145 * as "x < 4 AND x < 5"; only the tighter constraint will be counted.
146 *
147 * Of course this is all very dependent on the behavior of the inequality
148 * selectivity functions; perhaps some day we can generalize the approach.
149 */
150Selectivity
151clauselist_selectivity_simple(PlannerInfo *root,
152 List *clauses,
153 int varRelid,
154 JoinType jointype,
155 SpecialJoinInfo *sjinfo,
156 Bitmapset *estimatedclauses)
157{
158 Selectivity s1 = 1.0;
159 RangeQueryClause *rqlist = NULL;
160 ListCell *l;
161 int listidx;
162
163 /*
164 * If there's exactly one clause (and it was not estimated yet), just go
165 * directly to clause_selectivity(). None of what we might do below is
166 * relevant.
167 */
168 if ((list_length(clauses) == 1) &&
169 bms_num_members(estimatedclauses) == 0)
170 return clause_selectivity(root, (Node *) linitial(clauses),
171 varRelid, jointype, sjinfo);
172
173 /*
174 * Anything that doesn't look like a potential rangequery clause gets
175 * multiplied into s1 and forgotten. Anything that does gets inserted into
176 * an rqlist entry.
177 */
178 listidx = -1;
179 foreach(l, clauses)
180 {
181 Node *clause = (Node *) lfirst(l);
182 RestrictInfo *rinfo;
183 Selectivity s2;
184
185 listidx++;
186
187 /*
188 * Skip this clause if it's already been estimated by some other
189 * statistics above.
190 */
191 if (bms_is_member(listidx, estimatedclauses))
192 continue;
193
194 /* Always compute the selectivity using clause_selectivity */
195 s2 = clause_selectivity(root, clause, varRelid, jointype, sjinfo);
196
197 /*
198 * Check for being passed a RestrictInfo.
199 *
200 * If it's a pseudoconstant RestrictInfo, then s2 is either 1.0 or
201 * 0.0; just use that rather than looking for range pairs.
202 */
203 if (IsA(clause, RestrictInfo))
204 {
205 rinfo = (RestrictInfo *) clause;
206 if (rinfo->pseudoconstant)
207 {
208 s1 = s1 * s2;
209 continue;
210 }
211 clause = (Node *) rinfo->clause;
212 }
213 else
214 rinfo = NULL;
215
216 /*
217 * See if it looks like a restriction clause with a pseudoconstant on
218 * one side. (Anything more complicated than that might not behave in
219 * the simple way we are expecting.) Most of the tests here can be
220 * done more efficiently with rinfo than without.
221 */
222 if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2)
223 {
224 OpExpr *expr = (OpExpr *) clause;
225 bool varonleft = true;
226 bool ok;
227
228 if (rinfo)
229 {
230 ok = (bms_membership(rinfo->clause_relids) == BMS_SINGLETON) &&
231 (is_pseudo_constant_clause_relids(lsecond(expr->args),
232 rinfo->right_relids) ||
233 (varonleft = false,
234 is_pseudo_constant_clause_relids(linitial(expr->args),
235 rinfo->left_relids)));
236 }
237 else
238 {
239 ok = (NumRelids(clause) == 1) &&
240 (is_pseudo_constant_clause(lsecond(expr->args)) ||
241 (varonleft = false,
242 is_pseudo_constant_clause(linitial(expr->args))));
243 }
244
245 if (ok)
246 {
247 /*
248 * If it's not a "<"/"<="/">"/">=" operator, just merge the
249 * selectivity in generically. But if it's the right oprrest,
250 * add the clause to rqlist for later processing.
251 */
252 switch (get_oprrest(expr->opno))
253 {
254 case F_SCALARLTSEL:
255 case F_SCALARLESEL:
256 addRangeClause(&rqlist, clause,
257 varonleft, true, s2);
258 break;
259 case F_SCALARGTSEL:
260 case F_SCALARGESEL:
261 addRangeClause(&rqlist, clause,
262 varonleft, false, s2);
263 break;
264 default:
265 /* Just merge the selectivity in generically */
266 s1 = s1 * s2;
267 break;
268 }
269 continue; /* drop to loop bottom */
270 }
271 }
272
273 /* Not the right form, so treat it generically. */
274 s1 = s1 * s2;
275 }
276
277 /*
278 * Now scan the rangequery pair list.
279 */
280 while (rqlist != NULL)
281 {
282 RangeQueryClause *rqnext;
283
284 if (rqlist->have_lobound && rqlist->have_hibound)
285 {
286 /* Successfully matched a pair of range clauses */
287 Selectivity s2;
288
289 /*
290 * Exact equality to the default value probably means the
291 * selectivity function punted. This is not airtight but should
292 * be good enough.
293 */
294 if (rqlist->hibound == DEFAULT_INEQ_SEL ||
295 rqlist->lobound == DEFAULT_INEQ_SEL)
296 {
297 s2 = DEFAULT_RANGE_INEQ_SEL;
298 }
299 else
300 {
301 s2 = rqlist->hibound + rqlist->lobound - 1.0;
302
303 /* Adjust for double-exclusion of NULLs */
304 s2 += nulltestsel(root, IS_NULL, rqlist->var,
305 varRelid, jointype, sjinfo);
306
307 /*
308 * A zero or slightly negative s2 should be converted into a
309 * small positive value; we probably are dealing with a very
310 * tight range and got a bogus result due to roundoff errors.
311 * However, if s2 is very negative, then we probably have
312 * default selectivity estimates on one or both sides of the
313 * range that we failed to recognize above for some reason.
314 */
315 if (s2 <= 0.0)
316 {
317 if (s2 < -0.01)
318 {
319 /*
320 * No data available --- use a default estimate that
321 * is small, but not real small.
322 */
323 s2 = DEFAULT_RANGE_INEQ_SEL;
324 }
325 else
326 {
327 /*
328 * It's just roundoff error; use a small positive
329 * value
330 */
331 s2 = 1.0e-10;
332 }
333 }
334 }
335 /* Merge in the selectivity of the pair of clauses */
336 s1 *= s2;
337 }
338 else
339 {
340 /* Only found one of a pair, merge it in generically */
341 if (rqlist->have_lobound)
342 s1 *= rqlist->lobound;
343 else
344 s1 *= rqlist->hibound;
345 }
346 /* release storage and advance */
347 rqnext = rqlist->next;
348 pfree(rqlist);
349 rqlist = rqnext;
350 }
351
352 return s1;
353}
354
355/*
356 * addRangeClause --- add a new range clause for clauselist_selectivity
357 *
358 * Here is where we try to match up pairs of range-query clauses
359 */
360static void
361addRangeClause(RangeQueryClause **rqlist, Node *clause,
362 bool varonleft, bool isLTsel, Selectivity s2)
363{
364 RangeQueryClause *rqelem;
365 Node *var;
366 bool is_lobound;
367
368 if (varonleft)
369 {
370 var = get_leftop((Expr *) clause);
371 is_lobound = !isLTsel; /* x < something is high bound */
372 }
373 else
374 {
375 var = get_rightop((Expr *) clause);
376 is_lobound = isLTsel; /* something < x is low bound */
377 }
378
379 for (rqelem = *rqlist; rqelem; rqelem = rqelem->next)
380 {
381 /*
382 * We use full equal() here because the "var" might be a function of
383 * one or more attributes of the same relation...
384 */
385 if (!equal(var, rqelem->var))
386 continue;
387 /* Found the right group to put this clause in */
388 if (is_lobound)
389 {
390 if (!rqelem->have_lobound)
391 {
392 rqelem->have_lobound = true;
393 rqelem->lobound = s2;
394 }
395 else
396 {
397
398 /*------
399 * We have found two similar clauses, such as
400 * x < y AND x <= z.
401 * Keep only the more restrictive one.
402 *------
403 */
404 if (rqelem->lobound > s2)
405 rqelem->lobound = s2;
406 }
407 }
408 else
409 {
410 if (!rqelem->have_hibound)
411 {
412 rqelem->have_hibound = true;
413 rqelem->hibound = s2;
414 }
415 else
416 {
417
418 /*------
419 * We have found two similar clauses, such as
420 * x > y AND x >= z.
421 * Keep only the more restrictive one.
422 *------
423 */
424 if (rqelem->hibound > s2)
425 rqelem->hibound = s2;
426 }
427 }
428 return;
429 }
430
431 /* No matching var found, so make a new clause-pair data structure */
432 rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause));
433 rqelem->var = var;
434 if (is_lobound)
435 {
436 rqelem->have_lobound = true;
437 rqelem->have_hibound = false;
438 rqelem->lobound = s2;
439 }
440 else
441 {
442 rqelem->have_lobound = false;
443 rqelem->have_hibound = true;
444 rqelem->hibound = s2;
445 }
446 rqelem->next = *rqlist;
447 *rqlist = rqelem;
448}
449
450/*
451 * find_single_rel_for_clauses
452 * Examine each clause in 'clauses' and determine if all clauses
453 * reference only a single relation. If so return that relation,
454 * otherwise return NULL.
455 */
456static RelOptInfo *
457find_single_rel_for_clauses(PlannerInfo *root, List *clauses)
458{
459 int lastrelid = 0;
460 ListCell *l;
461
462 foreach(l, clauses)
463 {
464 RestrictInfo *rinfo = (RestrictInfo *) lfirst(l);
465 int relid;
466
467 /*
468 * If we have a list of bare clauses rather than RestrictInfos, we
469 * could pull out their relids the hard way with pull_varnos().
470 * However, currently the extended-stats machinery won't do anything
471 * with non-RestrictInfo clauses anyway, so there's no point in
472 * spending extra cycles; just fail if that's what we have.
473 */
474 if (!IsA(rinfo, RestrictInfo))
475 return NULL;
476
477 if (bms_is_empty(rinfo->clause_relids))
478 continue; /* we can ignore variable-free clauses */
479 if (!bms_get_singleton_member(rinfo->clause_relids, &relid))
480 return NULL; /* multiple relations in this clause */
481 if (lastrelid == 0)
482 lastrelid = relid; /* first clause referencing a relation */
483 else if (relid != lastrelid)
484 return NULL; /* relation not same as last one */
485 }
486
487 if (lastrelid != 0)
488 return find_base_rel(root, lastrelid);
489
490 return NULL; /* no clauses */
491}
492
493/*
494 * bms_is_subset_singleton
495 *
496 * Same result as bms_is_subset(s, bms_make_singleton(x)),
497 * but a little faster and doesn't leak memory.
498 *
499 * Is this of use anywhere else? If so move to bitmapset.c ...
500 */
501static bool
502bms_is_subset_singleton(const Bitmapset *s, int x)
503{
504 switch (bms_membership(s))
505 {
506 case BMS_EMPTY_SET:
507 return true;
508 case BMS_SINGLETON:
509 return bms_is_member(x, s);
510 case BMS_MULTIPLE:
511 return false;
512 }
513 /* can't get here... */
514 return false;
515}
516
517/*
518 * treat_as_join_clause -
519 * Decide whether an operator clause is to be handled by the
520 * restriction or join estimator. Subroutine for clause_selectivity().
521 */
522static inline bool
523treat_as_join_clause(Node *clause, RestrictInfo *rinfo,
524 int varRelid, SpecialJoinInfo *sjinfo)
525{
526 if (varRelid != 0)
527 {
528 /*
529 * Caller is forcing restriction mode (eg, because we are examining an
530 * inner indexscan qual).
531 */
532 return false;
533 }
534 else if (sjinfo == NULL)
535 {
536 /*
537 * It must be a restriction clause, since it's being evaluated at a
538 * scan node.
539 */
540 return false;
541 }
542 else
543 {
544 /*
545 * Otherwise, it's a join if there's more than one relation used. We
546 * can optimize this calculation if an rinfo was passed.
547 *
548 * XXX Since we know the clause is being evaluated at a join, the
549 * only way it could be single-relation is if it was delayed by outer
550 * joins. Although we can make use of the restriction qual estimators
551 * anyway, it seems likely that we ought to account for the
552 * probability of injected nulls somehow.
553 */
554 if (rinfo)
555 return (bms_membership(rinfo->clause_relids) == BMS_MULTIPLE);
556 else
557 return (NumRelids(clause) > 1);
558 }
559}
560
561
562/*
563 * clause_selectivity -
564 * Compute the selectivity of a general boolean expression clause.
565 *
566 * The clause can be either a RestrictInfo or a plain expression. If it's
567 * a RestrictInfo, we try to cache the selectivity for possible re-use,
568 * so passing RestrictInfos is preferred.
569 *
570 * varRelid is either 0 or a rangetable index.
571 *
572 * When varRelid is not 0, only variables belonging to that relation are
573 * considered in computing selectivity; other vars are treated as constants
574 * of unknown values. This is appropriate for estimating the selectivity of
575 * a join clause that is being used as a restriction clause in a scan of a
576 * nestloop join's inner relation --- varRelid should then be the ID of the
577 * inner relation.
578 *
579 * When varRelid is 0, all variables are treated as variables. This
580 * is appropriate for ordinary join clauses and restriction clauses.
581 *
582 * jointype is the join type, if the clause is a join clause. Pass JOIN_INNER
583 * if the clause isn't a join clause.
584 *
585 * sjinfo is NULL for a non-join clause, otherwise it provides additional
586 * context information about the join being performed. There are some
587 * special cases:
588 * 1. For a special (not INNER) join, sjinfo is always a member of
589 * root->join_info_list.
590 * 2. For an INNER join, sjinfo is just a transient struct, and only the
591 * relids and jointype fields in it can be trusted.
592 * It is possible for jointype to be different from sjinfo->jointype.
593 * This indicates we are considering a variant join: either with
594 * the LHS and RHS switched, or with one input unique-ified.
595 *
596 * Note: when passing nonzero varRelid, it's normally appropriate to set
597 * jointype == JOIN_INNER, sjinfo == NULL, even if the clause is really a
598 * join clause; because we aren't treating it as a join clause.
599 */
600Selectivity
601clause_selectivity(PlannerInfo *root,
602 Node *clause,
603 int varRelid,
604 JoinType jointype,
605 SpecialJoinInfo *sjinfo)
606{
607 Selectivity s1 = 0.5; /* default for any unhandled clause type */
608 RestrictInfo *rinfo = NULL;
609 bool cacheable = false;
610
611 if (clause == NULL) /* can this still happen? */
612 return s1;
613
614 if (IsA(clause, RestrictInfo))
615 {
616 rinfo = (RestrictInfo *) clause;
617
618 /*
619 * If the clause is marked pseudoconstant, then it will be used as a
620 * gating qual and should not affect selectivity estimates; hence
621 * return 1.0. The only exception is that a constant FALSE may be
622 * taken as having selectivity 0.0, since it will surely mean no rows
623 * out of the plan. This case is simple enough that we need not
624 * bother caching the result.
625 */
626 if (rinfo->pseudoconstant)
627 {
628 if (!IsA(rinfo->clause, Const))
629 return (Selectivity) 1.0;
630 }
631
632 /*
633 * If the clause is marked redundant, always return 1.0.
634 */
635 if (rinfo->norm_selec > 1)
636 return (Selectivity) 1.0;
637
638 /*
639 * If possible, cache the result of the selectivity calculation for
640 * the clause. We can cache if varRelid is zero or the clause
641 * contains only vars of that relid --- otherwise varRelid will affect
642 * the result, so mustn't cache. Outer join quals might be examined
643 * with either their join's actual jointype or JOIN_INNER, so we need
644 * two cache variables to remember both cases. Note: we assume the
645 * result won't change if we are switching the input relations or
646 * considering a unique-ified case, so we only need one cache variable
647 * for all non-JOIN_INNER cases.
648 */
649 if (varRelid == 0 ||
650 bms_is_subset_singleton(rinfo->clause_relids, varRelid))
651 {
652 /* Cacheable --- do we already have the result? */
653 if (jointype == JOIN_INNER)
654 {
655 if (rinfo->norm_selec >= 0)
656 return rinfo->norm_selec;
657 }
658 else
659 {
660 if (rinfo->outer_selec >= 0)
661 return rinfo->outer_selec;
662 }
663 cacheable = true;
664 }
665
666 /*
667 * Proceed with examination of contained clause. If the clause is an
668 * OR-clause, we want to look at the variant with sub-RestrictInfos,
669 * so that per-subclause selectivities can be cached.
670 */
671 if (rinfo->orclause)
672 clause = (Node *) rinfo->orclause;
673 else
674 clause = (Node *) rinfo->clause;
675 }
676
677 if (IsA(clause, Var))
678 {
679 Var *var = (Var *) clause;
680
681 /*
682 * We probably shouldn't ever see an uplevel Var here, but if we do,
683 * return the default selectivity...
684 */
685 if (var->varlevelsup == 0 &&
686 (varRelid == 0 || varRelid == (int) var->varno))
687 {
688 /* Use the restriction selectivity function for a bool Var */
689 s1 = boolvarsel(root, (Node *) var, varRelid);
690 }
691 }
692 else if (IsA(clause, Const))
693 {
694 /* bool constant is pretty easy... */
695 Const *con = (Const *) clause;
696
697 s1 = con->constisnull ? 0.0 :
698 DatumGetBool(con->constvalue) ? 1.0 : 0.0;
699 }
700 else if (IsA(clause, Param))
701 {
702 /* see if we can replace the Param */
703 Node *subst = estimate_expression_value(root, clause);
704
705 if (IsA(subst, Const))
706 {
707 /* bool constant is pretty easy... */
708 Const *con = (Const *) subst;
709
710 s1 = con->constisnull ? 0.0 :
711 DatumGetBool(con->constvalue) ? 1.0 : 0.0;
712 }
713 else
714 {
715 /* XXX any way to do better than default? */
716 }
717 }
718 else if (is_notclause(clause))
719 {
720 /* inverse of the selectivity of the underlying clause */
721 s1 = 1.0 - clause_selectivity(root,
722 (Node *) get_notclausearg((Expr *) clause),
723 varRelid,
724 jointype,
725 sjinfo);
726 }
727 else if (is_andclause(clause))
728 {
729 /* share code with clauselist_selectivity() */
730 s1 = clauselist_selectivity(root,
731 ((BoolExpr *) clause)->args,
732 varRelid,
733 jointype,
734 sjinfo);
735 }
736 else if (is_orclause(clause))
737 {
738 /*
739 * Selectivities for an OR clause are computed as s1+s2 - s1*s2 to
740 * account for the probable overlap of selected tuple sets.
741 *
742 * XXX is this too conservative?
743 */
744 ListCell *arg;
745
746 s1 = 0.0;
747 foreach(arg, ((BoolExpr *) clause)->args)
748 {
749 Selectivity s2 = clause_selectivity(root,
750 (Node *) lfirst(arg),
751 varRelid,
752 jointype,
753 sjinfo);
754
755 s1 = s1 + s2 - s1 * s2;
756 }
757 }
758 else if (is_opclause(clause) || IsA(clause, DistinctExpr))
759 {
760 OpExpr *opclause = (OpExpr *) clause;
761 Oid opno = opclause->opno;
762
763 if (treat_as_join_clause(clause, rinfo, varRelid, sjinfo))
764 {
765 /* Estimate selectivity for a join clause. */
766 s1 = join_selectivity(root, opno,
767 opclause->args,
768 opclause->inputcollid,
769 jointype,
770 sjinfo);
771 }
772 else
773 {
774 /* Estimate selectivity for a restriction clause. */
775 s1 = restriction_selectivity(root, opno,
776 opclause->args,
777 opclause->inputcollid,
778 varRelid);
779 }
780
781 /*
782 * DistinctExpr has the same representation as OpExpr, but the
783 * contained operator is "=" not "<>", so we must negate the result.
784 * This estimation method doesn't give the right behavior for nulls,
785 * but it's better than doing nothing.
786 */
787 if (IsA(clause, DistinctExpr))
788 s1 = 1.0 - s1;
789 }
790 else if (is_funcclause(clause))
791 {
792 FuncExpr *funcclause = (FuncExpr *) clause;
793
794 /* Try to get an estimate from the support function, if any */
795 s1 = function_selectivity(root,
796 funcclause->funcid,
797 funcclause->args,
798 funcclause->inputcollid,
799 treat_as_join_clause(clause, rinfo,
800 varRelid, sjinfo),
801 varRelid,
802 jointype,
803 sjinfo);
804 }
805 else if (IsA(clause, ScalarArrayOpExpr))
806 {
807 /* Use node specific selectivity calculation function */
808 s1 = scalararraysel(root,
809 (ScalarArrayOpExpr *) clause,
810 treat_as_join_clause(clause, rinfo,
811 varRelid, sjinfo),
812 varRelid,
813 jointype,
814 sjinfo);
815 }
816 else if (IsA(clause, RowCompareExpr))
817 {
818 /* Use node specific selectivity calculation function */
819 s1 = rowcomparesel(root,
820 (RowCompareExpr *) clause,
821 varRelid,
822 jointype,
823 sjinfo);
824 }
825 else if (IsA(clause, NullTest))
826 {
827 /* Use node specific selectivity calculation function */
828 s1 = nulltestsel(root,
829 ((NullTest *) clause)->nulltesttype,
830 (Node *) ((NullTest *) clause)->arg,
831 varRelid,
832 jointype,
833 sjinfo);
834 }
835 else if (IsA(clause, BooleanTest))
836 {
837 /* Use node specific selectivity calculation function */
838 s1 = booltestsel(root,
839 ((BooleanTest *) clause)->booltesttype,
840 (Node *) ((BooleanTest *) clause)->arg,
841 varRelid,
842 jointype,
843 sjinfo);
844 }
845 else if (IsA(clause, CurrentOfExpr))
846 {
847 /* CURRENT OF selects at most one row of its table */
848 CurrentOfExpr *cexpr = (CurrentOfExpr *) clause;
849 RelOptInfo *crel = find_base_rel(root, cexpr->cvarno);
850
851 if (crel->tuples > 0)
852 s1 = 1.0 / crel->tuples;
853 }
854 else if (IsA(clause, RelabelType))
855 {
856 /* Not sure this case is needed, but it can't hurt */
857 s1 = clause_selectivity(root,
858 (Node *) ((RelabelType *) clause)->arg,
859 varRelid,
860 jointype,
861 sjinfo);
862 }
863 else if (IsA(clause, CoerceToDomain))
864 {
865 /* Not sure this case is needed, but it can't hurt */
866 s1 = clause_selectivity(root,
867 (Node *) ((CoerceToDomain *) clause)->arg,
868 varRelid,
869 jointype,
870 sjinfo);
871 }
872 else
873 {
874 /*
875 * For anything else, see if we can consider it as a boolean variable.
876 * This only works if it's an immutable expression in Vars of a single
877 * relation; but there's no point in us checking that here because
878 * boolvarsel() will do it internally, and return a suitable default
879 * selectivity if not.
880 */
881 s1 = boolvarsel(root, clause, varRelid);
882 }
883
884 /* Cache the result if possible */
885 if (cacheable)
886 {
887 if (jointype == JOIN_INNER)
888 rinfo->norm_selec = s1;
889 else
890 rinfo->outer_selec = s1;
891 }
892
893#ifdef SELECTIVITY_DEBUG
894 elog(DEBUG4, "clause_selectivity: s1 %f", s1);
895#endif /* SELECTIVITY_DEBUG */
896
897 return s1;
898}
899