clausesel.c source code [PostgreSQL/src/backend/optimizer/path/clausesel.c]

1	/-------------------------------------------------------------------------*
2	*
3	* clausesel.c
4	* Routines to compute clause selectivities
5	*
6	* Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7	* Portions Copyright (c) 1994, Regents of the University of California
8	*
9	*
10	* IDENTIFICATION
11	* src/backend/optimizer/path/clausesel.c
12	*
13	*-------------------------------------------------------------------------
14	*/
15	#include "postgres.h"
16
17	#include "nodes/makefuncs.h"
18	#include "nodes/nodeFuncs.h"
19	#include "optimizer/clauses.h"
20	#include "optimizer/cost.h"
21	#include "optimizer/optimizer.h"
22	#include "optimizer/pathnode.h"
23	#include "optimizer/plancat.h"
24	#include "utils/fmgroids.h"
25	#include "utils/lsyscache.h"
26	#include "utils/selfuncs.h"
27	#include "statistics/statistics.h"
28
29
30	/*
31	* Data structure for accumulating info about possible range-query
32	* clause pairs in clauselist_selectivity.
33	*/
34	typedef struct RangeQueryClause
35	{
36	struct RangeQueryClause next; /* next in linked list /
37	Node var; /* The common variable of the clauses /
38	bool have_lobound; / found a low-bound clause yet? /
39	bool have_hibound; / found a high-bound clause yet? /
40	Selectivity lobound; / Selectivity of a var > something clause /
41	Selectivity hibound; / Selectivity of a var < something clause /
42	} RangeQueryClause;
43
44	static void addRangeClause(RangeQueryClause *rqlist, Node clause,
45	bool varonleft, bool isLTsel, Selectivity s2);
46	static RelOptInfo find_single_rel_for_clauses(PlannerInfo root,
47	List *clauses);
48
49	/****************************************************************************
50	* ROUTINES TO COMPUTE SELECTIVITIES
51	****************************************************************************/
52
53	/*
54	* clauselist_selectivity -
55	* Compute the selectivity of an implicitly-ANDed list of boolean
56	* expression clauses. The list can be empty, in which case 1.0
57	* must be returned. List elements may be either RestrictInfos
58	* or bare expression clauses --- the former is preferred since
59	* it allows caching of results.
60	*
61	* See clause_selectivity() for the meaning of the additional parameters.
62	*
63	* The basic approach is to apply extended statistics first, on as many
64	* clauses as possible, in order to capture cross-column dependencies etc.
65	* The remaining clauses are then estimated using regular statistics tracked
66	* for individual columns. This is done by simply passing the clauses to
67	* clauselist_selectivity_simple.
68	*/
69	Selectivity
70	clauselist_selectivity(PlannerInfo *root,
71	List *clauses,
72	int varRelid,
73	JoinType jointype,
74	SpecialJoinInfo *sjinfo)
75	{
76	Selectivity s1 = `1.0`;
77	RelOptInfo *rel;
78	Bitmapset *estimatedclauses = NULL;
79
80	/*
81	* Determine if these clauses reference a single relation. If so, and if
82	* it has extended statistics, try to apply those.
83	*/
84	rel = find_single_rel_for_clauses(root, clauses);
85	if (rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL)
86	{
87	/*
88	* Estimate as many clauses as possible using extended statistics.
89	*
90	* 'estimatedclauses' tracks the 0-based list position index of
91	* clauses that we've estimated using extended statistics, and that
92	* should be ignored.
93	*/
94	s1 *= statext_clauselist_selectivity(root, clauses, varRelid,
95	jointype, sjinfo, rel,
96	&estimatedclauses);
97	}
98
99	/*
100	* Apply normal selectivity estimates for the remaining clauses, passing
101	* 'estimatedclauses' so that it skips already estimated ones.
102	*/
103	return s1 * clauselist_selectivity_simple(root, clauses, varRelid,
104	jointype, sjinfo,
105	estimatedclauses);
106	}
107
108	/*
109	* clauselist_selectivity_simple -
110	* Compute the selectivity of an implicitly-ANDed list of boolean
111	* expression clauses. The list can be empty, in which case 1.0
112	* must be returned. List elements may be either RestrictInfos
113	* or bare expression clauses --- the former is preferred since
114	* it allows caching of results. The estimatedclauses bitmap tracks
115	* clauses that have already been estimated by other means.
116	*
117	* See clause_selectivity() for the meaning of the additional parameters.
118	*
119	* Our basic approach is to take the product of the selectivities of the
120	* subclauses. However, that's only right if the subclauses have independent
121	* probabilities, and in reality they are often NOT independent. So,
122	* we want to be smarter where we can.
123	*
124	* We also recognize "range queries", such as "x > 34 AND x < 42". Clauses
125	* are recognized as possible range query components if they are restriction
126	* opclauses whose operators have scalarltsel or a related function as their
127	* restriction selectivity estimator. We pair up clauses of this form that
128	* refer to the same variable. An unpairable clause of this kind is simply
129	* multiplied into the selectivity product in the normal way. But when we
130	* find a pair, we know that the selectivities represent the relative
131	* positions of the low and high bounds within the column's range, so instead
132	* of figuring the selectivity as hisel * losel, we can figure it as hisel +
133	* losel - 1. (To visualize this, see that hisel is the fraction of the range
134	* below the high bound, while losel is the fraction above the low bound; so
135	* hisel can be interpreted directly as a 0..1 value but we need to convert
136	* losel to 1-losel before interpreting it as a value. Then the available
137	* range is 1-losel to hisel. However, this calculation double-excludes
138	* nulls, so really we need hisel + losel + null_frac - 1.)
139	*
140	* If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation
141	* and instead use DEFAULT_RANGE_INEQ_SEL. The same applies if the equation
142	* yields an impossible (negative) result.
143	*
144	* A free side-effect is that we can recognize redundant inequalities such
145	* as "x < 4 AND x < 5"; only the tighter constraint will be counted.
146	*
147	* Of course this is all very dependent on the behavior of the inequality
148	* selectivity functions; perhaps some day we can generalize the approach.
149	*/
150	Selectivity
151	clauselist_selectivity_simple(PlannerInfo *root,
152	List *clauses,
153	int varRelid,
154	JoinType jointype,
155	SpecialJoinInfo *sjinfo,
156	Bitmapset *estimatedclauses)
157	{
158	Selectivity s1 = `1.0`;
159	RangeQueryClause *rqlist = NULL;
160	ListCell *l;
161	int listidx;
162
163	/*
164	* If there's exactly one clause (and it was not estimated yet), just go
165	* directly to clause_selectivity(). None of what we might do below is
166	* relevant.
167	*/
168	if ((list_length(clauses) == `1`) &&
169	bms_num_members(estimatedclauses) == `0`)
170	return clause_selectivity(root, (Node *) linitial(clauses),
171	varRelid, jointype, sjinfo);
172
173	/*
174	* Anything that doesn't look like a potential rangequery clause gets
175	* multiplied into s1 and forgotten. Anything that does gets inserted into
176	* an rqlist entry.
177	*/
178	listidx = -`1`;
179	foreach(l, clauses)
180	{
181	Node clause = (Node ) lfirst(l);
182	RestrictInfo *rinfo;
183	Selectivity s2;
184
185	listidx++;
186
187	/*
188	* Skip this clause if it's already been estimated by some other
189	* statistics above.
190	*/
191	if (bms_is_member(listidx, estimatedclauses))
192	continue;
193
194	/ Always compute the selectivity using clause_selectivity /
195	s2 = clause_selectivity(root, clause, varRelid, jointype, sjinfo);
196
197	/*
198	* Check for being passed a RestrictInfo.
199	*
200	* If it's a pseudoconstant RestrictInfo, then s2 is either 1.0 or
201	* 0.0; just use that rather than looking for range pairs.
202	*/
203	if (IsA(clause, RestrictInfo))
204	{
205	rinfo = (RestrictInfo *) clause;
206	if (rinfo->pseudoconstant)
207	{
208	s1 = s1 * s2;
209	continue;
210	}
211	clause = (Node *) rinfo->clause;
212	}
213	else
214	rinfo = NULL;
215
216	/*
217	* See if it looks like a restriction clause with a pseudoconstant on
218	* one side. (Anything more complicated than that might not behave in
219	* the simple way we are expecting.) Most of the tests here can be
220	* done more efficiently with rinfo than without.
221	*/
222	if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == `2`)
223	{
224	OpExpr expr = (OpExpr ) clause;
225	bool varonleft = true;
226	bool ok;
227
228	if (rinfo)
229	{
230	ok = (bms_membership(rinfo->clause_relids) == BMS_SINGLETON) &&
231	(is_pseudo_constant_clause_relids(lsecond(expr->args),
232	rinfo->right_relids) \|\|
233	(varonleft = false,
234	is_pseudo_constant_clause_relids(linitial(expr->args),
235	rinfo->left_relids)));
236	}
237	else
238	{
239	ok = (NumRelids(clause) == `1`) &&
240	(is_pseudo_constant_clause(lsecond(expr->args)) \|\|
241	(varonleft = false,
242	is_pseudo_constant_clause(linitial(expr->args))));
243	}
244
245	if (ok)
246	{
247	/*
248	* If it's not a "<"/"<="/">"/">=" operator, just merge the
249	* selectivity in generically. But if it's the right oprrest,
250	* add the clause to rqlist for later processing.
251	*/
252	switch (get_oprrest(expr->opno))
253	{
254	case F_SCALARLTSEL:
255	case F_SCALARLESEL:
256	addRangeClause(&rqlist, clause,
257	varonleft, true, s2);
258	break;
259	case F_SCALARGTSEL:
260	case F_SCALARGESEL:
261	addRangeClause(&rqlist, clause,
262	varonleft, false, s2);
263	break;
264	default:
265	/ Just merge the selectivity in generically /
266	s1 = s1 * s2;
267	break;
268	}
269	continue; / drop to loop bottom /
270	}
271	}
272
273	/ Not the right form, so treat it generically. /
274	s1 = s1 * s2;
275	}
276
277	/*
278	* Now scan the rangequery pair list.
279	*/
280	while (rqlist != NULL)
281	{
282	RangeQueryClause *rqnext;
283
284	if (rqlist->have_lobound && rqlist->have_hibound)
285	{
286	/ Successfully matched a pair of range clauses /
287	Selectivity s2;
288
289	/*
290	* Exact equality to the default value probably means the
291	* selectivity function punted. This is not airtight but should
292	* be good enough.
293	*/
294	if (rqlist->hibound == DEFAULT_INEQ_SEL \|\|
295	rqlist->lobound == DEFAULT_INEQ_SEL)
296	{
297	s2 = DEFAULT_RANGE_INEQ_SEL;
298	}
299	else
300	{
301	s2 = rqlist->hibound + rqlist->lobound - `1.0`;
302
303	/ Adjust for double-exclusion of NULLs /
304	s2 += nulltestsel(root, IS_NULL, rqlist->var,
305	varRelid, jointype, sjinfo);
306
307	/*
308	* A zero or slightly negative s2 should be converted into a
309	* small positive value; we probably are dealing with a very
310	* tight range and got a bogus result due to roundoff errors.
311	* However, if s2 is very negative, then we probably have
312	* default selectivity estimates on one or both sides of the
313	* range that we failed to recognize above for some reason.
314	*/
315	if (s2 <= `0.0`)
316	{
317	if (s2 < -`0.01`)
318	{
319	/*
320	* No data available --- use a default estimate that
321	* is small, but not real small.
322	*/
323	s2 = DEFAULT_RANGE_INEQ_SEL;
324	}
325	else
326	{
327	/*
328	* It's just roundoff error; use a small positive
329	* value
330	*/
331	s2 = `1.0e-10`;
332	}
333	}
334	}
335	/ Merge in the selectivity of the pair of clauses /
336	s1 *= s2;
337	}
338	else
339	{
340	/ Only found one of a pair, merge it in generically /
341	if (rqlist->have_lobound)
342	s1 *= rqlist->lobound;
343	else
344	s1 *= rqlist->hibound;
345	}
346	/ release storage and advance /
347	rqnext = rqlist->next;
348	pfree(rqlist);
349	rqlist = rqnext;
350	}
351
352	return s1;
353	}
354
355	/*
356	* addRangeClause --- add a new range clause for clauselist_selectivity
357	*
358	* Here is where we try to match up pairs of range-query clauses
359	*/
360	static void
361	addRangeClause(RangeQueryClause *rqlist, Node clause,
362	bool varonleft, bool isLTsel, Selectivity s2)
363	{
364	RangeQueryClause *rqelem;
365	Node *var;
366	bool is_lobound;
367
368	if (varonleft)
369	{
370	var = get_leftop((Expr *) clause);
371	is_lobound = !isLTsel; / x < something is high bound /
372	}
373	else
374	{
375	var = get_rightop((Expr *) clause);
376	is_lobound = isLTsel; / something < x is low bound /
377	}
378
379	for (rqelem = *rqlist; rqelem; rqelem = rqelem->next)
380	{
381	/*
382	* We use full equal() here because the "var" might be a function of
383	* one or more attributes of the same relation...
384	*/
385	if (!equal(var, rqelem->var))
386	continue;
387	/ Found the right group to put this clause in /
388	if (is_lobound)
389	{
390	if (!rqelem->have_lobound)
391	{
392	rqelem->have_lobound = true;
393	rqelem->lobound = s2;
394	}
395	else
396	{
397
398	/------*
399	* We have found two similar clauses, such as
400	* x < y AND x <= z.
401	* Keep only the more restrictive one.
402	*------
403	*/
404	if (rqelem->lobound > s2)
405	rqelem->lobound = s2;
406	}
407	}
408	else
409	{
410	if (!rqelem->have_hibound)
411	{
412	rqelem->have_hibound = true;
413	rqelem->hibound = s2;
414	}
415	else
416	{
417
418	/------*
419	* We have found two similar clauses, such as
420	* x > y AND x >= z.
421	* Keep only the more restrictive one.
422	*------
423	*/
424	if (rqelem->hibound > s2)
425	rqelem->hibound = s2;
426	}
427	}
428	return;
429	}
430
431	/ No matching var found, so make a new clause-pair data structure /
432	rqelem = (RangeQueryClause ) palloc(sizeof*(RangeQueryClause));
433	rqelem->var = var;
434	if (is_lobound)
435	{
436	rqelem->have_lobound = true;
437	rqelem->have_hibound = false;
438	rqelem->lobound = s2;
439	}
440	else
441	{
442	rqelem->have_lobound = false;
443	rqelem->have_hibound = true;
444	rqelem->hibound = s2;
445	}
446	rqelem->next = *rqlist;
447	*rqlist = rqelem;
448	}
449
450	/*
451	* find_single_rel_for_clauses
452	* Examine each clause in 'clauses' and determine if all clauses
453	* reference only a single relation. If so return that relation,
454	* otherwise return NULL.
455	*/
456	static RelOptInfo *
457	find_single_rel_for_clauses(PlannerInfo root, List clauses)
458	{
459	int lastrelid = `0`;
460	ListCell *l;
461
462	foreach(l, clauses)
463	{
464	RestrictInfo rinfo = (RestrictInfo ) lfirst(l);
465	int relid;
466
467	/*
468	* If we have a list of bare clauses rather than RestrictInfos, we
469	* could pull out their relids the hard way with pull_varnos().
470	* However, currently the extended-stats machinery won't do anything
471	* with non-RestrictInfo clauses anyway, so there's no point in
472	* spending extra cycles; just fail if that's what we have.
473	*/
474	if (!IsA(rinfo, RestrictInfo))
475	return NULL;
476
477	if (bms_is_empty(rinfo->clause_relids))
478	continue; / we can ignore variable-free clauses /
479	if (!bms_get_singleton_member(rinfo->clause_relids, &relid))
480	return NULL; / multiple relations in this clause /
481	if (lastrelid == `0`)
482	lastrelid = relid; / first clause referencing a relation /
483	else if (relid != lastrelid)
484	return NULL; / relation not same as last one /
485	}
486
487	if (lastrelid != `0`)
488	return find_base_rel(root, lastrelid);
489
490	return NULL; / no clauses /
491	}
492
493	/*
494	* bms_is_subset_singleton
495	*
496	* Same result as bms_is_subset(s, bms_make_singleton(x)),
497	* but a little faster and doesn't leak memory.
498	*
499	* Is this of use anywhere else? If so move to bitmapset.c ...
500	*/
501	static bool
502	bms_is_subset_singleton(const Bitmapset s, int* x)
503	{
504	switch (bms_membership(s))
505	{
506	case BMS_EMPTY_SET:
507	return true;
508	case BMS_SINGLETON:
509	return bms_is_member(x, s);
510	case BMS_MULTIPLE:
511	return false;
512	}
513	/ can't get here... /
514	return false;
515	}
516
517	/*
518	* treat_as_join_clause -
519	* Decide whether an operator clause is to be handled by the
520	* restriction or join estimator. Subroutine for clause_selectivity().
521	*/
522	static inline bool
523	treat_as_join_clause(Node clause, RestrictInfo rinfo,
524	int varRelid, SpecialJoinInfo *sjinfo)
525	{
526	if (varRelid != `0`)
527	{
528	/*
529	* Caller is forcing restriction mode (eg, because we are examining an
530	* inner indexscan qual).
531	*/
532	return false;
533	}
534	else if (sjinfo == NULL)
535	{
536	/*
537	* It must be a restriction clause, since it's being evaluated at a
538	* scan node.
539	*/
540	return false;
541	}
542	else
543	{
544	/*
545	* Otherwise, it's a join if there's more than one relation used. We
546	* can optimize this calculation if an rinfo was passed.
547	*
548	* XXX Since we know the clause is being evaluated at a join, the
549	* only way it could be single-relation is if it was delayed by outer
550	* joins. Although we can make use of the restriction qual estimators
551	* anyway, it seems likely that we ought to account for the
552	* probability of injected nulls somehow.
553	*/
554	if (rinfo)
555	return (bms_membership(rinfo->clause_relids) == BMS_MULTIPLE);
556	else
557	return (NumRelids(clause) > `1`);
558	}
559	}
560
561
562	/*
563	* clause_selectivity -
564	* Compute the selectivity of a general boolean expression clause.
565	*
566	* The clause can be either a RestrictInfo or a plain expression. If it's
567	* a RestrictInfo, we try to cache the selectivity for possible re-use,
568	* so passing RestrictInfos is preferred.
569	*
570	* varRelid is either 0 or a rangetable index.
571	*
572	* When varRelid is not 0, only variables belonging to that relation are
573	* considered in computing selectivity; other vars are treated as constants
574	* of unknown values. This is appropriate for estimating the selectivity of
575	* a join clause that is being used as a restriction clause in a scan of a
576	* nestloop join's inner relation --- varRelid should then be the ID of the
577	* inner relation.
578	*
579	* When varRelid is 0, all variables are treated as variables. This
580	* is appropriate for ordinary join clauses and restriction clauses.
581	*
582	* jointype is the join type, if the clause is a join clause. Pass JOIN_INNER
583	* if the clause isn't a join clause.
584	*
585	* sjinfo is NULL for a non-join clause, otherwise it provides additional
586	* context information about the join being performed. There are some
587	* special cases:
588	* 1. For a special (not INNER) join, sjinfo is always a member of
589	* root->join_info_list.
590	* 2. For an INNER join, sjinfo is just a transient struct, and only the
591	* relids and jointype fields in it can be trusted.
592	* It is possible for jointype to be different from sjinfo->jointype.
593	* This indicates we are considering a variant join: either with
594	* the LHS and RHS switched, or with one input unique-ified.
595	*
596	* Note: when passing nonzero varRelid, it's normally appropriate to set
597	* jointype == JOIN_INNER, sjinfo == NULL, even if the clause is really a
598	* join clause; because we aren't treating it as a join clause.
599	*/
600	Selectivity
601	clause_selectivity(PlannerInfo *root,
602	Node *clause,
603	int varRelid,
604	JoinType jointype,
605	SpecialJoinInfo *sjinfo)
606	{
607	Selectivity s1 = `0.5`; / default for any unhandled clause type /
608	RestrictInfo *rinfo = NULL;
609	bool cacheable = false;
610
611	if (clause == NULL) / can this still happen? /
612	return s1;
613
614	if (IsA(clause, RestrictInfo))
615	{
616	rinfo = (RestrictInfo *) clause;
617
618	/*
619	* If the clause is marked pseudoconstant, then it will be used as a
620	* gating qual and should not affect selectivity estimates; hence
621	* return 1.0. The only exception is that a constant FALSE may be
622	* taken as having selectivity 0.0, since it will surely mean no rows
623	* out of the plan. This case is simple enough that we need not
624	* bother caching the result.
625	*/
626	if (rinfo->pseudoconstant)
627	{
628	if (!IsA(rinfo->clause, Const))
629	return (Selectivity) `1.0`;
630	}
631
632	/*
633	* If the clause is marked redundant, always return 1.0.
634	*/
635	if (rinfo->norm_selec > `1`)
636	return (Selectivity) `1.0`;
637
638	/*
639	* If possible, cache the result of the selectivity calculation for
640	* the clause. We can cache if varRelid is zero or the clause
641	* contains only vars of that relid --- otherwise varRelid will affect
642	* the result, so mustn't cache. Outer join quals might be examined
643	* with either their join's actual jointype or JOIN_INNER, so we need
644	* two cache variables to remember both cases. Note: we assume the
645	* result won't change if we are switching the input relations or
646	* considering a unique-ified case, so we only need one cache variable
647	* for all non-JOIN_INNER cases.
648	*/
649	if (varRelid == `0` \|\|
650	bms_is_subset_singleton(rinfo->clause_relids, varRelid))
651	{
652	/ Cacheable --- do we already have the result? /
653	if (jointype == JOIN_INNER)
654	{
655	if (rinfo->norm_selec >= `0`)
656	return rinfo->norm_selec;
657	}
658	else
659	{
660	if (rinfo->outer_selec >= `0`)
661	return rinfo->outer_selec;
662	}
663	cacheable = true;
664	}
665
666	/*
667	* Proceed with examination of contained clause. If the clause is an
668	* OR-clause, we want to look at the variant with sub-RestrictInfos,
669	* so that per-subclause selectivities can be cached.
670	*/
671	if (rinfo->orclause)
672	clause = (Node *) rinfo->orclause;
673	else
674	clause = (Node *) rinfo->clause;
675	}
676
677	if (IsA(clause, Var))
678	{
679	Var var = (Var ) clause;
680
681	/*
682	* We probably shouldn't ever see an uplevel Var here, but if we do,
683	* return the default selectivity...
684	*/
685	if (var->varlevelsup == `0` &&
686	(varRelid == `0` \|\| varRelid == (int) var->varno))
687	{
688	/ Use the restriction selectivity function for a bool Var /
689	s1 = boolvarsel(root, (Node *) var, varRelid);
690	}
691	}
692	else if (IsA(clause, Const))
693	{
694	/ bool constant is pretty easy... /
695	Const con = (Const ) clause;
696
697	s1 = con->constisnull ? `0.0` :
698	DatumGetBool(con->constvalue) ? `1.0` : `0.0`;
699	}
700	else if (IsA(clause, Param))
701	{
702	/ see if we can replace the Param /
703	Node *subst = estimate_expression_value(root, clause);
704
705	if (IsA(subst, Const))
706	{
707	/ bool constant is pretty easy... /
708	Const con = (Const ) subst;
709
710	s1 = con->constisnull ? `0.0` :
711	DatumGetBool(con->constvalue) ? `1.0` : `0.0`;
712	}
713	else
714	{
715	/ XXX any way to do better than default? /
716	}
717	}
718	else if (is_notclause(clause))
719	{
720	/ inverse of the selectivity of the underlying clause /
721	s1 = `1.0` - clause_selectivity(root,
722	(Node ) get_notclausearg((Expr ) clause),
723	varRelid,
724	jointype,
725	sjinfo);
726	}
727	else if (is_andclause(clause))
728	{
729	/ share code with clauselist_selectivity() /
730	s1 = clauselist_selectivity(root,
731	((BoolExpr *) clause)->args,
732	varRelid,
733	jointype,
734	sjinfo);
735	}
736	else if (is_orclause(clause))
737	{
738	/*
739	* Selectivities for an OR clause are computed as s1+s2 - s1*s2 to
740	* account for the probable overlap of selected tuple sets.
741	*
742	* XXX is this too conservative?
743	*/
744	ListCell *arg;
745
746	s1 = `0.0`;
747	foreach(arg, ((BoolExpr *) clause)->args)
748	{
749	Selectivity s2 = clause_selectivity(root,
750	(Node *) lfirst(arg),
751	varRelid,
752	jointype,
753	sjinfo);
754
755	s1 = s1 + s2 - s1 * s2;
756	}
757	}
758	else if (is_opclause(clause) \|\| IsA(clause, DistinctExpr))
759	{
760	OpExpr opclause = (OpExpr ) clause;
761	Oid opno = opclause->opno;
762
763	if (treat_as_join_clause(clause, rinfo, varRelid, sjinfo))
764	{
765	/ Estimate selectivity for a join clause. /
766	s1 = join_selectivity(root, opno,
767	opclause->args,
768	opclause->inputcollid,
769	jointype,
770	sjinfo);
771	}
772	else
773	{
774	/ Estimate selectivity for a restriction clause. /
775	s1 = restriction_selectivity(root, opno,
776	opclause->args,
777	opclause->inputcollid,
778	varRelid);
779	}
780
781	/*
782	* DistinctExpr has the same representation as OpExpr, but the
783	* contained operator is "=" not "<>", so we must negate the result.
784	* This estimation method doesn't give the right behavior for nulls,
785	* but it's better than doing nothing.
786	*/
787	if (IsA(clause, DistinctExpr))
788	s1 = `1.0` - s1;
789	}
790	else if (is_funcclause(clause))
791	{
792	FuncExpr funcclause = (FuncExpr ) clause;
793
794	/ Try to get an estimate from the support function, if any /
795	s1 = function_selectivity(root,
796	funcclause->funcid,
797	funcclause->args,
798	funcclause->inputcollid,
799	treat_as_join_clause(clause, rinfo,
800	varRelid, sjinfo),
801	varRelid,
802	jointype,
803	sjinfo);
804	}
805	else if (IsA(clause, ScalarArrayOpExpr))
806	{
807	/ Use node specific selectivity calculation function /
808	s1 = scalararraysel(root,
809	(ScalarArrayOpExpr *) clause,
810	treat_as_join_clause(clause, rinfo,
811	varRelid, sjinfo),
812	varRelid,
813	jointype,
814	sjinfo);
815	}
816	else if (IsA(clause, RowCompareExpr))
817	{
818	/ Use node specific selectivity calculation function /
819	s1 = rowcomparesel(root,
820	(RowCompareExpr *) clause,
821	varRelid,
822	jointype,
823	sjinfo);
824	}
825	else if (IsA(clause, NullTest))
826	{
827	/ Use node specific selectivity calculation function /
828	s1 = nulltestsel(root,
829	((NullTest *) clause)->nulltesttype,
830	(Node ) ((NullTest ) clause)->arg,
831	varRelid,
832	jointype,
833	sjinfo);
834	}
835	else if (IsA(clause, BooleanTest))
836	{
837	/ Use node specific selectivity calculation function /
838	s1 = booltestsel(root,
839	((BooleanTest *) clause)->booltesttype,
840	(Node ) ((BooleanTest ) clause)->arg,
841	varRelid,
842	jointype,
843	sjinfo);
844	}
845	else if (IsA(clause, CurrentOfExpr))
846	{
847	/ CURRENT OF selects at most one row of its table /
848	CurrentOfExpr cexpr = (CurrentOfExpr ) clause;
849	RelOptInfo *crel = find_base_rel(root, cexpr->cvarno);
850
851	if (crel->tuples > `0`)
852	s1 = `1.0` / crel->tuples;
853	}
854	else if (IsA(clause, RelabelType))
855	{
856	/ Not sure this case is needed, but it can't hurt /
857	s1 = clause_selectivity(root,
858	(Node ) ((RelabelType ) clause)->arg,
859	varRelid,
860	jointype,
861	sjinfo);
862	}
863	else if (IsA(clause, CoerceToDomain))
864	{
865	/ Not sure this case is needed, but it can't hurt /
866	s1 = clause_selectivity(root,
867	(Node ) ((CoerceToDomain ) clause)->arg,
868	varRelid,
869	jointype,
870	sjinfo);
871	}
872	else
873	{
874	/*
875	* For anything else, see if we can consider it as a boolean variable.
876	* This only works if it's an immutable expression in Vars of a single
877	* relation; but there's no point in us checking that here because
878	* boolvarsel() will do it internally, and return a suitable default
879	* selectivity if not.
880	*/
881	s1 = boolvarsel(root, clause, varRelid);
882	}
883
884	/ Cache the result if possible /
885	if (cacheable)
886	{
887	if (jointype == JOIN_INNER)
888	rinfo->norm_selec = s1;
889	else
890	rinfo->outer_selec = s1;
891	}
892
893	#ifdef SELECTIVITY_DEBUG
894	elog(DEBUG4, "clause_selectivity: s1 %f", s1);
895	#endif /* SELECTIVITY_DEBUG */
896
897	return s1;
898	}
899

Browse the source code of PostgreSQL/src/backend/optimizer/path/clausesel.c