selfuncs.c source code [PostgreSQL/src/backend/utils/adt/selfuncs.c]

1	/-------------------------------------------------------------------------*
2	*
3	* selfuncs.c
4	* Selectivity functions and index cost estimation functions for
5	* standard operators and index access methods.
6	*
7	* Selectivity routines are registered in the pg_operator catalog
8	* in the "oprrest" and "oprjoin" attributes.
9	*
10	* Index cost functions are located via the index AM's API struct,
11	* which is obtained from the handler function registered in pg_am.
12	*
13	* Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
14	* Portions Copyright (c) 1994, Regents of the University of California
15	*
16	*
17	* IDENTIFICATION
18	* src/backend/utils/adt/selfuncs.c
19	*
20	*-------------------------------------------------------------------------
21	*/
22
23	/----------*
24	* Operator selectivity estimation functions are called to estimate the
25	* selectivity of WHERE clauses whose top-level operator is their operator.
26	* We divide the problem into two cases:
27	* Restriction clause estimation: the clause involves vars of just
28	* one relation.
29	* Join clause estimation: the clause involves vars of multiple rels.
30	* Join selectivity estimation is far more difficult and usually less accurate
31	* than restriction estimation.
32	*
33	* When dealing with the inner scan of a nestloop join, we consider the
34	* join's joinclauses as restriction clauses for the inner relation, and
35	* treat vars of the outer relation as parameters (a/k/a constants of unknown
36	* values). So, restriction estimators need to be able to accept an argument
37	* telling which relation is to be treated as the variable.
38	*
39	* The call convention for a restriction estimator (oprrest function) is
40	*
41	* Selectivity oprrest (PlannerInfo *root,
42	* Oid operator,
43	* List *args,
44	* int varRelid);
45	*
46	* root: general information about the query (rtable and RelOptInfo lists
47	* are particularly important for the estimator).
48	* operator: OID of the specific operator in question.
49	* args: argument list from the operator clause.
50	* varRelid: if not zero, the relid (rtable index) of the relation to
51	* be treated as the variable relation. May be zero if the args list
52	* is known to contain vars of only one relation.
53	*
54	* This is represented at the SQL level (in pg_proc) as
55	*
56	* float8 oprrest (internal, oid, internal, int4);
57	*
58	* The result is a selectivity, that is, a fraction (0 to 1) of the rows
59	* of the relation that are expected to produce a TRUE result for the
60	* given operator.
61	*
62	* The call convention for a join estimator (oprjoin function) is similar
63	* except that varRelid is not needed, and instead join information is
64	* supplied:
65	*
66	* Selectivity oprjoin (PlannerInfo *root,
67	* Oid operator,
68	* List *args,
69	* JoinType jointype,
70	* SpecialJoinInfo *sjinfo);
71	*
72	* float8 oprjoin (internal, oid, internal, int2, internal);
73	*
74	* (Before Postgres 8.4, join estimators had only the first four of these
75	* parameters. That signature is still allowed, but deprecated.) The
76	* relationship between jointype and sjinfo is explained in the comments for
77	* clause_selectivity() --- the short version is that jointype is usually
78	* best ignored in favor of examining sjinfo.
79	*
80	* Join selectivity for regular inner and outer joins is defined as the
81	* fraction (0 to 1) of the cross product of the relations that is expected
82	* to produce a TRUE result for the given operator. For both semi and anti
83	* joins, however, the selectivity is defined as the fraction of the left-hand
84	* side relation's rows that are expected to have a match (ie, at least one
85	* row with a TRUE result) in the right-hand side.
86	*
87	* For both oprrest and oprjoin functions, the operator's input collation OID
88	* (if any) is passed using the standard fmgr mechanism, so that the estimator
89	* function can fetch it with PG_GET_COLLATION(). Note, however, that all
90	* statistics in pg_statistic are currently built using the relevant column's
91	* collation. Thus, in most cases where we are looking at statistics, we
92	* should ignore the operator collation and use the stats entry's collation.
93	* We expect that the error induced by doing this is usually not large enough
94	* to justify complicating matters. In any case, doing otherwise would yield
95	* entirely garbage results for ordered stats data such as histograms.
96	*----------
97	*/
98
99	#include "postgres.h"
100
101	#include <ctype.h>
102	#include <math.h>
103
104	#include "access/brin.h"
105	#include "access/gin.h"
106	#include "access/table.h"
107	#include "access/tableam.h"
108	#include "access/visibilitymap.h"
109	#include "catalog/pg_am.h"
110	#include "catalog/pg_collation.h"
111	#include "catalog/pg_operator.h"
112	#include "catalog/pg_statistic.h"
113	#include "catalog/pg_statistic_ext.h"
114	#include "executor/nodeAgg.h"
115	#include "miscadmin.h"
116	#include "nodes/makefuncs.h"
117	#include "nodes/nodeFuncs.h"
118	#include "optimizer/clauses.h"
119	#include "optimizer/cost.h"
120	#include "optimizer/optimizer.h"
121	#include "optimizer/pathnode.h"
122	#include "optimizer/paths.h"
123	#include "optimizer/plancat.h"
124	#include "parser/parse_clause.h"
125	#include "parser/parsetree.h"
126	#include "statistics/statistics.h"
127	#include "storage/bufmgr.h"
128	#include "utils/builtins.h"
129	#include "utils/date.h"
130	#include "utils/datum.h"
131	#include "utils/fmgroids.h"
132	#include "utils/index_selfuncs.h"
133	#include "utils/lsyscache.h"
134	#include "utils/memutils.h"
135	#include "utils/pg_locale.h"
136	#include "utils/rel.h"
137	#include "utils/selfuncs.h"
138	#include "utils/snapmgr.h"
139	#include "utils/spccache.h"
140	#include "utils/syscache.h"
141	#include "utils/timestamp.h"
142	#include "utils/typcache.h"
143
144
145	/ Hooks for plugins to get control when we ask for stats /
146	get_relation_stats_hook_type get_relation_stats_hook = NULL;
147	get_index_stats_hook_type get_index_stats_hook = NULL;
148
149	static double eqsel_internal(PG_FUNCTION_ARGS, bool negate);
150	static double eqjoinsel_inner(Oid opfuncoid,
151	VariableStatData vardata1, VariableStatData vardata2,
152	double nd1, double nd2,
153	bool isdefault1, bool isdefault2,
154	AttStatsSlot sslot1, AttStatsSlot sslot2,
155	Form_pg_statistic stats1, Form_pg_statistic stats2,
156	bool have_mcvs1, bool have_mcvs2);
157	static double eqjoinsel_semi(Oid opfuncoid,
158	VariableStatData vardata1, VariableStatData vardata2,
159	double nd1, double nd2,
160	bool isdefault1, bool isdefault2,
161	AttStatsSlot sslot1, AttStatsSlot sslot2,
162	Form_pg_statistic stats1, Form_pg_statistic stats2,
163	bool have_mcvs1, bool have_mcvs2,
164	RelOptInfo *inner_rel);
165	static bool estimate_multivariate_ndistinct(PlannerInfo *root,
166	RelOptInfo rel, List varinfos, double* *ndistinct);
167	static bool convert_to_scalar(Datum value, Oid valuetypid, Oid collid,
168	double *scaledvalue,
169	Datum lobound, Datum hibound, Oid boundstypid,
170	double scaledlobound, double* *scaledhibound);
171	static double convert_numeric_to_scalar(Datum value, Oid typid, bool *failure);
172	static void convert_string_to_scalar(char *value,
173	double *scaledvalue,
174	char *lobound,
175	double *scaledlobound,
176	char *hibound,
177	double *scaledhibound);
178	static void convert_bytea_to_scalar(Datum value,
179	double *scaledvalue,
180	Datum lobound,
181	double *scaledlobound,
182	Datum hibound,
183	double *scaledhibound);
184	static double convert_one_string_to_scalar(char *value,
185	int rangelo, int rangehi);
186	static double convert_one_bytea_to_scalar(unsigned char value, int* valuelen,
187	int rangelo, int rangehi);
188	static char *convert_string_datum(Datum value, Oid typid, Oid collid,
189	bool *failure);
190	static double convert_timevalue_to_scalar(Datum value, Oid typid,
191	bool *failure);
192	static void examine_simple_variable(PlannerInfo root, Var var,
193	VariableStatData *vardata);
194	static bool get_variable_range(PlannerInfo root, VariableStatData vardata,
195	Oid sortop, Datum min, Datum max);
196	static bool get_actual_variable_range(PlannerInfo *root,
197	VariableStatData *vardata,
198	Oid sortop,
199	Datum min, Datum max);
200	static bool get_actual_variable_endpoint(Relation heapRel,
201	Relation indexRel,
202	ScanDirection indexscandir,
203	ScanKey scankeys,
204	int16 typLen,
205	bool typByVal,
206	TupleTableSlot *tableslot,
207	MemoryContext outercontext,
208	Datum *endpointDatum);
209	static RelOptInfo find_join_input_rel(PlannerInfo root, Relids relids);
210
211
212	/*
213	* eqsel - Selectivity of "=" for any data types.
214	*
215	* Note: this routine is also used to estimate selectivity for some
216	* operators that are not "=" but have comparable selectivity behavior,
217	* such as "~=" (geometric approximate-match). Even for "=", we must
218	* keep in mind that the left and right datatypes may differ.
219	*/
220	Datum
221	eqsel(PG_FUNCTION_ARGS)
222	{
223	PG_RETURN_FLOAT8((float8) eqsel_internal(fcinfo, false));
224	}
225
226	/*
227	* Common code for eqsel() and neqsel()
228	*/
229	static double
230	eqsel_internal(PG_FUNCTION_ARGS, bool negate)
231	{
232	PlannerInfo root = (PlannerInfo ) PG_GETARG_POINTER(`0`);
233	Oid operator = PG_GETARG_OID(`1`);
234	List args = (List ) PG_GETARG_POINTER(`2`);
235	int varRelid = PG_GETARG_INT32(`3`);
236	VariableStatData vardata;
237	Node *other;
238	bool varonleft;
239	double selec;
240
241	/*
242	* When asked about <>, we do the estimation using the corresponding =
243	* operator, then convert to <> via "1.0 - eq_selectivity - nullfrac".
244	*/
245	if (negate)
246	{
247	operator = get_negator(operator);
248	if (!OidIsValid(operator))
249	{
250	/ Use default selectivity (should we raise an error instead?) /
251	return `1.0` - DEFAULT_EQ_SEL;
252	}
253	}
254
255	/*
256	* If expression is not variable = something or something = variable, then
257	* punt and return a default estimate.
258	*/
259	if (!get_restriction_variable(root, args, varRelid,
260	&vardata, &other, &varonleft))
261	return negate ? (`1.0` - DEFAULT_EQ_SEL) : DEFAULT_EQ_SEL;
262
263	/*
264	* We can do a lot better if the something is a constant. (Note: the
265	* Const might result from estimation rather than being a simple constant
266	* in the query.)
267	*/
268	if (IsA(other, Const))
269	selec = var_eq_const(&vardata, operator,
270	((Const *) other)->constvalue,
271	((Const *) other)->constisnull,
272	varonleft, negate);
273	else
274	selec = var_eq_non_const(&vardata, operator, other,
275	varonleft, negate);
276
277	ReleaseVariableStats(vardata);
278
279	return selec;
280	}
281
282	/*
283	* var_eq_const --- eqsel for var = const case
284	*
285	* This is exported so that some other estimation functions can use it.
286	*/
287	double
288	var_eq_const(VariableStatData *vardata, Oid operator,
289	Datum constval, bool constisnull,
290	bool varonleft, bool negate)
291	{
292	double selec;
293	double nullfrac = `0.0`;
294	bool isdefault;
295	Oid opfuncoid;
296
297	/*
298	* If the constant is NULL, assume operator is strict and return zero, ie,
299	* operator will never return TRUE. (It's zero even for a negator op.)
300	*/
301	if (constisnull)
302	return `0.0`;
303
304	/*
305	* Grab the nullfrac for use below. Note we allow use of nullfrac
306	* regardless of security check.
307	*/
308	if (HeapTupleIsValid(vardata->statsTuple))
309	{
310	Form_pg_statistic stats;
311
312	stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
313	nullfrac = stats->stanullfrac;
314	}
315
316	/*
317	* If we matched the var to a unique index or DISTINCT clause, assume
318	* there is exactly one match regardless of anything else. (This is
319	* slightly bogus, since the index or clause's equality operator might be
320	* different from ours, but it's much more likely to be right than
321	* ignoring the information.)
322	*/
323	if (vardata->isunique && vardata->rel && vardata->rel->tuples >= `1.0`)
324	{
325	selec = `1.0` / vardata->rel->tuples;
326	}
327	else if (HeapTupleIsValid(vardata->statsTuple) &&
328	statistic_proc_security_check(vardata,
329	(opfuncoid = get_opcode(operator))))
330	{
331	AttStatsSlot sslot;
332	bool match = false;
333	int i;
334
335	/*
336	* Is the constant "=" to any of the column's most common values?
337	* (Although the given operator may not really be "=", we will assume
338	* that seeing whether it returns TRUE is an appropriate test. If you
339	* don't like this, maybe you shouldn't be using eqsel for your
340	* operator...)
341	*/
342	if (get_attstatsslot(&sslot, vardata->statsTuple,
343	STATISTIC_KIND_MCV, InvalidOid,
344	ATTSTATSSLOT_VALUES \| ATTSTATSSLOT_NUMBERS))
345	{
346	FmgrInfo eqproc;
347
348	fmgr_info(opfuncoid, &eqproc);
349
350	for (i = `0`; i < sslot.nvalues; i++)
351	{
352	/ be careful to apply operator right way 'round /
353	if (varonleft)
354	match = DatumGetBool(FunctionCall2Coll(&eqproc,
355	sslot.stacoll,
356	sslot.values[i],
357	constval));
358	else
359	match = DatumGetBool(FunctionCall2Coll(&eqproc,
360	sslot.stacoll,
361	constval,
362	sslot.values[i]));
363	if (match)
364	break;
365	}
366	}
367	else
368	{
369	/ no most-common-value info available /
370	i = `0`; / keep compiler quiet /
371	}
372
373	if (match)
374	{
375	/*
376	* Constant is "=" to this common value. We know selectivity
377	* exactly (or as exactly as ANALYZE could calculate it, anyway).
378	*/
379	selec = sslot.numbers[i];
380	}
381	else
382	{
383	/*
384	* Comparison is against a constant that is neither NULL nor any
385	* of the common values. Its selectivity cannot be more than
386	* this:
387	*/
388	double sumcommon = `0.0`;
389	double otherdistinct;
390
391	for (i = `0`; i < sslot.nnumbers; i++)
392	sumcommon += sslot.numbers[i];
393	selec = `1.0` - sumcommon - nullfrac;
394	CLAMP_PROBABILITY(selec);
395
396	/*
397	* and in fact it's probably a good deal less. We approximate that
398	* all the not-common values share this remaining fraction
399	* equally, so we divide by the number of other distinct values.
400	*/
401	otherdistinct = get_variable_numdistinct(vardata, &isdefault) -
402	sslot.nnumbers;
403	if (otherdistinct > `1`)
404	selec /= otherdistinct;
405
406	/*
407	* Another cross-check: selectivity shouldn't be estimated as more
408	* than the least common "most common value".
409	*/
410	if (sslot.nnumbers > `0` && selec > sslot.numbers[sslot.nnumbers - `1`])
411	selec = sslot.numbers[sslot.nnumbers - `1`];
412	}
413
414	free_attstatsslot(&sslot);
415	}
416	else
417	{
418	/*
419	* No ANALYZE stats available, so make a guess using estimated number
420	* of distinct values and assuming they are equally common. (The guess
421	* is unlikely to be very good, but we do know a few special cases.)
422	*/
423	selec = `1.0` / get_variable_numdistinct(vardata, &isdefault);
424	}
425
426	/ now adjust if we wanted <> rather than = /
427	if (negate)
428	selec = `1.0` - selec - nullfrac;
429
430	/ result should be in range, but make sure... /
431	CLAMP_PROBABILITY(selec);
432
433	return selec;
434	}
435
436	/*
437	* var_eq_non_const --- eqsel for var = something-other-than-const case
438	*
439	* This is exported so that some other estimation functions can use it.
440	*/
441	double
442	var_eq_non_const(VariableStatData *vardata, Oid operator,
443	Node *other,
444	bool varonleft, bool negate)
445	{
446	double selec;
447	double nullfrac = `0.0`;
448	bool isdefault;
449
450	/*
451	* Grab the nullfrac for use below.
452	*/
453	if (HeapTupleIsValid(vardata->statsTuple))
454	{
455	Form_pg_statistic stats;
456
457	stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
458	nullfrac = stats->stanullfrac;
459	}
460
461	/*
462	* If we matched the var to a unique index or DISTINCT clause, assume
463	* there is exactly one match regardless of anything else. (This is
464	* slightly bogus, since the index or clause's equality operator might be
465	* different from ours, but it's much more likely to be right than
466	* ignoring the information.)
467	*/
468	if (vardata->isunique && vardata->rel && vardata->rel->tuples >= `1.0`)
469	{
470	selec = `1.0` / vardata->rel->tuples;
471	}
472	else if (HeapTupleIsValid(vardata->statsTuple))
473	{
474	double ndistinct;
475	AttStatsSlot sslot;
476
477	/*
478	* Search is for a value that we do not know a priori, but we will
479	* assume it is not NULL. Estimate the selectivity as non-null
480	* fraction divided by number of distinct values, so that we get a
481	* result averaged over all possible values whether common or
482	* uncommon. (Essentially, we are assuming that the not-yet-known
483	* comparison value is equally likely to be any of the possible
484	* values, regardless of their frequency in the table. Is that a good
485	* idea?)
486	*/
487	selec = `1.0` - nullfrac;
488	ndistinct = get_variable_numdistinct(vardata, &isdefault);
489	if (ndistinct > `1`)
490	selec /= ndistinct;
491
492	/*
493	* Cross-check: selectivity should never be estimated as more than the
494	* most common value's.
495	*/
496	if (get_attstatsslot(&sslot, vardata->statsTuple,
497	STATISTIC_KIND_MCV, InvalidOid,
498	ATTSTATSSLOT_NUMBERS))
499	{
500	if (sslot.nnumbers > `0` && selec > sslot.numbers[`0`])
501	selec = sslot.numbers[`0`];
502	free_attstatsslot(&sslot);
503	}
504	}
505	else
506	{
507	/*
508	* No ANALYZE stats available, so make a guess using estimated number
509	* of distinct values and assuming they are equally common. (The guess
510	* is unlikely to be very good, but we do know a few special cases.)
511	*/
512	selec = `1.0` / get_variable_numdistinct(vardata, &isdefault);
513	}
514
515	/ now adjust if we wanted <> rather than = /
516	if (negate)
517	selec = `1.0` - selec - nullfrac;
518
519	/ result should be in range, but make sure... /
520	CLAMP_PROBABILITY(selec);
521
522	return selec;
523	}
524
525	/*
526	* neqsel - Selectivity of "!=" for any data types.
527	*
528	* This routine is also used for some operators that are not "!="
529	* but have comparable selectivity behavior. See above comments
530	* for eqsel().
531	*/
532	Datum
533	neqsel(PG_FUNCTION_ARGS)
534	{
535	PG_RETURN_FLOAT8((float8) eqsel_internal(fcinfo, true));
536	}
537
538	/*
539	* scalarineqsel - Selectivity of "<", "<=", ">", ">=" for scalars.
540	*
541	* This is the guts of scalarltsel/scalarlesel/scalargtsel/scalargesel.
542	* The isgt and iseq flags distinguish which of the four cases apply.
543	*
544	* The caller has commuted the clause, if necessary, so that we can treat
545	* the variable as being on the left. The caller must also make sure that
546	* the other side of the clause is a non-null Const, and dissect that into
547	* a value and datatype. (This definition simplifies some callers that
548	* want to estimate against a computed value instead of a Const node.)
549	*
550	* This routine works for any datatype (or pair of datatypes) known to
551	* convert_to_scalar(). If it is applied to some other datatype,
552	* it will return an approximate estimate based on assuming that the constant
553	* value falls in the middle of the bin identified by binary search.
554	*/
555	static double
556	scalarineqsel(PlannerInfo *root, Oid operator, bool isgt, bool iseq,
557	VariableStatData *vardata, Datum constval, Oid consttype)
558	{
559	Form_pg_statistic stats;
560	FmgrInfo opproc;
561	double mcv_selec,
562	hist_selec,
563	sumcommon;
564	double selec;
565
566	if (!HeapTupleIsValid(vardata->statsTuple))
567	{
568	/*
569	* No stats are available. Typically this means we have to fall back
570	* on the default estimate; but if the variable is CTID then we can
571	* make an estimate based on comparing the constant to the table size.
572	*/
573	if (vardata->var && IsA(vardata->var, Var) &&
574	((Var *) vardata->var)->varattno == SelfItemPointerAttributeNumber)
575	{
576	ItemPointer itemptr;
577	double block;
578	double density;
579
580	/*
581	* If the relation's empty, we're going to include all of it.
582	* (This is mostly to avoid divide-by-zero below.)
583	*/
584	if (vardata->rel->pages == `0`)
585	return `1.0`;
586
587	itemptr = (ItemPointer) DatumGetPointer(constval);
588	block = ItemPointerGetBlockNumberNoCheck(itemptr);
589
590	/*
591	* Determine the average number of tuples per page (density).
592	*
593	* Since the last page will, on average, be only half full, we can
594	* estimate it to have half as many tuples as earlier pages. So
595	* give it half the weight of a regular page.
596	*/
597	density = vardata->rel->tuples / (vardata->rel->pages - `0.5`);
598
599	/ If target is the last page, use half the density. /
600	if (block >= vardata->rel->pages - `1`)
601	density *= `0.5`;
602
603	/*
604	* Using the average tuples per page, calculate how far into the
605	* page the itemptr is likely to be and adjust block accordingly,
606	* by adding that fraction of a whole block (but never more than a
607	* whole block, no matter how high the itemptr's offset is). Here
608	* we are ignoring the possibility of dead-tuple line pointers,
609	* which is fairly bogus, but we lack the info to do better.
610	*/
611	if (density > `0.0`)
612	{
613	OffsetNumber offset = ItemPointerGetOffsetNumberNoCheck(itemptr);
614
615	block += Min(offset / density, `1.0`);
616	}
617
618	/*
619	* Convert relative block number to selectivity. Again, the last
620	* page has only half weight.
621	*/
622	selec = block / (vardata->rel->pages - `0.5`);
623
624	/*
625	* The calculation so far gave us a selectivity for the "<=" case.
626	* We'll have one less tuple for "<" and one additional tuple for
627	* ">=", the latter of which we'll reverse the selectivity for
628	* below, so we can simply subtract one tuple for both cases. The
629	* cases that need this adjustment can be identified by iseq being
630	* equal to isgt.
631	*/
632	if (iseq == isgt && vardata->rel->tuples >= `1.0`)
633	selec -= (`1.0` / vardata->rel->tuples);
634
635	/ Finally, reverse the selectivity for the ">", ">=" cases. /
636	if (isgt)
637	selec = `1.0` - selec;
638
639	CLAMP_PROBABILITY(selec);
640	return selec;
641	}
642
643	/ no stats available, so default result /
644	return DEFAULT_INEQ_SEL;
645	}
646	stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
647
648	fmgr_info(get_opcode(operator), &opproc);
649
650	/*
651	* If we have most-common-values info, add up the fractions of the MCV
652	* entries that satisfy MCV OP CONST. These fractions contribute directly
653	* to the result selectivity. Also add up the total fraction represented
654	* by MCV entries.
655	*/
656	mcv_selec = mcv_selectivity(vardata, &opproc, constval, true,
657	&sumcommon);
658
659	/*
660	* If there is a histogram, determine which bin the constant falls in, and
661	* compute the resulting contribution to selectivity.
662	*/
663	hist_selec = ineq_histogram_selectivity(root, vardata,
664	&opproc, isgt, iseq,
665	constval, consttype);
666
667	/*
668	* Now merge the results from the MCV and histogram calculations,
669	* realizing that the histogram covers only the non-null values that are
670	* not listed in MCV.
671	*/
672	selec = `1.0` - stats->stanullfrac - sumcommon;
673
674	if (hist_selec >= `0.0`)
675	selec *= hist_selec;
676	else
677	{
678	/*
679	* If no histogram but there are values not accounted for by MCV,
680	* arbitrarily assume half of them will match.
681	*/
682	selec *= `0.5`;
683	}
684
685	selec += mcv_selec;
686
687	/ result should be in range, but make sure... /
688	CLAMP_PROBABILITY(selec);
689
690	return selec;
691	}
692
693	/*
694	* mcv_selectivity - Examine the MCV list for selectivity estimates
695	*
696	* Determine the fraction of the variable's MCV population that satisfies
697	* the predicate (VAR OP CONST), or (CONST OP VAR) if !varonleft. Also
698	* compute the fraction of the total column population represented by the MCV
699	* list. This code will work for any boolean-returning predicate operator.
700	*
701	* The function result is the MCV selectivity, and the fraction of the
702	* total population is returned into *sumcommonp. Zeroes are returned
703	* if there is no MCV list.
704	*/
705	double
706	mcv_selectivity(VariableStatData vardata, FmgrInfo opproc,
707	Datum constval, bool varonleft,
708	double *sumcommonp)
709	{
710	double mcv_selec,
711	sumcommon;
712	AttStatsSlot sslot;
713	int i;
714
715	mcv_selec = `0.0`;
716	sumcommon = `0.0`;
717
718	if (HeapTupleIsValid(vardata->statsTuple) &&
719	statistic_proc_security_check(vardata, opproc->fn_oid) &&
720	get_attstatsslot(&sslot, vardata->statsTuple,
721	STATISTIC_KIND_MCV, InvalidOid,
722	ATTSTATSSLOT_VALUES \| ATTSTATSSLOT_NUMBERS))
723	{
724	for (i = `0`; i < sslot.nvalues; i++)
725	{
726	if (varonleft ?
727	DatumGetBool(FunctionCall2Coll(opproc,
728	sslot.stacoll,
729	sslot.values[i],
730	constval)) :
731	DatumGetBool(FunctionCall2Coll(opproc,
732	sslot.stacoll,
733	constval,
734	sslot.values[i])))
735	mcv_selec += sslot.numbers[i];
736	sumcommon += sslot.numbers[i];
737	}
738	free_attstatsslot(&sslot);
739	}
740
741	*sumcommonp = sumcommon;
742	return mcv_selec;
743	}
744
745	/*
746	* histogram_selectivity - Examine the histogram for selectivity estimates
747	*
748	* Determine the fraction of the variable's histogram entries that satisfy
749	* the predicate (VAR OP CONST), or (CONST OP VAR) if !varonleft.
750	*
751	* This code will work for any boolean-returning predicate operator, whether
752	* or not it has anything to do with the histogram sort operator. We are
753	* essentially using the histogram just as a representative sample. However,
754	* small histograms are unlikely to be all that representative, so the caller
755	* should be prepared to fall back on some other estimation approach when the
756	* histogram is missing or very small. It may also be prudent to combine this
757	* approach with another one when the histogram is small.
758	*
759	* If the actual histogram size is not at least min_hist_size, we won't bother
760	* to do the calculation at all. Also, if the n_skip parameter is > 0, we
761	* ignore the first and last n_skip histogram elements, on the grounds that
762	* they are outliers and hence not very representative. Typical values for
763	* these parameters are 10 and 1.
764	*
765	* The function result is the selectivity, or -1 if there is no histogram
766	* or it's smaller than min_hist_size.
767	*
768	* The output parameter *hist_size receives the actual histogram size,
769	* or zero if no histogram. Callers may use this number to decide how
770	* much faith to put in the function result.
771	*
772	* Note that the result disregards both the most-common-values (if any) and
773	* null entries. The caller is expected to combine this result with
774	* statistics for those portions of the column population. It may also be
775	* prudent to clamp the result range, ie, disbelieve exact 0 or 1 outputs.
776	*/
777	double
778	histogram_selectivity(VariableStatData vardata, FmgrInfo opproc,
779	Datum constval, bool varonleft,
780	int min_hist_size, int n_skip,
781	int *hist_size)
782	{
783	double result;
784	AttStatsSlot sslot;
785
786	/ check sanity of parameters /
787	Assert(n_skip >= `0`);
788	Assert(min_hist_size > `2` * n_skip);
789
790	if (HeapTupleIsValid(vardata->statsTuple) &&
791	statistic_proc_security_check(vardata, opproc->fn_oid) &&
792	get_attstatsslot(&sslot, vardata->statsTuple,
793	STATISTIC_KIND_HISTOGRAM, InvalidOid,
794	ATTSTATSSLOT_VALUES))
795	{
796	*hist_size = sslot.nvalues;
797	if (sslot.nvalues >= min_hist_size)
798	{
799	int nmatch = `0`;
800	int i;
801
802	for (i = n_skip; i < sslot.nvalues - n_skip; i++)
803	{
804	if (varonleft ?
805	DatumGetBool(FunctionCall2Coll(opproc,
806	sslot.stacoll,
807	sslot.values[i],
808	constval)) :
809	DatumGetBool(FunctionCall2Coll(opproc,
810	sslot.stacoll,
811	constval,
812	sslot.values[i])))
813	nmatch++;
814	}
815	result = ((double) nmatch) / ((double) (sslot.nvalues - `2` * n_skip));
816	}
817	else
818	result = -`1`;
819	free_attstatsslot(&sslot);
820	}
821	else
822	{
823	*hist_size = `0`;
824	result = -`1`;
825	}
826
827	return result;
828	}
829
830	/*
831	* ineq_histogram_selectivity - Examine the histogram for scalarineqsel
832	*
833	* Determine the fraction of the variable's histogram population that
834	* satisfies the inequality condition, ie, VAR < (or <=, >, >=) CONST.
835	* The isgt and iseq flags distinguish which of the four cases apply.
836	*
837	* Returns -1 if there is no histogram (valid results will always be >= 0).
838	*
839	* Note that the result disregards both the most-common-values (if any) and
840	* null entries. The caller is expected to combine this result with
841	* statistics for those portions of the column population.
842	*
843	* This is exported so that some other estimation functions can use it.
844	*/
845	double
846	ineq_histogram_selectivity(PlannerInfo *root,
847	VariableStatData *vardata,
848	FmgrInfo *opproc, bool isgt, bool iseq,
849	Datum constval, Oid consttype)
850	{
851	double hist_selec;
852	AttStatsSlot sslot;
853
854	hist_selec = -`1.0`;
855
856	/*
857	* Someday, ANALYZE might store more than one histogram per rel/att,
858	* corresponding to more than one possible sort ordering defined for the
859	* column type. However, to make that work we will need to figure out
860	* which staop to search for --- it's not necessarily the one we have at
861	* hand! (For example, we might have a '<=' operator rather than the '<'
862	* operator that will appear in staop.) For now, assume that whatever
863	* appears in pg_statistic is sorted the same way our operator sorts, or
864	* the reverse way if isgt is true.
865	*/
866	if (HeapTupleIsValid(vardata->statsTuple) &&
867	statistic_proc_security_check(vardata, opproc->fn_oid) &&
868	get_attstatsslot(&sslot, vardata->statsTuple,
869	STATISTIC_KIND_HISTOGRAM, InvalidOid,
870	ATTSTATSSLOT_VALUES))
871	{
872	if (sslot.nvalues > `1`)
873	{
874	/*
875	* Use binary search to find the desired location, namely the
876	* right end of the histogram bin containing the comparison value,
877	* which is the leftmost entry for which the comparison operator
878	* succeeds (if isgt) or fails (if !isgt). (If the given operator
879	* isn't actually sort-compatible with the histogram, you'll get
880	* garbage results ... but probably not any more garbage-y than
881	* you would have from the old linear search.)
882	*
883	* In this loop, we pay no attention to whether the operator iseq
884	* or not; that detail will be mopped up below. (We cannot tell,
885	* anyway, whether the operator thinks the values are equal.)
886	*
887	* If the binary search accesses the first or last histogram
888	* entry, we try to replace that endpoint with the true column min
889	* or max as found by get_actual_variable_range(). This
890	* ameliorates misestimates when the min or max is moving as a
891	* result of changes since the last ANALYZE. Note that this could
892	* result in effectively including MCVs into the histogram that
893	* weren't there before, but we don't try to correct for that.
894	*/
895	double histfrac;
896	int lobound = `0`; / first possible slot to search /
897	int hibound = sslot.nvalues; / last+1 slot to search /
898	bool have_end = false;
899
900	/*
901	* If there are only two histogram entries, we'll want up-to-date
902	* values for both. (If there are more than two, we need at most
903	* one of them to be updated, so we deal with that within the
904	* loop.)
905	*/
906	if (sslot.nvalues == `2`)
907	have_end = get_actual_variable_range(root,
908	vardata,
909	sslot.staop,
910	&sslot.values[`0`],
911	&sslot.values[`1`]);
912
913	while (lobound < hibound)
914	{
915	int probe = (lobound + hibound) / `2`;
916	bool ltcmp;
917
918	/*
919	* If we find ourselves about to compare to the first or last
920	* histogram entry, first try to replace it with the actual
921	* current min or max (unless we already did so above).
922	*/
923	if (probe == `0` && sslot.nvalues > `2`)
924	have_end = get_actual_variable_range(root,
925	vardata,
926	sslot.staop,
927	&sslot.values[`0`],
928	NULL);
929	else if (probe == sslot.nvalues - `1` && sslot.nvalues > `2`)
930	have_end = get_actual_variable_range(root,
931	vardata,
932	sslot.staop,
933	NULL,
934	&sslot.values[probe]);
935
936	ltcmp = DatumGetBool(FunctionCall2Coll(opproc,
937	sslot.stacoll,
938	sslot.values[probe],
939	constval));
940	if (isgt)
941	ltcmp = !ltcmp;
942	if (ltcmp)
943	lobound = probe + `1`;
944	else
945	hibound = probe;
946	}
947
948	if (lobound <= `0`)
949	{
950	/*
951	* Constant is below lower histogram boundary. More
952	* precisely, we have found that no entry in the histogram
953	* satisfies the inequality clause (if !isgt) or they all do
954	* (if isgt). We estimate that that's true of the entire
955	* table, so set histfrac to 0.0 (which we'll flip to 1.0
956	* below, if isgt).
957	*/
958	histfrac = `0.0`;
959	}
960	else if (lobound >= sslot.nvalues)
961	{
962	/*
963	* Inverse case: constant is above upper histogram boundary.
964	*/
965	histfrac = `1.0`;
966	}
967	else
968	{
969	/ We have values[i-1] <= constant <= values[i]. /
970	int i = lobound;
971	double eq_selec = `0`;
972	double val,
973	high,
974	low;
975	double binfrac;
976
977	/*
978	* In the cases where we'll need it below, obtain an estimate
979	* of the selectivity of "x = constval". We use a calculation
980	* similar to what var_eq_const() does for a non-MCV constant,
981	* ie, estimate that all distinct non-MCV values occur equally
982	* often. But multiplication by "1.0 - sumcommon - nullfrac"
983	* will be done by our caller, so we shouldn't do that here.
984	* Therefore we can't try to clamp the estimate by reference
985	* to the least common MCV; the result would be too small.
986	*
987	* Note: since this is effectively assuming that constval
988	* isn't an MCV, it's logically dubious if constval in fact is
989	* one. But we have to apply some correction for equality,
990	* and anyway we cannot tell if constval is an MCV, since we
991	* don't have a suitable equality operator at hand.
992	*/
993	if (i == `1` \|\| isgt == iseq)
994	{
995	double otherdistinct;
996	bool isdefault;
997	AttStatsSlot mcvslot;
998
999	/ Get estimated number of distinct values /
1000	otherdistinct = get_variable_numdistinct(vardata,
1001	&isdefault);
1002
1003	/ Subtract off the number of known MCVs /
1004	if (get_attstatsslot(&mcvslot, vardata->statsTuple,
1005	STATISTIC_KIND_MCV, InvalidOid,
1006	ATTSTATSSLOT_NUMBERS))
1007	{
1008	otherdistinct -= mcvslot.nnumbers;
1009	free_attstatsslot(&mcvslot);
1010	}
1011
1012	/ If result doesn't seem sane, leave eq_selec at 0 /
1013	if (otherdistinct > `1`)
1014	eq_selec = `1.0` / otherdistinct;
1015	}
1016
1017	/*
1018	* Convert the constant and the two nearest bin boundary
1019	* values to a uniform comparison scale, and do a linear
1020	* interpolation within this bin.
1021	*/
1022	if (convert_to_scalar(constval, consttype, sslot.stacoll,
1023	&val,
1024	sslot.values[i - `1`], sslot.values[i],
1025	vardata->vartype,
1026	&low, &high))
1027	{
1028	if (high <= low)
1029	{
1030	/ cope if bin boundaries appear identical /
1031	binfrac = `0.5`;
1032	}
1033	else if (val <= low)
1034	binfrac = `0.0`;
1035	else if (val >= high)
1036	binfrac = `1.0`;
1037	else
1038	{
1039	binfrac = (val - low) / (high - low);
1040
1041	/*
1042	* Watch out for the possibility that we got a NaN or
1043	* Infinity from the division. This can happen
1044	* despite the previous checks, if for example "low"
1045	* is -Infinity.
1046	*/
1047	if (isnan(binfrac) \|\|
1048	binfrac < `0.0` \|\| binfrac > `1.0`)
1049	binfrac = `0.5`;
1050	}
1051	}
1052	else
1053	{
1054	/*
1055	* Ideally we'd produce an error here, on the grounds that
1056	* the given operator shouldn't have scalarXXsel
1057	* registered as its selectivity func unless we can deal
1058	* with its operand types. But currently, all manner of
1059	* stuff is invoking scalarXXsel, so give a default
1060	* estimate until that can be fixed.
1061	*/
1062	binfrac = `0.5`;
1063	}
1064
1065	/*
1066	* Now, compute the overall selectivity across the values
1067	* represented by the histogram. We have i-1 full bins and
1068	* binfrac partial bin below the constant.
1069	*/
1070	histfrac = (double) (i - `1`) + binfrac;
1071	histfrac /= (double) (sslot.nvalues - `1`);
1072
1073	/*
1074	* At this point, histfrac is an estimate of the fraction of
1075	* the population represented by the histogram that satisfies
1076	* "x <= constval". Somewhat remarkably, this statement is
1077	* true regardless of which operator we were doing the probes
1078	* with, so long as convert_to_scalar() delivers reasonable
1079	* results. If the probe constant is equal to some histogram
1080	* entry, we would have considered the bin to the left of that
1081	* entry if probing with "<" or ">=", or the bin to the right
1082	* if probing with "<=" or ">"; but binfrac would have come
1083	* out as 1.0 in the first case and 0.0 in the second, leading
1084	* to the same histfrac in either case. For probe constants
1085	* between histogram entries, we find the same bin and get the
1086	* same estimate with any operator.
1087	*
1088	* The fact that the estimate corresponds to "x <= constval"
1089	* and not "x < constval" is because of the way that ANALYZE
1090	* constructs the histogram: each entry is, effectively, the
1091	* rightmost value in its sample bucket. So selectivity
1092	* values that are exact multiples of 1/(histogram_size-1)
1093	* should be understood as estimates including a histogram
1094	* entry plus everything to its left.
1095	*
1096	* However, that breaks down for the first histogram entry,
1097	* which necessarily is the leftmost value in its sample
1098	* bucket. That means the first histogram bin is slightly
1099	* narrower than the rest, by an amount equal to eq_selec.
1100	* Another way to say that is that we want "x <= leftmost" to
1101	* be estimated as eq_selec not zero. So, if we're dealing
1102	* with the first bin (i==1), rescale to make that true while
1103	* adjusting the rest of that bin linearly.
1104	*/
1105	if (i == `1`)
1106	histfrac += eq_selec * (`1.0` - binfrac);
1107
1108	/*
1109	* "x <= constval" is good if we want an estimate for "<=" or
1110	* ">", but if we are estimating for "<" or ">=", we now need
1111	* to decrease the estimate by eq_selec.
1112	*/
1113	if (isgt == iseq)
1114	histfrac -= eq_selec;
1115	}
1116
1117	/*
1118	* Now the estimate is finished for "<" and "<=" cases. If we are
1119	* estimating for ">" or ">=", flip it.
1120	*/
1121	hist_selec = isgt ? (`1.0` - histfrac) : histfrac;
1122
1123	/*
1124	* The histogram boundaries are only approximate to begin with,
1125	* and may well be out of date anyway. Therefore, don't believe
1126	* extremely small or large selectivity estimates --- unless we
1127	* got actual current endpoint values from the table, in which
1128	* case just do the usual sanity clamp. Somewhat arbitrarily, we
1129	* set the cutoff for other cases at a hundredth of the histogram
1130	* resolution.
1131	*/
1132	if (have_end)
1133	CLAMP_PROBABILITY(hist_selec);
1134	else
1135	{
1136	double cutoff = `0.01` / (double) (sslot.nvalues - `1`);
1137
1138	if (hist_selec < cutoff)
1139	hist_selec = cutoff;
1140	else if (hist_selec > `1.0` - cutoff)
1141	hist_selec = `1.0` - cutoff;
1142	}
1143	}
1144
1145	free_attstatsslot(&sslot);
1146	}
1147
1148	return hist_selec;
1149	}
1150
1151	/*
1152	* Common wrapper function for the selectivity estimators that simply
1153	* invoke scalarineqsel().
1154	*/
1155	static Datum
1156	scalarineqsel_wrapper(PG_FUNCTION_ARGS, bool isgt, bool iseq)
1157	{
1158	PlannerInfo root = (PlannerInfo ) PG_GETARG_POINTER(`0`);
1159	Oid operator = PG_GETARG_OID(`1`);
1160	List args = (List ) PG_GETARG_POINTER(`2`);
1161	int varRelid = PG_GETARG_INT32(`3`);
1162	VariableStatData vardata;
1163	Node *other;
1164	bool varonleft;
1165	Datum constval;
1166	Oid consttype;
1167	double selec;
1168
1169	/*
1170	* If expression is not variable op something or something op variable,
1171	* then punt and return a default estimate.
1172	*/
1173	if (!get_restriction_variable(root, args, varRelid,
1174	&vardata, &other, &varonleft))
1175	PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
1176
1177	/*
1178	* Can't do anything useful if the something is not a constant, either.
1179	*/
1180	if (!IsA(other, Const))
1181	{
1182	ReleaseVariableStats(vardata);
1183	PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
1184	}
1185
1186	/*
1187	* If the constant is NULL, assume operator is strict and return zero, ie,
1188	* operator will never return TRUE.
1189	*/
1190	if (((Const *) other)->constisnull)
1191	{
1192	ReleaseVariableStats(vardata);
1193	PG_RETURN_FLOAT8(`0.0`);
1194	}
1195	constval = ((Const *) other)->constvalue;
1196	consttype = ((Const *) other)->consttype;
1197
1198	/*
1199	* Force the var to be on the left to simplify logic in scalarineqsel.
1200	*/
1201	if (!varonleft)
1202	{
1203	operator = get_commutator(operator);
1204	if (!operator)
1205	{
1206	/ Use default selectivity (should we raise an error instead?) /
1207	ReleaseVariableStats(vardata);
1208	PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
1209	}
1210	isgt = !isgt;
1211	}
1212
1213	/ The rest of the work is done by scalarineqsel(). /
1214	selec = scalarineqsel(root, operator, isgt, iseq,
1215	&vardata, constval, consttype);
1216
1217	ReleaseVariableStats(vardata);
1218
1219	PG_RETURN_FLOAT8((float8) selec);
1220	}
1221
1222	/*
1223	* scalarltsel - Selectivity of "<" for scalars.
1224	*/
1225	Datum
1226	scalarltsel(PG_FUNCTION_ARGS)
1227	{
1228	return scalarineqsel_wrapper(fcinfo, false, false);
1229	}
1230
1231	/*
1232	* scalarlesel - Selectivity of "<=" for scalars.
1233	*/
1234	Datum
1235	scalarlesel(PG_FUNCTION_ARGS)
1236	{
1237	return scalarineqsel_wrapper(fcinfo, false, true);
1238	}
1239
1240	/*
1241	* scalargtsel - Selectivity of ">" for scalars.
1242	*/
1243	Datum
1244	scalargtsel(PG_FUNCTION_ARGS)
1245	{
1246	return scalarineqsel_wrapper(fcinfo, true, false);
1247	}
1248
1249	/*
1250	* scalargesel - Selectivity of ">=" for scalars.
1251	*/
1252	Datum
1253	scalargesel(PG_FUNCTION_ARGS)
1254	{
1255	return scalarineqsel_wrapper(fcinfo, true, true);
1256	}
1257
1258	/*
1259	* boolvarsel - Selectivity of Boolean variable.
1260	*
1261	* This can actually be called on any boolean-valued expression. If it
1262	* involves only Vars of the specified relation, and if there are statistics
1263	* about the Var or expression (the latter is possible if it's indexed) then
1264	* we'll produce a real estimate; otherwise it's just a default.
1265	*/
1266	Selectivity
1267	boolvarsel(PlannerInfo root, Node arg, int varRelid)
1268	{
1269	VariableStatData vardata;
1270	double selec;
1271
1272	examine_variable(root, arg, varRelid, &vardata);
1273	if (HeapTupleIsValid(vardata.statsTuple))
1274	{
1275	/*
1276	* A boolean variable V is equivalent to the clause V = 't', so we
1277	* compute the selectivity as if that is what we have.
1278	*/
1279	selec = var_eq_const(&vardata, BooleanEqualOperator,
1280	BoolGetDatum(true), false, true, false);
1281	}
1282	else
1283	{
1284	/ Otherwise, the default estimate is 0.5 /
1285	selec = `0.5`;
1286	}
1287	ReleaseVariableStats(vardata);
1288	return selec;
1289	}
1290
1291	/*
1292	* booltestsel - Selectivity of BooleanTest Node.
1293	*/
1294	Selectivity
1295	booltestsel(PlannerInfo root, BoolTestType booltesttype, Node arg,
1296	int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
1297	{
1298	VariableStatData vardata;
1299	double selec;
1300
1301	examine_variable(root, arg, varRelid, &vardata);
1302
1303	if (HeapTupleIsValid(vardata.statsTuple))
1304	{
1305	Form_pg_statistic stats;
1306	double freq_null;
1307	AttStatsSlot sslot;
1308
1309	stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
1310	freq_null = stats->stanullfrac;
1311
1312	if (get_attstatsslot(&sslot, vardata.statsTuple,
1313	STATISTIC_KIND_MCV, InvalidOid,
1314	ATTSTATSSLOT_VALUES \| ATTSTATSSLOT_NUMBERS)
1315	&& sslot.nnumbers > `0`)
1316	{
1317	double freq_true;
1318	double freq_false;
1319
1320	/*
1321	* Get first MCV frequency and derive frequency for true.
1322	*/
1323	if (DatumGetBool(sslot.values[`0`]))
1324	freq_true = sslot.numbers[`0`];
1325	else
1326	freq_true = `1.0` - sslot.numbers[`0`] - freq_null;
1327
1328	/*
1329	* Next derive frequency for false. Then use these as appropriate
1330	* to derive frequency for each case.
1331	*/
1332	freq_false = `1.0` - freq_true - freq_null;
1333
1334	switch (booltesttype)
1335	{
1336	case IS_UNKNOWN:
1337	/ select only NULL values /
1338	selec = freq_null;
1339	break;
1340	case IS_NOT_UNKNOWN:
1341	/ select non-NULL values /
1342	selec = `1.0` - freq_null;
1343	break;
1344	case IS_TRUE:
1345	/ select only TRUE values /
1346	selec = freq_true;
1347	break;
1348	case IS_NOT_TRUE:
1349	/ select non-TRUE values /
1350	selec = `1.0` - freq_true;
1351	break;
1352	case IS_FALSE:
1353	/ select only FALSE values /
1354	selec = freq_false;
1355	break;
1356	case IS_NOT_FALSE:
1357	/ select non-FALSE values /
1358	selec = `1.0` - freq_false;
1359	break;
1360	default:
1361	elog(ERROR, "unrecognized booltesttype: %d",
1362	(int) booltesttype);
1363	selec = `0.0`; / Keep compiler quiet /
1364	break;
1365	}
1366
1367	free_attstatsslot(&sslot);
1368	}
1369	else
1370	{
1371	/*
1372	* No most-common-value info available. Still have null fraction
1373	* information, so use it for IS [NOT] UNKNOWN. Otherwise adjust
1374	* for null fraction and assume a 50-50 split of TRUE and FALSE.
1375	*/
1376	switch (booltesttype)
1377	{
1378	case IS_UNKNOWN:
1379	/ select only NULL values /
1380	selec = freq_null;
1381	break;
1382	case IS_NOT_UNKNOWN:
1383	/ select non-NULL values /
1384	selec = `1.0` - freq_null;
1385	break;
1386	case IS_TRUE:
1387	case IS_FALSE:
1388	/ Assume we select half of the non-NULL values /
1389	selec = (`1.0` - freq_null) / `2.0`;
1390	break;
1391	case IS_NOT_TRUE:
1392	case IS_NOT_FALSE:
1393	/ Assume we select NULLs plus half of the non-NULLs /
1394	/ equiv. to freq_null + (1.0 - freq_null) / 2.0 /
1395	selec = (freq_null + `1.0`) / `2.0`;
1396	break;
1397	default:
1398	elog(ERROR, "unrecognized booltesttype: %d",
1399	(int) booltesttype);
1400	selec = `0.0`; / Keep compiler quiet /
1401	break;
1402	}
1403	}
1404	}
1405	else
1406	{
1407	/*
1408	* If we can't get variable statistics for the argument, perhaps
1409	* clause_selectivity can do something with it. We ignore the
1410	* possibility of a NULL value when using clause_selectivity, and just
1411	* assume the value is either TRUE or FALSE.
1412	*/
1413	switch (booltesttype)
1414	{
1415	case IS_UNKNOWN:
1416	selec = DEFAULT_UNK_SEL;
1417	break;
1418	case IS_NOT_UNKNOWN:
1419	selec = DEFAULT_NOT_UNK_SEL;
1420	break;
1421	case IS_TRUE:
1422	case IS_NOT_FALSE:
1423	selec = (double) clause_selectivity(root, arg,
1424	varRelid,
1425	jointype, sjinfo);
1426	break;
1427	case IS_FALSE:
1428	case IS_NOT_TRUE:
1429	selec = `1.0` - (double) clause_selectivity(root, arg,
1430	varRelid,
1431	jointype, sjinfo);
1432	break;
1433	default:
1434	elog(ERROR, "unrecognized booltesttype: %d",
1435	(int) booltesttype);
1436	selec = `0.0`; / Keep compiler quiet /
1437	break;
1438	}
1439	}
1440
1441	ReleaseVariableStats(vardata);
1442
1443	/ result should be in range, but make sure... /
1444	CLAMP_PROBABILITY(selec);
1445
1446	return (Selectivity) selec;
1447	}
1448
1449	/*
1450	* nulltestsel - Selectivity of NullTest Node.
1451	*/
1452	Selectivity
1453	nulltestsel(PlannerInfo root, NullTestType nulltesttype, Node arg,
1454	int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
1455	{
1456	VariableStatData vardata;
1457	double selec;
1458
1459	examine_variable(root, arg, varRelid, &vardata);
1460
1461	if (HeapTupleIsValid(vardata.statsTuple))
1462	{
1463	Form_pg_statistic stats;
1464	double freq_null;
1465
1466	stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
1467	freq_null = stats->stanullfrac;
1468
1469	switch (nulltesttype)
1470	{
1471	case IS_NULL:
1472
1473	/*
1474	* Use freq_null directly.
1475	*/
1476	selec = freq_null;
1477	break;
1478	case IS_NOT_NULL:
1479
1480	/*
1481	* Select not unknown (not null) values. Calculate from
1482	* freq_null.
1483	*/
1484	selec = `1.0` - freq_null;
1485	break;
1486	default:
1487	elog(ERROR, "unrecognized nulltesttype: %d",
1488	(int) nulltesttype);
1489	return (Selectivity) `0`; / keep compiler quiet /
1490	}
1491	}
1492	else if (vardata.var && IsA(vardata.var, Var) &&
1493	((Var *) vardata.var)->varattno < `0`)
1494	{
1495	/*
1496	* There are no stats for system columns, but we know they are never
1497	* NULL.
1498	*/
1499	selec = (nulltesttype == IS_NULL) ? `0.0` : `1.0`;
1500	}
1501	else
1502	{
1503	/*
1504	* No ANALYZE stats available, so make a guess
1505	*/
1506	switch (nulltesttype)
1507	{
1508	case IS_NULL:
1509	selec = DEFAULT_UNK_SEL;
1510	break;
1511	case IS_NOT_NULL:
1512	selec = DEFAULT_NOT_UNK_SEL;
1513	break;
1514	default:
1515	elog(ERROR, "unrecognized nulltesttype: %d",
1516	(int) nulltesttype);
1517	return (Selectivity) `0`; / keep compiler quiet /
1518	}
1519	}
1520
1521	ReleaseVariableStats(vardata);
1522
1523	/ result should be in range, but make sure... /
1524	CLAMP_PROBABILITY(selec);
1525
1526	return (Selectivity) selec;
1527	}
1528
1529	/*
1530	* strip_array_coercion - strip binary-compatible relabeling from an array expr
1531	*
1532	* For array values, the parser normally generates ArrayCoerceExpr conversions,
1533	* but it seems possible that RelabelType might show up. Also, the planner
1534	* is not currently tense about collapsing stacked ArrayCoerceExpr nodes,
1535	* so we need to be ready to deal with more than one level.
1536	*/
1537	static Node *
1538	strip_array_coercion(Node *node)
1539	{
1540	for (;;)
1541	{
1542	if (node && IsA(node, ArrayCoerceExpr))
1543	{
1544	ArrayCoerceExpr acoerce = (ArrayCoerceExpr ) node;
1545
1546	/*
1547	* If the per-element expression is just a RelabelType on top of
1548	* CaseTestExpr, then we know it's a binary-compatible relabeling.
1549	*/
1550	if (IsA(acoerce->elemexpr, RelabelType) &&
1551	IsA(((RelabelType *) acoerce->elemexpr)->arg, CaseTestExpr))
1552	node = (Node *) acoerce->arg;
1553	else
1554	break;
1555	}
1556	else if (node && IsA(node, RelabelType))
1557	{
1558	/ We don't really expect this case, but may as well cope /
1559	node = (Node ) ((RelabelType ) node)->arg;
1560	}
1561	else
1562	break;
1563	}
1564	return node;
1565	}
1566
1567	/*
1568	* scalararraysel - Selectivity of ScalarArrayOpExpr Node.
1569	*/
1570	Selectivity
1571	scalararraysel(PlannerInfo *root,
1572	ScalarArrayOpExpr *clause,
1573	bool is_join_clause,
1574	int varRelid,
1575	JoinType jointype,
1576	SpecialJoinInfo *sjinfo)
1577	{
1578	Oid operator = clause->opno;
1579	bool useOr = clause->useOr;
1580	bool isEquality = false;
1581	bool isInequality = false;
1582	Node *leftop;
1583	Node *rightop;
1584	Oid nominal_element_type;
1585	Oid nominal_element_collation;
1586	TypeCacheEntry *typentry;
1587	RegProcedure oprsel;
1588	FmgrInfo oprselproc;
1589	Selectivity s1;
1590	Selectivity s1disjoint;
1591
1592	/ First, deconstruct the expression /
1593	Assert(list_length(clause->args) == `2`);
1594	leftop = (Node *) linitial(clause->args);
1595	rightop = (Node *) lsecond(clause->args);
1596
1597	/ aggressively reduce both sides to constants /
1598	leftop = estimate_expression_value(root, leftop);
1599	rightop = estimate_expression_value(root, rightop);
1600
1601	/ get nominal (after relabeling) element type of rightop /
1602	nominal_element_type = get_base_element_type(exprType(rightop));
1603	if (!OidIsValid(nominal_element_type))
1604	return (Selectivity) `0.5`; / probably shouldn't happen /
1605	/ get nominal collation, too, for generating constants /
1606	nominal_element_collation = exprCollation(rightop);
1607
1608	/ look through any binary-compatible relabeling of rightop /
1609	rightop = strip_array_coercion(rightop);
1610
1611	/*
1612	* Detect whether the operator is the default equality or inequality
1613	* operator of the array element type.
1614	*/
1615	typentry = lookup_type_cache(nominal_element_type, TYPECACHE_EQ_OPR);
1616	if (OidIsValid(typentry->eq_opr))
1617	{
1618	if (operator == typentry->eq_opr)
1619	isEquality = true;
1620	else if (get_negator(operator) == typentry->eq_opr)
1621	isInequality = true;
1622	}
1623
1624	/*
1625	* If it is equality or inequality, we might be able to estimate this as a
1626	* form of array containment; for instance "const = ANY(column)" can be
1627	* treated as "ARRAY[const] <@ column". scalararraysel_containment tries
1628	* that, and returns the selectivity estimate if successful, or -1 if not.
1629	*/
1630	if ((isEquality \|\| isInequality) && !is_join_clause)
1631	{
1632	s1 = scalararraysel_containment(root, leftop, rightop,
1633	nominal_element_type,
1634	isEquality, useOr, varRelid);
1635	if (s1 >= `0.0`)
1636	return s1;
1637	}
1638
1639	/*
1640	* Look up the underlying operator's selectivity estimator. Punt if it
1641	* hasn't got one.
1642	*/
1643	if (is_join_clause)
1644	oprsel = get_oprjoin(operator);
1645	else
1646	oprsel = get_oprrest(operator);
1647	if (!oprsel)
1648	return (Selectivity) `0.5`;
1649	fmgr_info(oprsel, &oprselproc);
1650
1651	/*
1652	* In the array-containment check above, we must only believe that an
1653	* operator is equality or inequality if it is the default btree equality
1654	* operator (or its negator) for the element type, since those are the
1655	* operators that array containment will use. But in what follows, we can
1656	* be a little laxer, and also believe that any operators using eqsel() or
1657	* neqsel() as selectivity estimator act like equality or inequality.
1658	*/
1659	if (oprsel == F_EQSEL \|\| oprsel == F_EQJOINSEL)
1660	isEquality = true;
1661	else if (oprsel == F_NEQSEL \|\| oprsel == F_NEQJOINSEL)
1662	isInequality = true;
1663
1664	/*
1665	* We consider three cases:
1666	*
1667	* 1. rightop is an Array constant: deconstruct the array, apply the
1668	* operator's selectivity function for each array element, and merge the
1669	* results in the same way that clausesel.c does for AND/OR combinations.
1670	*
1671	* 2. rightop is an ARRAY[] construct: apply the operator's selectivity
1672	* function for each element of the ARRAY[] construct, and merge.
1673	*
1674	* 3. otherwise, make a guess ...
1675	*/
1676	if (rightop && IsA(rightop, Const))
1677	{
1678	Datum arraydatum = ((Const *) rightop)->constvalue;
1679	bool arrayisnull = ((Const *) rightop)->constisnull;
1680	ArrayType *arrayval;
1681	int16 elmlen;
1682	bool elmbyval;
1683	char elmalign;
1684	int num_elems;
1685	Datum *elem_values;
1686	bool *elem_nulls;
1687	int i;
1688
1689	if (arrayisnull) / qual can't succeed if null array /
1690	return (Selectivity) `0.0`;
1691	arrayval = DatumGetArrayTypeP(arraydatum);
1692	get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
1693	&elmlen, &elmbyval, &elmalign);
1694	deconstruct_array(arrayval,
1695	ARR_ELEMTYPE(arrayval),
1696	elmlen, elmbyval, elmalign,
1697	&elem_values, &elem_nulls, &num_elems);
1698
1699	/*
1700	* For generic operators, we assume the probability of success is
1701	* independent for each array element. But for "= ANY" or "<> ALL",
1702	* if the array elements are distinct (which'd typically be the case)
1703	* then the probabilities are disjoint, and we should just sum them.
1704	*
1705	* If we were being really tense we would try to confirm that the
1706	* elements are all distinct, but that would be expensive and it
1707	* doesn't seem to be worth the cycles; it would amount to penalizing
1708	* well-written queries in favor of poorly-written ones. However, we
1709	* do protect ourselves a little bit by checking whether the
1710	* disjointness assumption leads to an impossible (out of range)
1711	* probability; if so, we fall back to the normal calculation.
1712	*/
1713	s1 = s1disjoint = (useOr ? `0.0` : `1.0`);
1714
1715	for (i = `0`; i < num_elems; i++)
1716	{
1717	List *args;
1718	Selectivity s2;
1719
1720	args = list_make2(leftop,
1721	makeConst(nominal_element_type,
1722	-`1`,
1723	nominal_element_collation,
1724	elmlen,
1725	elem_values[i],
1726	elem_nulls[i],
1727	elmbyval));
1728	if (is_join_clause)
1729	s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
1730	clause->inputcollid,
1731	PointerGetDatum(root),
1732	ObjectIdGetDatum(operator),
1733	PointerGetDatum(args),
1734	Int16GetDatum(jointype),
1735	PointerGetDatum(sjinfo)));
1736	else
1737	s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
1738	clause->inputcollid,
1739	PointerGetDatum(root),
1740	ObjectIdGetDatum(operator),
1741	PointerGetDatum(args),
1742	Int32GetDatum(varRelid)));
1743
1744	if (useOr)
1745	{
1746	s1 = s1 + s2 - s1 * s2;
1747	if (isEquality)
1748	s1disjoint += s2;
1749	}
1750	else
1751	{
1752	s1 = s1 * s2;
1753	if (isInequality)
1754	s1disjoint += s2 - `1.0`;
1755	}
1756	}
1757
1758	/ accept disjoint-probability estimate if in range /
1759	if ((useOr ? isEquality : isInequality) &&
1760	s1disjoint >= `0.0` && s1disjoint <= `1.0`)
1761	s1 = s1disjoint;
1762	}
1763	else if (rightop && IsA(rightop, ArrayExpr) &&
1764	!((ArrayExpr *) rightop)->multidims)
1765	{
1766	ArrayExpr arrayexpr = (ArrayExpr ) rightop;
1767	int16 elmlen;
1768	bool elmbyval;
1769	ListCell *l;
1770
1771	get_typlenbyval(arrayexpr->element_typeid,
1772	&elmlen, &elmbyval);
1773
1774	/*
1775	* We use the assumption of disjoint probabilities here too, although
1776	* the odds of equal array elements are rather higher if the elements
1777	* are not all constants (which they won't be, else constant folding
1778	* would have reduced the ArrayExpr to a Const). In this path it's
1779	* critical to have the sanity check on the s1disjoint estimate.
1780	*/
1781	s1 = s1disjoint = (useOr ? `0.0` : `1.0`);
1782
1783	foreach(l, arrayexpr->elements)
1784	{
1785	Node elem = (Node ) lfirst(l);
1786	List *args;
1787	Selectivity s2;
1788
1789	/*
1790	* Theoretically, if elem isn't of nominal_element_type we should
1791	* insert a RelabelType, but it seems unlikely that any operator
1792	* estimation function would really care ...
1793	*/
1794	args = list_make2(leftop, elem);
1795	if (is_join_clause)
1796	s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
1797	clause->inputcollid,
1798	PointerGetDatum(root),
1799	ObjectIdGetDatum(operator),
1800	PointerGetDatum(args),
1801	Int16GetDatum(jointype),
1802	PointerGetDatum(sjinfo)));
1803	else
1804	s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
1805	clause->inputcollid,
1806	PointerGetDatum(root),
1807	ObjectIdGetDatum(operator),
1808	PointerGetDatum(args),
1809	Int32GetDatum(varRelid)));
1810
1811	if (useOr)
1812	{
1813	s1 = s1 + s2 - s1 * s2;
1814	if (isEquality)
1815	s1disjoint += s2;
1816	}
1817	else
1818	{
1819	s1 = s1 * s2;
1820	if (isInequality)
1821	s1disjoint += s2 - `1.0`;
1822	}
1823	}
1824
1825	/ accept disjoint-probability estimate if in range /
1826	if ((useOr ? isEquality : isInequality) &&
1827	s1disjoint >= `0.0` && s1disjoint <= `1.0`)
1828	s1 = s1disjoint;
1829	}
1830	else
1831	{
1832	CaseTestExpr *dummyexpr;
1833	List *args;
1834	Selectivity s2;
1835	int i;
1836
1837	/*
1838	* We need a dummy rightop to pass to the operator selectivity
1839	* routine. It can be pretty much anything that doesn't look like a
1840	* constant; CaseTestExpr is a convenient choice.
1841	*/
1842	dummyexpr = makeNode(CaseTestExpr);
1843	dummyexpr->typeId = nominal_element_type;
1844	dummyexpr->typeMod = -`1`;
1845	dummyexpr->collation = clause->inputcollid;
1846	args = list_make2(leftop, dummyexpr);
1847	if (is_join_clause)
1848	s2 = DatumGetFloat8(FunctionCall5Coll(&oprselproc,
1849	clause->inputcollid,
1850	PointerGetDatum(root),
1851	ObjectIdGetDatum(operator),
1852	PointerGetDatum(args),
1853	Int16GetDatum(jointype),
1854	PointerGetDatum(sjinfo)));
1855	else
1856	s2 = DatumGetFloat8(FunctionCall4Coll(&oprselproc,
1857	clause->inputcollid,
1858	PointerGetDatum(root),
1859	ObjectIdGetDatum(operator),
1860	PointerGetDatum(args),
1861	Int32GetDatum(varRelid)));
1862	s1 = useOr ? `0.0` : `1.0`;
1863
1864	/*
1865	* Arbitrarily assume 10 elements in the eventual array value (see
1866	* also estimate_array_length). We don't risk an assumption of
1867	* disjoint probabilities here.
1868	*/
1869	for (i = `0`; i < `10`; i++)
1870	{
1871	if (useOr)
1872	s1 = s1 + s2 - s1 * s2;
1873	else
1874	s1 = s1 * s2;
1875	}
1876	}
1877
1878	/ result should be in range, but make sure... /
1879	CLAMP_PROBABILITY(s1);
1880
1881	return s1;
1882	}
1883
1884	/*
1885	* Estimate number of elements in the array yielded by an expression.
1886	*
1887	* It's important that this agree with scalararraysel.
1888	*/
1889	int
1890	estimate_array_length(Node *arrayexpr)
1891	{
1892	/ look through any binary-compatible relabeling of arrayexpr /
1893	arrayexpr = strip_array_coercion(arrayexpr);
1894
1895	if (arrayexpr && IsA(arrayexpr, Const))
1896	{
1897	Datum arraydatum = ((Const *) arrayexpr)->constvalue;
1898	bool arrayisnull = ((Const *) arrayexpr)->constisnull;
1899	ArrayType *arrayval;
1900
1901	if (arrayisnull)
1902	return `0`;
1903	arrayval = DatumGetArrayTypeP(arraydatum);
1904	return ArrayGetNItems(ARR_NDIM(arrayval), ARR_DIMS(arrayval));
1905	}
1906	else if (arrayexpr && IsA(arrayexpr, ArrayExpr) &&
1907	!((ArrayExpr *) arrayexpr)->multidims)
1908	{
1909	return list_length(((ArrayExpr *) arrayexpr)->elements);
1910	}
1911	else
1912	{
1913	/ default guess --- see also scalararraysel /
1914	return `10`;
1915	}
1916	}
1917
1918	/*
1919	* rowcomparesel - Selectivity of RowCompareExpr Node.
1920	*
1921	* We estimate RowCompare selectivity by considering just the first (high
1922	* order) columns, which makes it equivalent to an ordinary OpExpr. While
1923	* this estimate could be refined by considering additional columns, it
1924	* seems unlikely that we could do a lot better without multi-column
1925	* statistics.
1926	*/
1927	Selectivity
1928	rowcomparesel(PlannerInfo *root,
1929	RowCompareExpr *clause,
1930	int varRelid, JoinType jointype, SpecialJoinInfo *sjinfo)
1931	{
1932	Selectivity s1;
1933	Oid opno = linitial_oid(clause->opnos);
1934	Oid inputcollid = linitial_oid(clause->inputcollids);
1935	List *opargs;
1936	bool is_join_clause;
1937
1938	/ Build equivalent arg list for single operator /
1939	opargs = list_make2(linitial(clause->largs), linitial(clause->rargs));
1940
1941	/*
1942	* Decide if it's a join clause. This should match clausesel.c's
1943	* treat_as_join_clause(), except that we intentionally consider only the
1944	* leading columns and not the rest of the clause.
1945	*/
1946	if (varRelid != `0`)
1947	{
1948	/*
1949	* Caller is forcing restriction mode (eg, because we are examining an
1950	* inner indexscan qual).
1951	*/
1952	is_join_clause = false;
1953	}
1954	else if (sjinfo == NULL)
1955	{
1956	/*
1957	* It must be a restriction clause, since it's being evaluated at a
1958	* scan node.
1959	*/
1960	is_join_clause = false;
1961	}
1962	else
1963	{
1964	/*
1965	* Otherwise, it's a join if there's more than one relation used.
1966	*/
1967	is_join_clause = (NumRelids((Node *) opargs) > `1`);
1968	}
1969
1970	if (is_join_clause)
1971	{
1972	/ Estimate selectivity for a join clause. /
1973	s1 = join_selectivity(root, opno,
1974	opargs,
1975	inputcollid,
1976	jointype,
1977	sjinfo);
1978	}
1979	else
1980	{
1981	/ Estimate selectivity for a restriction clause. /
1982	s1 = restriction_selectivity(root, opno,
1983	opargs,
1984	inputcollid,
1985	varRelid);
1986	}
1987
1988	return s1;
1989	}
1990
1991	/*
1992	* eqjoinsel - Join selectivity of "="
1993	*/
1994	Datum
1995	eqjoinsel(PG_FUNCTION_ARGS)
1996	{
1997	PlannerInfo root = (PlannerInfo ) PG_GETARG_POINTER(`0`);
1998	Oid operator = PG_GETARG_OID(`1`);
1999	List args = (List ) PG_GETARG_POINTER(`2`);
2000
2001	#ifdef NOT_USED
2002	JoinType jointype = (JoinType) PG_GETARG_INT16(`3`);
2003	#endif
2004	SpecialJoinInfo sjinfo = (SpecialJoinInfo ) PG_GETARG_POINTER(`4`);
2005	double selec;
2006	double selec_inner;
2007	VariableStatData vardata1;
2008	VariableStatData vardata2;
2009	double nd1;
2010	double nd2;
2011	bool isdefault1;
2012	bool isdefault2;
2013	Oid opfuncoid;
2014	AttStatsSlot sslot1;
2015	AttStatsSlot sslot2;
2016	Form_pg_statistic stats1 = NULL;
2017	Form_pg_statistic stats2 = NULL;
2018	bool have_mcvs1 = false;
2019	bool have_mcvs2 = false;
2020	bool join_is_reversed;
2021	RelOptInfo *inner_rel;
2022
2023	get_join_variables(root, args, sjinfo,
2024	&vardata1, &vardata2, &join_is_reversed);
2025
2026	nd1 = get_variable_numdistinct(&vardata1, &isdefault1);
2027	nd2 = get_variable_numdistinct(&vardata2, &isdefault2);
2028
2029	opfuncoid = get_opcode(operator);
2030
2031	memset(&sslot1, `0`, sizeof(sslot1));
2032	memset(&sslot2, `0`, sizeof(sslot2));
2033
2034	if (HeapTupleIsValid(vardata1.statsTuple))
2035	{
2036	/ note we allow use of nullfrac regardless of security check /
2037	stats1 = (Form_pg_statistic) GETSTRUCT(vardata1.statsTuple);
2038	if (statistic_proc_security_check(&vardata1, opfuncoid))
2039	have_mcvs1 = get_attstatsslot(&sslot1, vardata1.statsTuple,
2040	STATISTIC_KIND_MCV, InvalidOid,
2041	ATTSTATSSLOT_VALUES \| ATTSTATSSLOT_NUMBERS);
2042	}
2043
2044	if (HeapTupleIsValid(vardata2.statsTuple))
2045	{
2046	/ note we allow use of nullfrac regardless of security check /
2047	stats2 = (Form_pg_statistic) GETSTRUCT(vardata2.statsTuple);
2048	if (statistic_proc_security_check(&vardata2, opfuncoid))
2049	have_mcvs2 = get_attstatsslot(&sslot2, vardata2.statsTuple,
2050	STATISTIC_KIND_MCV, InvalidOid,
2051	ATTSTATSSLOT_VALUES \| ATTSTATSSLOT_NUMBERS);
2052	}
2053
2054	/ We need to compute the inner-join selectivity in all cases /
2055	selec_inner = eqjoinsel_inner(opfuncoid,
2056	&vardata1, &vardata2,
2057	nd1, nd2,
2058	isdefault1, isdefault2,
2059	&sslot1, &sslot2,
2060	stats1, stats2,
2061	have_mcvs1, have_mcvs2);
2062
2063	switch (sjinfo->jointype)
2064	{
2065	case JOIN_INNER:
2066	case JOIN_LEFT:
2067	case JOIN_FULL:
2068	selec = selec_inner;
2069	break;
2070	case JOIN_SEMI:
2071	case JOIN_ANTI:
2072
2073	/*
2074	* Look up the join's inner relation. min_righthand is sufficient
2075	* information because neither SEMI nor ANTI joins permit any
2076	* reassociation into or out of their RHS, so the righthand will
2077	* always be exactly that set of rels.
2078	*/
2079	inner_rel = find_join_input_rel(root, sjinfo->min_righthand);
2080
2081	if (!join_is_reversed)
2082	selec = eqjoinsel_semi(opfuncoid,
2083	&vardata1, &vardata2,
2084	nd1, nd2,
2085	isdefault1, isdefault2,
2086	&sslot1, &sslot2,
2087	stats1, stats2,
2088	have_mcvs1, have_mcvs2,
2089	inner_rel);
2090	else
2091	{
2092	Oid commop = get_commutator(operator);
2093	Oid commopfuncoid = OidIsValid(commop) ? get_opcode(commop) : InvalidOid;
2094
2095	selec = eqjoinsel_semi(commopfuncoid,
2096	&vardata2, &vardata1,
2097	nd2, nd1,
2098	isdefault2, isdefault1,
2099	&sslot2, &sslot1,
2100	stats2, stats1,
2101	have_mcvs2, have_mcvs1,
2102	inner_rel);
2103	}
2104
2105	/*
2106	* We should never estimate the output of a semijoin to be more
2107	* rows than we estimate for an inner join with the same input
2108	* rels and join condition; it's obviously impossible for that to
2109	* happen. The former estimate is N1 * Ssemi while the latter is
2110	* N1 * N2 * Sinner, so we may clamp Ssemi <= N2 * Sinner. Doing
2111	* this is worthwhile because of the shakier estimation rules we
2112	* use in eqjoinsel_semi, particularly in cases where it has to
2113	* punt entirely.
2114	*/
2115	selec = Min(selec, inner_rel->rows * selec_inner);
2116	break;
2117	default:
2118	/ other values not expected here /
2119	elog(ERROR, "unrecognized join type: %d",
2120	(int) sjinfo->jointype);
2121	selec = `0`; / keep compiler quiet /
2122	break;
2123	}
2124
2125	free_attstatsslot(&sslot1);
2126	free_attstatsslot(&sslot2);
2127
2128	ReleaseVariableStats(vardata1);
2129	ReleaseVariableStats(vardata2);
2130
2131	CLAMP_PROBABILITY(selec);
2132
2133	PG_RETURN_FLOAT8((float8) selec);
2134	}
2135
2136	/*
2137	* eqjoinsel_inner --- eqjoinsel for normal inner join
2138	*
2139	* We also use this for LEFT/FULL outer joins; it's not presently clear
2140	* that it's worth trying to distinguish them here.
2141	*/
2142	static double
2143	eqjoinsel_inner(Oid opfuncoid,
2144	VariableStatData vardata1, VariableStatData vardata2,
2145	double nd1, double nd2,
2146	bool isdefault1, bool isdefault2,
2147	AttStatsSlot sslot1, AttStatsSlot sslot2,
2148	Form_pg_statistic stats1, Form_pg_statistic stats2,
2149	bool have_mcvs1, bool have_mcvs2)
2150	{
2151	double selec;
2152
2153	if (have_mcvs1 && have_mcvs2)
2154	{
2155	/*
2156	* We have most-common-value lists for both relations. Run through
2157	* the lists to see which MCVs actually join to each other with the
2158	* given operator. This allows us to determine the exact join
2159	* selectivity for the portion of the relations represented by the MCV
2160	* lists. We still have to estimate for the remaining population, but
2161	* in a skewed distribution this gives us a big leg up in accuracy.
2162	* For motivation see the analysis in Y. Ioannidis and S.
2163	* Christodoulakis, "On the propagation of errors in the size of join
2164	* results", Technical Report 1018, Computer Science Dept., University
2165	* of Wisconsin, Madison, March 1991 (available from ftp.cs.wisc.edu).
2166	*/
2167	FmgrInfo eqproc;
2168	bool *hasmatch1;
2169	bool *hasmatch2;
2170	double nullfrac1 = stats1->stanullfrac;
2171	double nullfrac2 = stats2->stanullfrac;
2172	double matchprodfreq,
2173	matchfreq1,
2174	matchfreq2,
2175	unmatchfreq1,
2176	unmatchfreq2,
2177	otherfreq1,
2178	otherfreq2,
2179	totalsel1,
2180	totalsel2;
2181	int i,
2182	nmatches;
2183
2184	fmgr_info(opfuncoid, &eqproc);
2185	hasmatch1 = (bool ) palloc0(sslot1->nvalues sizeof(bool));
2186	hasmatch2 = (bool ) palloc0(sslot2->nvalues sizeof(bool));
2187
2188	/*
2189	* Note we assume that each MCV will match at most one member of the
2190	* other MCV list. If the operator isn't really equality, there could
2191	* be multiple matches --- but we don't look for them, both for speed
2192	* and because the math wouldn't add up...
2193	*/
2194	matchprodfreq = `0.0`;
2195	nmatches = `0`;
2196	for (i = `0`; i < sslot1->nvalues; i++)
2197	{
2198	int j;
2199
2200	for (j = `0`; j < sslot2->nvalues; j++)
2201	{
2202	if (hasmatch2[j])
2203	continue;
2204	if (DatumGetBool(FunctionCall2Coll(&eqproc,
2205	sslot1->stacoll,
2206	sslot1->values[i],
2207	sslot2->values[j])))
2208	{
2209	hasmatch1[i] = hasmatch2[j] = true;
2210	matchprodfreq += sslot1->numbers[i] * sslot2->numbers[j];
2211	nmatches++;
2212	break;
2213	}
2214	}
2215	}
2216	CLAMP_PROBABILITY(matchprodfreq);
2217	/ Sum up frequencies of matched and unmatched MCVs /
2218	matchfreq1 = unmatchfreq1 = `0.0`;
2219	for (i = `0`; i < sslot1->nvalues; i++)
2220	{
2221	if (hasmatch1[i])
2222	matchfreq1 += sslot1->numbers[i];
2223	else
2224	unmatchfreq1 += sslot1->numbers[i];
2225	}
2226	CLAMP_PROBABILITY(matchfreq1);
2227	CLAMP_PROBABILITY(unmatchfreq1);
2228	matchfreq2 = unmatchfreq2 = `0.0`;
2229	for (i = `0`; i < sslot2->nvalues; i++)
2230	{
2231	if (hasmatch2[i])
2232	matchfreq2 += sslot2->numbers[i];
2233	else
2234	unmatchfreq2 += sslot2->numbers[i];
2235	}
2236	CLAMP_PROBABILITY(matchfreq2);
2237	CLAMP_PROBABILITY(unmatchfreq2);
2238	pfree(hasmatch1);
2239	pfree(hasmatch2);
2240
2241	/*
2242	* Compute total frequency of non-null values that are not in the MCV
2243	* lists.
2244	*/
2245	otherfreq1 = `1.0` - nullfrac1 - matchfreq1 - unmatchfreq1;
2246	otherfreq2 = `1.0` - nullfrac2 - matchfreq2 - unmatchfreq2;
2247	CLAMP_PROBABILITY(otherfreq1);
2248	CLAMP_PROBABILITY(otherfreq2);
2249
2250	/*
2251	* We can estimate the total selectivity from the point of view of
2252	* relation 1 as: the known selectivity for matched MCVs, plus
2253	* unmatched MCVs that are assumed to match against random members of
2254	* relation 2's non-MCV population, plus non-MCV values that are
2255	* assumed to match against random members of relation 2's unmatched
2256	* MCVs plus non-MCV values.
2257	*/
2258	totalsel1 = matchprodfreq;
2259	if (nd2 > sslot2->nvalues)
2260	totalsel1 += unmatchfreq1 * otherfreq2 / (nd2 - sslot2->nvalues);
2261	if (nd2 > nmatches)
2262	totalsel1 += otherfreq1 * (otherfreq2 + unmatchfreq2) /
2263	(nd2 - nmatches);
2264	/ Same estimate from the point of view of relation 2. /
2265	totalsel2 = matchprodfreq;
2266	if (nd1 > sslot1->nvalues)
2267	totalsel2 += unmatchfreq2 * otherfreq1 / (nd1 - sslot1->nvalues);
2268	if (nd1 > nmatches)
2269	totalsel2 += otherfreq2 * (otherfreq1 + unmatchfreq1) /
2270	(nd1 - nmatches);
2271
2272	/*
2273	* Use the smaller of the two estimates. This can be justified in
2274	* essentially the same terms as given below for the no-stats case: to
2275	* a first approximation, we are estimating from the point of view of
2276	* the relation with smaller nd.
2277	*/
2278	selec = (totalsel1 < totalsel2) ? totalsel1 : totalsel2;
2279	}
2280	else
2281	{
2282	/*
2283	* We do not have MCV lists for both sides. Estimate the join
2284	* selectivity as MIN(1/nd1,1/nd2)(1-nullfrac1)(1-nullfrac2). This
2285	* is plausible if we assume that the join operator is strict and the
2286	* non-null values are about equally distributed: a given non-null
2287	* tuple of rel1 will join to either zero or N2*(1-nullfrac2)/nd2 rows
2288	* of rel2, so total join rows are at most
2289	* N1(1-nullfrac1)N2*(1-nullfrac2)/nd2 giving a join selectivity of
2290	* not more than (1-nullfrac1)*(1-nullfrac2)/nd2. By the same logic it
2291	* is not more than (1-nullfrac1)*(1-nullfrac2)/nd1, so the expression
2292	* with MIN() is an upper bound. Using the MIN() means we estimate
2293	* from the point of view of the relation with smaller nd (since the
2294	* larger nd is determining the MIN). It is reasonable to assume that
2295	* most tuples in this rel will have join partners, so the bound is
2296	* probably reasonably tight and should be taken as-is.
2297	*
2298	* XXX Can we be smarter if we have an MCV list for just one side? It
2299	* seems that if we assume equal distribution for the other side, we
2300	* end up with the same answer anyway.
2301	*/
2302	double nullfrac1 = stats1 ? stats1->stanullfrac : `0.0`;
2303	double nullfrac2 = stats2 ? stats2->stanullfrac : `0.0`;
2304
2305	selec = (`1.0` - nullfrac1) * (`1.0` - nullfrac2);
2306	if (nd1 > nd2)
2307	selec /= nd1;
2308	else
2309	selec /= nd2;
2310	}
2311
2312	return selec;
2313	}
2314
2315	/*
2316	* eqjoinsel_semi --- eqjoinsel for semi join
2317	*
2318	* (Also used for anti join, which we are supposed to estimate the same way.)
2319	* Caller has ensured that vardata1 is the LHS variable.
2320	* Unlike eqjoinsel_inner, we have to cope with opfuncoid being InvalidOid.
2321	*/
2322	static double
2323	eqjoinsel_semi(Oid opfuncoid,
2324	VariableStatData vardata1, VariableStatData vardata2,
2325	double nd1, double nd2,
2326	bool isdefault1, bool isdefault2,
2327	AttStatsSlot sslot1, AttStatsSlot sslot2,
2328	Form_pg_statistic stats1, Form_pg_statistic stats2,
2329	bool have_mcvs1, bool have_mcvs2,
2330	RelOptInfo *inner_rel)
2331	{
2332	double selec;
2333
2334	/*
2335	* We clamp nd2 to be not more than what we estimate the inner relation's
2336	* size to be. This is intuitively somewhat reasonable since obviously
2337	* there can't be more than that many distinct values coming from the
2338	* inner rel. The reason for the asymmetry (ie, that we don't clamp nd1
2339	* likewise) is that this is the only pathway by which restriction clauses
2340	* applied to the inner rel will affect the join result size estimate,
2341	* since set_joinrel_size_estimates will multiply SEMI/ANTI selectivity by
2342	* only the outer rel's size. If we clamped nd1 we'd be double-counting
2343	* the selectivity of outer-rel restrictions.
2344	*
2345	* We can apply this clamping both with respect to the base relation from
2346	* which the join variable comes (if there is just one), and to the
2347	* immediate inner input relation of the current join.
2348	*
2349	* If we clamp, we can treat nd2 as being a non-default estimate; it's not
2350	* great, maybe, but it didn't come out of nowhere either. This is most
2351	* helpful when the inner relation is empty and consequently has no stats.
2352	*/
2353	if (vardata2->rel)
2354	{
2355	if (nd2 >= vardata2->rel->rows)
2356	{
2357	nd2 = vardata2->rel->rows;
2358	isdefault2 = false;
2359	}
2360	}
2361	if (nd2 >= inner_rel->rows)
2362	{
2363	nd2 = inner_rel->rows;
2364	isdefault2 = false;
2365	}
2366
2367	if (have_mcvs1 && have_mcvs2 && OidIsValid(opfuncoid))
2368	{
2369	/*
2370	* We have most-common-value lists for both relations. Run through
2371	* the lists to see which MCVs actually join to each other with the
2372	* given operator. This allows us to determine the exact join
2373	* selectivity for the portion of the relations represented by the MCV
2374	* lists. We still have to estimate for the remaining population, but
2375	* in a skewed distribution this gives us a big leg up in accuracy.
2376	*/
2377	FmgrInfo eqproc;
2378	bool *hasmatch1;
2379	bool *hasmatch2;
2380	double nullfrac1 = stats1->stanullfrac;
2381	double matchfreq1,
2382	uncertainfrac,
2383	uncertain;
2384	int i,
2385	nmatches,
2386	clamped_nvalues2;
2387
2388	/*
2389	* The clamping above could have resulted in nd2 being less than
2390	* sslot2->nvalues; in which case, we assume that precisely the nd2
2391	* most common values in the relation will appear in the join input,
2392	* and so compare to only the first nd2 members of the MCV list. Of
2393	* course this is frequently wrong, but it's the best bet we can make.
2394	*/
2395	clamped_nvalues2 = Min(sslot2->nvalues, nd2);
2396
2397	fmgr_info(opfuncoid, &eqproc);
2398	hasmatch1 = (bool ) palloc0(sslot1->nvalues sizeof(bool));
2399	hasmatch2 = (bool ) palloc0(clamped_nvalues2 sizeof(bool));
2400
2401	/*
2402	* Note we assume that each MCV will match at most one member of the
2403	* other MCV list. If the operator isn't really equality, there could
2404	* be multiple matches --- but we don't look for them, both for speed
2405	* and because the math wouldn't add up...
2406	*/
2407	nmatches = `0`;
2408	for (i = `0`; i < sslot1->nvalues; i++)
2409	{
2410	int j;
2411
2412	for (j = `0`; j < clamped_nvalues2; j++)
2413	{
2414	if (hasmatch2[j])
2415	continue;
2416	if (DatumGetBool(FunctionCall2Coll(&eqproc,
2417	sslot1->stacoll,
2418	sslot1->values[i],
2419	sslot2->values[j])))
2420	{
2421	hasmatch1[i] = hasmatch2[j] = true;
2422	nmatches++;
2423	break;
2424	}
2425	}
2426	}
2427	/ Sum up frequencies of matched MCVs /
2428	matchfreq1 = `0.0`;
2429	for (i = `0`; i < sslot1->nvalues; i++)
2430	{
2431	if (hasmatch1[i])
2432	matchfreq1 += sslot1->numbers[i];
2433	}
2434	CLAMP_PROBABILITY(matchfreq1);
2435	pfree(hasmatch1);
2436	pfree(hasmatch2);
2437
2438	/*
2439	* Now we need to estimate the fraction of relation 1 that has at
2440	* least one join partner. We know for certain that the matched MCVs
2441	* do, so that gives us a lower bound, but we're really in the dark
2442	* about everything else. Our crude approach is: if nd1 <= nd2 then
2443	* assume all non-null rel1 rows have join partners, else assume for
2444	* the uncertain rows that a fraction nd2/nd1 have join partners. We
2445	* can discount the known-matched MCVs from the distinct-values counts
2446	* before doing the division.
2447	*
2448	* Crude as the above is, it's completely useless if we don't have
2449	* reliable ndistinct values for both sides. Hence, if either nd1 or
2450	* nd2 is default, punt and assume half of the uncertain rows have
2451	* join partners.
2452	*/
2453	if (!isdefault1 && !isdefault2)
2454	{
2455	nd1 -= nmatches;
2456	nd2 -= nmatches;
2457	if (nd1 <= nd2 \|\| nd2 < `0`)
2458	uncertainfrac = `1.0`;
2459	else
2460	uncertainfrac = nd2 / nd1;
2461	}
2462	else
2463	uncertainfrac = `0.5`;
2464	uncertain = `1.0` - matchfreq1 - nullfrac1;
2465	CLAMP_PROBABILITY(uncertain);
2466	selec = matchfreq1 + uncertainfrac * uncertain;
2467	}
2468	else
2469	{
2470	/*
2471	* Without MCV lists for both sides, we can only use the heuristic
2472	* about nd1 vs nd2.
2473	*/
2474	double nullfrac1 = stats1 ? stats1->stanullfrac : `0.0`;
2475
2476	if (!isdefault1 && !isdefault2)
2477	{
2478	if (nd1 <= nd2 \|\| nd2 < `0`)
2479	selec = `1.0` - nullfrac1;
2480	else
2481	selec = (nd2 / nd1) * (`1.0` - nullfrac1);
2482	}
2483	else
2484	selec = `0.5` * (`1.0` - nullfrac1);
2485	}
2486
2487	return selec;
2488	}
2489
2490	/*
2491	* neqjoinsel - Join selectivity of "!="
2492	*/
2493	Datum
2494	neqjoinsel(PG_FUNCTION_ARGS)
2495	{
2496	PlannerInfo root = (PlannerInfo ) PG_GETARG_POINTER(`0`);
2497	Oid operator = PG_GETARG_OID(`1`);
2498	List args = (List ) PG_GETARG_POINTER(`2`);
2499	JoinType jointype = (JoinType) PG_GETARG_INT16(`3`);
2500	SpecialJoinInfo sjinfo = (SpecialJoinInfo ) PG_GETARG_POINTER(`4`);
2501	float8 result;
2502
2503	if (jointype == JOIN_SEMI \|\| jointype == JOIN_ANTI)
2504	{
2505	/*
2506	* For semi-joins, if there is more than one distinct value in the RHS
2507	* relation then every non-null LHS row must find a row to join since
2508	* it can only be equal to one of them. We'll assume that there is
2509	* always more than one distinct RHS value for the sake of stability,
2510	* though in theory we could have special cases for empty RHS
2511	* (selectivity = 0) and single-distinct-value RHS (selectivity =
2512	* fraction of LHS that has the same value as the single RHS value).
2513	*
2514	* For anti-joins, if we use the same assumption that there is more
2515	* than one distinct key in the RHS relation, then every non-null LHS
2516	* row must be suppressed by the anti-join.
2517	*
2518	* So either way, the selectivity estimate should be 1 - nullfrac.
2519	*/
2520	VariableStatData leftvar;
2521	VariableStatData rightvar;
2522	bool reversed;
2523	HeapTuple statsTuple;
2524	double nullfrac;
2525
2526	get_join_variables(root, args, sjinfo, &leftvar, &rightvar, &reversed);
2527	statsTuple = reversed ? rightvar.statsTuple : leftvar.statsTuple;
2528	if (HeapTupleIsValid(statsTuple))
2529	nullfrac = ((Form_pg_statistic) GETSTRUCT(statsTuple))->stanullfrac;
2530	else
2531	nullfrac = `0.0`;
2532	ReleaseVariableStats(leftvar);
2533	ReleaseVariableStats(rightvar);
2534
2535	result = `1.0` - nullfrac;
2536	}
2537	else
2538	{
2539	/*
2540	* We want 1 - eqjoinsel() where the equality operator is the one
2541	* associated with this != operator, that is, its negator.
2542	*/
2543	Oid eqop = get_negator(operator);
2544
2545	if (eqop)
2546	{
2547	result = DatumGetFloat8(DirectFunctionCall5(eqjoinsel,
2548	PointerGetDatum(root),
2549	ObjectIdGetDatum(eqop),
2550	PointerGetDatum(args),
2551	Int16GetDatum(jointype),
2552	PointerGetDatum(sjinfo)));
2553	}
2554	else
2555	{
2556	/ Use default selectivity (should we raise an error instead?) /
2557	result = DEFAULT_EQ_SEL;
2558	}
2559	result = `1.0` - result;
2560	}
2561
2562	PG_RETURN_FLOAT8(result);
2563	}
2564
2565	/*
2566	* scalarltjoinsel - Join selectivity of "<" for scalars
2567	*/
2568	Datum
2569	scalarltjoinsel(PG_FUNCTION_ARGS)
2570	{
2571	PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
2572	}
2573
2574	/*
2575	* scalarlejoinsel - Join selectivity of "<=" for scalars
2576	*/
2577	Datum
2578	scalarlejoinsel(PG_FUNCTION_ARGS)
2579	{
2580	PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
2581	}
2582
2583	/*
2584	* scalargtjoinsel - Join selectivity of ">" for scalars
2585	*/
2586	Datum
2587	scalargtjoinsel(PG_FUNCTION_ARGS)
2588	{
2589	PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
2590	}
2591
2592	/*
2593	* scalargejoinsel - Join selectivity of ">=" for scalars
2594	*/
2595	Datum
2596	scalargejoinsel(PG_FUNCTION_ARGS)
2597	{
2598	PG_RETURN_FLOAT8(DEFAULT_INEQ_SEL);
2599	}
2600
2601
2602	/*
2603	* mergejoinscansel - Scan selectivity of merge join.
2604	*
2605	* A merge join will stop as soon as it exhausts either input stream.
2606	* Therefore, if we can estimate the ranges of both input variables,
2607	* we can estimate how much of the input will actually be read. This
2608	* can have a considerable impact on the cost when using indexscans.
2609	*
2610	* Also, we can estimate how much of each input has to be read before the
2611	* first join pair is found, which will affect the join's startup time.
2612	*
2613	* clause should be a clause already known to be mergejoinable. opfamily,
2614	* strategy, and nulls_first specify the sort ordering being used.
2615	*
2616	* The outputs are:
2617	* *leftstart is set to the fraction of the left-hand variable expected
2618	* to be scanned before the first join pair is found (0 to 1).
2619	* *leftend is set to the fraction of the left-hand variable expected
2620	* to be scanned before the join terminates (0 to 1).
2621	* rightstart, rightend similarly for the right-hand variable.
2622	*/
2623	void
2624	mergejoinscansel(PlannerInfo root, Node clause,
2625	Oid opfamily, int strategy, bool nulls_first,
2626	Selectivity leftstart, Selectivity leftend,
2627	Selectivity rightstart, Selectivity rightend)
2628	{
2629	Node *left,
2630	*right;
2631	VariableStatData leftvar,
2632	rightvar;
2633	int op_strategy;
2634	Oid op_lefttype;
2635	Oid op_righttype;
2636	Oid opno,
2637	lsortop,
2638	rsortop,
2639	lstatop,
2640	rstatop,
2641	ltop,
2642	leop,
2643	revltop,
2644	revleop;
2645	bool isgt;
2646	Datum leftmin,
2647	leftmax,
2648	rightmin,
2649	rightmax;
2650	double selec;
2651
2652	/ Set default results if we can't figure anything out. /
2653	/ XXX should default "start" fraction be a bit more than 0? /
2654	leftstart = rightstart = `0.0`;
2655	leftend = rightend = `1.0`;
2656
2657	/ Deconstruct the merge clause /
2658	if (!is_opclause(clause))
2659	return; / shouldn't happen /
2660	opno = ((OpExpr *) clause)->opno;
2661	left = get_leftop((Expr *) clause);
2662	right = get_rightop((Expr *) clause);
2663	if (!right)
2664	return; / shouldn't happen /
2665
2666	/ Look for stats for the inputs /
2667	examine_variable(root, left, `0`, &leftvar);
2668	examine_variable(root, right, `0`, &rightvar);
2669
2670	/ Extract the operator's declared left/right datatypes /
2671	get_op_opfamily_properties(opno, opfamily, false,
2672	&op_strategy,
2673	&op_lefttype,
2674	&op_righttype);
2675	Assert(op_strategy == BTEqualStrategyNumber);
2676
2677	/*
2678	* Look up the various operators we need. If we don't find them all, it
2679	* probably means the opfamily is broken, but we just fail silently.
2680	*
2681	* Note: we expect that pg_statistic histograms will be sorted by the '<'
2682	* operator, regardless of which sort direction we are considering.
2683	*/
2684	switch (strategy)
2685	{
2686	case BTLessStrategyNumber:
2687	isgt = false;
2688	if (op_lefttype == op_righttype)
2689	{
2690	/ easy case /
2691	ltop = get_opfamily_member(opfamily,
2692	op_lefttype, op_righttype,
2693	BTLessStrategyNumber);
2694	leop = get_opfamily_member(opfamily,
2695	op_lefttype, op_righttype,
2696	BTLessEqualStrategyNumber);
2697	lsortop = ltop;
2698	rsortop = ltop;
2699	lstatop = lsortop;
2700	rstatop = rsortop;
2701	revltop = ltop;
2702	revleop = leop;
2703	}
2704	else
2705	{
2706	ltop = get_opfamily_member(opfamily,
2707	op_lefttype, op_righttype,
2708	BTLessStrategyNumber);
2709	leop = get_opfamily_member(opfamily,
2710	op_lefttype, op_righttype,
2711	BTLessEqualStrategyNumber);
2712	lsortop = get_opfamily_member(opfamily,
2713	op_lefttype, op_lefttype,
2714	BTLessStrategyNumber);
2715	rsortop = get_opfamily_member(opfamily,
2716	op_righttype, op_righttype,
2717	BTLessStrategyNumber);
2718	lstatop = lsortop;
2719	rstatop = rsortop;
2720	revltop = get_opfamily_member(opfamily,
2721	op_righttype, op_lefttype,
2722	BTLessStrategyNumber);
2723	revleop = get_opfamily_member(opfamily,
2724	op_righttype, op_lefttype,
2725	BTLessEqualStrategyNumber);
2726	}
2727	break;
2728	case BTGreaterStrategyNumber:
2729	/ descending-order case /
2730	isgt = true;
2731	if (op_lefttype == op_righttype)
2732	{
2733	/ easy case /
2734	ltop = get_opfamily_member(opfamily,
2735	op_lefttype, op_righttype,
2736	BTGreaterStrategyNumber);
2737	leop = get_opfamily_member(opfamily,
2738	op_lefttype, op_righttype,
2739	BTGreaterEqualStrategyNumber);
2740	lsortop = ltop;
2741	rsortop = ltop;
2742	lstatop = get_opfamily_member(opfamily,
2743	op_lefttype, op_lefttype,
2744	BTLessStrategyNumber);
2745	rstatop = lstatop;
2746	revltop = ltop;
2747	revleop = leop;
2748	}
2749	else
2750	{
2751	ltop = get_opfamily_member(opfamily,
2752	op_lefttype, op_righttype,
2753	BTGreaterStrategyNumber);
2754	leop = get_opfamily_member(opfamily,
2755	op_lefttype, op_righttype,
2756	BTGreaterEqualStrategyNumber);
2757	lsortop = get_opfamily_member(opfamily,
2758	op_lefttype, op_lefttype,
2759	BTGreaterStrategyNumber);
2760	rsortop = get_opfamily_member(opfamily,
2761	op_righttype, op_righttype,
2762	BTGreaterStrategyNumber);
2763	lstatop = get_opfamily_member(opfamily,
2764	op_lefttype, op_lefttype,
2765	BTLessStrategyNumber);
2766	rstatop = get_opfamily_member(opfamily,
2767	op_righttype, op_righttype,
2768	BTLessStrategyNumber);
2769	revltop = get_opfamily_member(opfamily,
2770	op_righttype, op_lefttype,
2771	BTGreaterStrategyNumber);
2772	revleop = get_opfamily_member(opfamily,
2773	op_righttype, op_lefttype,
2774	BTGreaterEqualStrategyNumber);
2775	}
2776	break;
2777	default:
2778	goto fail; / shouldn't get here /
2779	}
2780
2781	if (!OidIsValid(lsortop) \|\|
2782	!OidIsValid(rsortop) \|\|
2783	!OidIsValid(lstatop) \|\|
2784	!OidIsValid(rstatop) \|\|
2785	!OidIsValid(ltop) \|\|
2786	!OidIsValid(leop) \|\|
2787	!OidIsValid(revltop) \|\|
2788	!OidIsValid(revleop))
2789	goto fail; / insufficient info in catalogs /
2790
2791	/ Try to get ranges of both inputs /
2792	if (!isgt)
2793	{
2794	if (!get_variable_range(root, &leftvar, lstatop,
2795	&leftmin, &leftmax))
2796	goto fail; / no range available from stats /
2797	if (!get_variable_range(root, &rightvar, rstatop,
2798	&rightmin, &rightmax))
2799	goto fail; / no range available from stats /
2800	}
2801	else
2802	{
2803	/ need to swap the max and min /
2804	if (!get_variable_range(root, &leftvar, lstatop,
2805	&leftmax, &leftmin))
2806	goto fail; / no range available from stats /
2807	if (!get_variable_range(root, &rightvar, rstatop,
2808	&rightmax, &rightmin))
2809	goto fail; / no range available from stats /
2810	}
2811
2812	/*
2813	* Now, the fraction of the left variable that will be scanned is the
2814	* fraction that's <= the right-side maximum value. But only believe
2815	* non-default estimates, else stick with our 1.0.
2816	*/
2817	selec = scalarineqsel(root, leop, isgt, true, &leftvar,
2818	rightmax, op_righttype);
2819	if (selec != DEFAULT_INEQ_SEL)
2820	*leftend = selec;
2821
2822	/ And similarly for the right variable. /
2823	selec = scalarineqsel(root, revleop, isgt, true, &rightvar,
2824	leftmax, op_lefttype);
2825	if (selec != DEFAULT_INEQ_SEL)
2826	*rightend = selec;
2827
2828	/*
2829	* Only one of the two "end" fractions can really be less than 1.0;
2830	* believe the smaller estimate and reset the other one to exactly 1.0. If
2831	* we get exactly equal estimates (as can easily happen with self-joins),
2832	* believe neither.
2833	*/
2834	if (leftend > rightend)
2835	*leftend = `1.0`;
2836	else if (leftend < rightend)
2837	*rightend = `1.0`;
2838	else
2839	leftend = rightend = `1.0`;
2840
2841	/*
2842	* Also, the fraction of the left variable that will be scanned before the
2843	* first join pair is found is the fraction that's < the right-side
2844	* minimum value. But only believe non-default estimates, else stick with
2845	* our own default.
2846	*/
2847	selec = scalarineqsel(root, ltop, isgt, false, &leftvar,
2848	rightmin, op_righttype);
2849	if (selec != DEFAULT_INEQ_SEL)
2850	*leftstart = selec;
2851
2852	/ And similarly for the right variable. /
2853	selec = scalarineqsel(root, revltop, isgt, false, &rightvar,
2854	leftmin, op_lefttype);
2855	if (selec != DEFAULT_INEQ_SEL)
2856	*rightstart = selec;
2857
2858	/*
2859	* Only one of the two "start" fractions can really be more than zero;
2860	* believe the larger estimate and reset the other one to exactly 0.0. If
2861	* we get exactly equal estimates (as can easily happen with self-joins),
2862	* believe neither.
2863	*/
2864	if (leftstart < rightstart)
2865	*leftstart = `0.0`;
2866	else if (leftstart > rightstart)
2867	*rightstart = `0.0`;
2868	else
2869	leftstart = rightstart = `0.0`;
2870
2871	/*
2872	* If the sort order is nulls-first, we're going to have to skip over any
2873	* nulls too. These would not have been counted by scalarineqsel, and we
2874	* can safely add in this fraction regardless of whether we believe
2875	* scalarineqsel's results or not. But be sure to clamp the sum to 1.0!
2876	*/
2877	if (nulls_first)
2878	{
2879	Form_pg_statistic stats;
2880
2881	if (HeapTupleIsValid(leftvar.statsTuple))
2882	{
2883	stats = (Form_pg_statistic) GETSTRUCT(leftvar.statsTuple);
2884	*leftstart += stats->stanullfrac;
2885	CLAMP_PROBABILITY(*leftstart);
2886	*leftend += stats->stanullfrac;
2887	CLAMP_PROBABILITY(*leftend);
2888	}
2889	if (HeapTupleIsValid(rightvar.statsTuple))
2890	{
2891	stats = (Form_pg_statistic) GETSTRUCT(rightvar.statsTuple);
2892	*rightstart += stats->stanullfrac;
2893	CLAMP_PROBABILITY(*rightstart);
2894	*rightend += stats->stanullfrac;
2895	CLAMP_PROBABILITY(*rightend);
2896	}
2897	}
2898
2899	/ Disbelieve start >= end, just in case that can happen /
2900	if (leftstart >= leftend)
2901	{
2902	*leftstart = `0.0`;
2903	*leftend = `1.0`;
2904	}
2905	if (rightstart >= rightend)
2906	{
2907	*rightstart = `0.0`;
2908	*rightend = `1.0`;
2909	}
2910
2911	fail:
2912	ReleaseVariableStats(leftvar);
2913	ReleaseVariableStats(rightvar);
2914	}
2915
2916
2917	/*
2918	* Helper routine for estimate_num_groups: add an item to a list of
2919	* GroupVarInfos, but only if it's not known equal to any of the existing
2920	* entries.
2921	*/
2922	typedef struct
2923	{
2924	Node var; /* might be an expression, not just a Var /
2925	RelOptInfo rel; /* relation it belongs to /
2926	double ndistinct; / # distinct values /
2927	} GroupVarInfo;
2928
2929	static List *
2930	add_unique_group_var(PlannerInfo root, List varinfos,
2931	Node var, VariableStatData vardata)
2932	{
2933	GroupVarInfo *varinfo;
2934	double ndistinct;
2935	bool isdefault;
2936	ListCell *lc;
2937
2938	ndistinct = get_variable_numdistinct(vardata, &isdefault);
2939
2940	/ cannot use foreach here because of possible list_delete /
2941	lc = list_head(varinfos);
2942	while (lc)
2943	{
2944	varinfo = (GroupVarInfo *) lfirst(lc);
2945
2946	/ must advance lc before list_delete possibly pfree's it /
2947	lc = lnext(lc);
2948
2949	/ Drop exact duplicates /
2950	if (equal(var, varinfo->var))
2951	return varinfos;
2952
2953	/*
2954	* Drop known-equal vars, but only if they belong to different
2955	* relations (see comments for estimate_num_groups)
2956	*/
2957	if (vardata->rel != varinfo->rel &&
2958	exprs_known_equal(root, var, varinfo->var))
2959	{
2960	if (varinfo->ndistinct <= ndistinct)
2961	{
2962	/ Keep older item, forget new one /
2963	return varinfos;
2964	}
2965	else
2966	{
2967	/ Delete the older item /
2968	varinfos = list_delete_ptr(varinfos, varinfo);
2969	}
2970	}
2971	}
2972
2973	varinfo = (GroupVarInfo ) palloc(sizeof*(GroupVarInfo));
2974
2975	varinfo->var = var;
2976	varinfo->rel = vardata->rel;
2977	varinfo->ndistinct = ndistinct;
2978	varinfos = lappend(varinfos, varinfo);
2979	return varinfos;
2980	}
2981
2982	/*
2983	* estimate_num_groups - Estimate number of groups in a grouped query
2984	*
2985	* Given a query having a GROUP BY clause, estimate how many groups there
2986	* will be --- ie, the number of distinct combinations of the GROUP BY
2987	* expressions.
2988	*
2989	* This routine is also used to estimate the number of rows emitted by
2990	* a DISTINCT filtering step; that is an isomorphic problem. (Note:
2991	* actually, we only use it for DISTINCT when there's no grouping or
2992	* aggregation ahead of the DISTINCT.)
2993	*
2994	* Inputs:
2995	* root - the query
2996	* groupExprs - list of expressions being grouped by
2997	* input_rows - number of rows estimated to arrive at the group/unique
2998	* filter step
2999	* pgset - NULL, or a List** pointing to a grouping set to filter the
3000	* groupExprs against
3001	*
3002	* Given the lack of any cross-correlation statistics in the system, it's
3003	* impossible to do anything really trustworthy with GROUP BY conditions
3004	* involving multiple Vars. We should however avoid assuming the worst
3005	* case (all possible cross-product terms actually appear as groups) since
3006	* very often the grouped-by Vars are highly correlated. Our current approach
3007	* is as follows:
3008	* 1. Expressions yielding boolean are assumed to contribute two groups,
3009	* independently of their content, and are ignored in the subsequent
3010	* steps. This is mainly because tests like "col IS NULL" break the
3011	* heuristic used in step 2 especially badly.
3012	* 2. Reduce the given expressions to a list of unique Vars used. For
3013	* example, GROUP BY a, a + b is treated the same as GROUP BY a, b.
3014	* It is clearly correct not to count the same Var more than once.
3015	* It is also reasonable to treat f(x) the same as x: f() cannot
3016	* increase the number of distinct values (unless it is volatile,
3017	* which we consider unlikely for grouping), but it probably won't
3018	* reduce the number of distinct values much either.
3019	* As a special case, if a GROUP BY expression can be matched to an
3020	* expressional index for which we have statistics, then we treat the
3021	* whole expression as though it were just a Var.
3022	* 3. If the list contains Vars of different relations that are known equal
3023	* due to equivalence classes, then drop all but one of the Vars from each
3024	* known-equal set, keeping the one with smallest estimated # of values
3025	* (since the extra values of the others can't appear in joined rows).
3026	* Note the reason we only consider Vars of different relations is that
3027	* if we considered ones of the same rel, we'd be double-counting the
3028	* restriction selectivity of the equality in the next step.
3029	* 4. For Vars within a single source rel, we multiply together the numbers
3030	* of values, clamp to the number of rows in the rel (divided by 10 if
3031	* more than one Var), and then multiply by a factor based on the
3032	* selectivity of the restriction clauses for that rel. When there's
3033	* more than one Var, the initial product is probably too high (it's the
3034	* worst case) but clamping to a fraction of the rel's rows seems to be a
3035	* helpful heuristic for not letting the estimate get out of hand. (The
3036	* factor of 10 is derived from pre-Postgres-7.4 practice.) The factor
3037	* we multiply by to adjust for the restriction selectivity assumes that
3038	* the restriction clauses are independent of the grouping, which may not
3039	* be a valid assumption, but it's hard to do better.
3040	* 5. If there are Vars from multiple rels, we repeat step 4 for each such
3041	* rel, and multiply the results together.
3042	* Note that rels not containing grouped Vars are ignored completely, as are
3043	* join clauses. Such rels cannot increase the number of groups, and we
3044	* assume such clauses do not reduce the number either (somewhat bogus,
3045	* but we don't have the info to do better).
3046	*/
3047	double
3048	estimate_num_groups(PlannerInfo root, List groupExprs, double input_rows,
3049	List **pgset)
3050	{
3051	List *varinfos = NIL;
3052	double srf_multiplier = `1.0`;
3053	double numdistinct;
3054	ListCell *l;
3055	int i;
3056
3057	/*
3058	* We don't ever want to return an estimate of zero groups, as that tends
3059	* to lead to division-by-zero and other unpleasantness. The input_rows
3060	* estimate is usually already at least 1, but clamp it just in case it
3061	* isn't.
3062	*/
3063	input_rows = clamp_row_est(input_rows);
3064
3065	/*
3066	* If no grouping columns, there's exactly one group. (This can't happen
3067	* for normal cases with GROUP BY or DISTINCT, but it is possible for
3068	* corner cases with set operations.)
3069	*/
3070	if (groupExprs == NIL \|\| (pgset && list_length(*pgset) < `1`))
3071	return `1.0`;
3072
3073	/*
3074	* Count groups derived from boolean grouping expressions. For other
3075	* expressions, find the unique Vars used, treating an expression as a Var
3076	* if we can find stats for it. For each one, record the statistical
3077	* estimate of number of distinct values (total in its table, without
3078	* regard for filtering).
3079	*/
3080	numdistinct = `1.0`;
3081
3082	i = `0`;
3083	foreach(l, groupExprs)
3084	{
3085	Node groupexpr = (Node ) lfirst(l);
3086	double this_srf_multiplier;
3087	VariableStatData vardata;
3088	List *varshere;
3089	ListCell *l2;
3090
3091	/ is expression in this grouping set? /
3092	if (pgset && !list_member_int(*pgset, i++))
3093	continue;
3094
3095	/*
3096	* Set-returning functions in grouping columns are a bit problematic.
3097	* The code below will effectively ignore their SRF nature and come up
3098	* with a numdistinct estimate as though they were scalar functions.
3099	* We compensate by scaling up the end result by the largest SRF
3100	* rowcount estimate. (This will be an overestimate if the SRF
3101	* produces multiple copies of any output value, but it seems best to
3102	* assume the SRF's outputs are distinct. In any case, it's probably
3103	* pointless to worry too much about this without much better
3104	* estimates for SRF output rowcounts than we have today.)
3105	*/
3106	this_srf_multiplier = expression_returns_set_rows(root, groupexpr);
3107	if (srf_multiplier < this_srf_multiplier)
3108	srf_multiplier = this_srf_multiplier;
3109
3110	/ Short-circuit for expressions returning boolean /
3111	if (exprType(groupexpr) == BOOLOID)
3112	{
3113	numdistinct *= `2.0`;
3114	continue;
3115	}
3116
3117	/*
3118	* If examine_variable is able to deduce anything about the GROUP BY
3119	* expression, treat it as a single variable even if it's really more
3120	* complicated.
3121	*/
3122	examine_variable(root, groupexpr, `0`, &vardata);
3123	if (HeapTupleIsValid(vardata.statsTuple) \|\| vardata.isunique)
3124	{
3125	varinfos = add_unique_group_var(root, varinfos,
3126	groupexpr, &vardata);
3127	ReleaseVariableStats(vardata);
3128	continue;
3129	}
3130	ReleaseVariableStats(vardata);
3131
3132	/*
3133	* Else pull out the component Vars. Handle PlaceHolderVars by
3134	* recursing into their arguments (effectively assuming that the
3135	* PlaceHolderVar doesn't change the number of groups, which boils
3136	* down to ignoring the possible addition of nulls to the result set).
3137	*/
3138	varshere = pull_var_clause(groupexpr,
3139	PVC_RECURSE_AGGREGATES \|
3140	PVC_RECURSE_WINDOWFUNCS \|
3141	PVC_RECURSE_PLACEHOLDERS);
3142
3143	/*
3144	* If we find any variable-free GROUP BY item, then either it is a
3145	* constant (and we can ignore it) or it contains a volatile function;
3146	* in the latter case we punt and assume that each input row will
3147	* yield a distinct group.
3148	*/
3149	if (varshere == NIL)
3150	{
3151	if (contain_volatile_functions(groupexpr))
3152	return input_rows;
3153	continue;
3154	}
3155
3156	/*
3157	* Else add variables to varinfos list
3158	*/
3159	foreach(l2, varshere)
3160	{
3161	Node var = (Node ) lfirst(l2);
3162
3163	examine_variable(root, var, `0`, &vardata);
3164	varinfos = add_unique_group_var(root, varinfos, var, &vardata);
3165	ReleaseVariableStats(vardata);
3166	}
3167	}
3168
3169	/*
3170	* If now no Vars, we must have an all-constant or all-boolean GROUP BY
3171	* list.
3172	*/
3173	if (varinfos == NIL)
3174	{
3175	/ Apply SRF multiplier as we would do in the long path /
3176	numdistinct *= srf_multiplier;
3177	/ Round off /
3178	numdistinct = ceil(numdistinct);
3179	/ Guard against out-of-range answers /
3180	if (numdistinct > input_rows)
3181	numdistinct = input_rows;
3182	if (numdistinct < `1.0`)
3183	numdistinct = `1.0`;
3184	return numdistinct;
3185	}
3186
3187	/*
3188	* Group Vars by relation and estimate total numdistinct.
3189	*
3190	* For each iteration of the outer loop, we process the frontmost Var in
3191	* varinfos, plus all other Vars in the same relation. We remove these
3192	* Vars from the newvarinfos list for the next iteration. This is the
3193	* easiest way to group Vars of same rel together.
3194	*/
3195	do
3196	{
3197	GroupVarInfo varinfo1 = (GroupVarInfo ) linitial(varinfos);
3198	RelOptInfo *rel = varinfo1->rel;
3199	double reldistinct = `1`;
3200	double relmaxndistinct = reldistinct;
3201	int relvarcount = `0`;
3202	List *newvarinfos = NIL;
3203	List *relvarinfos = NIL;
3204
3205	/*
3206	* Split the list of varinfos in two - one for the current rel, one
3207	* for remaining Vars on other rels.
3208	*/
3209	relvarinfos = lcons(varinfo1, relvarinfos);
3210	for_each_cell(l, lnext(list_head(varinfos)))
3211	{
3212	GroupVarInfo varinfo2 = (GroupVarInfo ) lfirst(l);
3213
3214	if (varinfo2->rel == varinfo1->rel)
3215	{
3216	/ varinfos on current rel /
3217	relvarinfos = lcons(varinfo2, relvarinfos);
3218	}
3219	else
3220	{
3221	/ not time to process varinfo2 yet /
3222	newvarinfos = lcons(varinfo2, newvarinfos);
3223	}
3224	}
3225
3226	/*
3227	* Get the numdistinct estimate for the Vars of this rel. We
3228	* iteratively search for multivariate n-distinct with maximum number
3229	* of vars; assuming that each var group is independent of the others,
3230	* we multiply them together. Any remaining relvarinfos after no more
3231	* multivariate matches are found are assumed independent too, so
3232	* their individual ndistinct estimates are multiplied also.
3233	*
3234	* While iterating, count how many separate numdistinct values we
3235	* apply. We apply a fudge factor below, but only if we multiplied
3236	* more than one such values.
3237	*/
3238	while (relvarinfos)
3239	{
3240	double mvndistinct;
3241
3242	if (estimate_multivariate_ndistinct(root, rel, &relvarinfos,
3243	&mvndistinct))
3244	{
3245	reldistinct *= mvndistinct;
3246	if (relmaxndistinct < mvndistinct)
3247	relmaxndistinct = mvndistinct;
3248	relvarcount++;
3249	}
3250	else
3251	{
3252	foreach(l, relvarinfos)
3253	{
3254	GroupVarInfo varinfo2 = (GroupVarInfo ) lfirst(l);
3255
3256	reldistinct *= varinfo2->ndistinct;
3257	if (relmaxndistinct < varinfo2->ndistinct)
3258	relmaxndistinct = varinfo2->ndistinct;
3259	relvarcount++;
3260	}
3261
3262	/ we're done with this relation /
3263	relvarinfos = NIL;
3264	}
3265	}
3266
3267	/*
3268	* Sanity check --- don't divide by zero if empty relation.
3269	*/
3270	Assert(IS_SIMPLE_REL(rel));
3271	if (rel->tuples > `0`)
3272	{
3273	/*
3274	* Clamp to size of rel, or size of rel / 10 if multiple Vars. The
3275	* fudge factor is because the Vars are probably correlated but we
3276	* don't know by how much. We should never clamp to less than the
3277	* largest ndistinct value for any of the Vars, though, since
3278	* there will surely be at least that many groups.
3279	*/
3280	double clamp = rel->tuples;
3281
3282	if (relvarcount > `1`)
3283	{
3284	clamp *= `0.1`;
3285	if (clamp < relmaxndistinct)
3286	{
3287	clamp = relmaxndistinct;
3288	/ for sanity in case some ndistinct is too large: /
3289	if (clamp > rel->tuples)
3290	clamp = rel->tuples;
3291	}
3292	}
3293	if (reldistinct > clamp)
3294	reldistinct = clamp;
3295
3296	/*
3297	* Update the estimate based on the restriction selectivity,
3298	* guarding against division by zero when reldistinct is zero.
3299	* Also skip this if we know that we are returning all rows.
3300	*/
3301	if (reldistinct > `0` && rel->rows < rel->tuples)
3302	{
3303	/*
3304	* Given a table containing N rows with n distinct values in a
3305	* uniform distribution, if we select p rows at random then
3306	* the expected number of distinct values selected is
3307	*
3308	* n * (1 - product((N-N/n-i)/(N-i), i=0..p-1))
3309	*
3310	* = n * (1 - (N-N/n)! / (N-N/n-p)! * (N-p)! / N!)
3311	*
3312	* See "Approximating block accesses in database
3313	* organizations", S. B. Yao, Communications of the ACM,
3314	* Volume 20 Issue 4, April 1977 Pages 260-261.
3315	*
3316	* Alternatively, re-arranging the terms from the factorials,
3317	* this may be written as
3318	*
3319	* n * (1 - product((N-p-i)/(N-i), i=0..N/n-1))
3320	*
3321	* This form of the formula is more efficient to compute in
3322	* the common case where p is larger than N/n. Additionally,
3323	* as pointed out by Dell'Era, if i << N for all terms in the
3324	* product, it can be approximated by
3325	*
3326	* n * (1 - ((N-p)/N)^(N/n))
3327	*
3328	* See "Expected distinct values when selecting from a bag
3329	* without replacement", Alberto Dell'Era,
3330	* http://www.adellera.it/investigations/distinct_balls/.
3331	*
3332	* The condition i << N is equivalent to n >> 1, so this is a
3333	* good approximation when the number of distinct values in
3334	* the table is large. It turns out that this formula also
3335	* works well even when n is small.
3336	*/
3337	reldistinct *=
3338	(`1` - pow((rel->tuples - rel->rows) / rel->tuples,
3339	rel->tuples / reldistinct));
3340	}
3341	reldistinct = clamp_row_est(reldistinct);
3342
3343	/*
3344	* Update estimate of total distinct groups.
3345	*/
3346	numdistinct *= reldistinct;
3347	}
3348
3349	varinfos = newvarinfos;
3350	} while (varinfos != NIL);
3351
3352	/ Now we can account for the effects of any SRFs /
3353	numdistinct *= srf_multiplier;
3354
3355	/ Round off /
3356	numdistinct = ceil(numdistinct);
3357
3358	/ Guard against out-of-range answers /
3359	if (numdistinct > input_rows)
3360	numdistinct = input_rows;
3361	if (numdistinct < `1.0`)
3362	numdistinct = `1.0`;
3363
3364	return numdistinct;
3365	}
3366
3367	/*
3368	* Estimate hash bucket statistics when the specified expression is used
3369	* as a hash key for the given number of buckets.
3370	*
3371	* This attempts to determine two values:
3372	*
3373	* 1. The frequency of the most common value of the expression (returns
3374	* zero into *mcv_freq if we can't get that).
3375	*
3376	* 2. The "bucketsize fraction", ie, average number of entries in a bucket
3377	* divided by total tuples in relation.
3378	*
3379	* XXX This is really pretty bogus since we're effectively assuming that the
3380	* distribution of hash keys will be the same after applying restriction
3381	* clauses as it was in the underlying relation. However, we are not nearly
3382	* smart enough to figure out how the restrict clauses might change the
3383	* distribution, so this will have to do for now.
3384	*
3385	* We are passed the number of buckets the executor will use for the given
3386	* input relation. If the data were perfectly distributed, with the same
3387	* number of tuples going into each available bucket, then the bucketsize
3388	* fraction would be 1/nbuckets. But this happy state of affairs will occur
3389	* only if (a) there are at least nbuckets distinct data values, and (b)
3390	* we have a not-too-skewed data distribution. Otherwise the buckets will
3391	* be nonuniformly occupied. If the other relation in the join has a key
3392	* distribution similar to this one's, then the most-loaded buckets are
3393	* exactly those that will be probed most often. Therefore, the "average"
3394	* bucket size for costing purposes should really be taken as something close
3395	* to the "worst case" bucket size. We try to estimate this by adjusting the
3396	* fraction if there are too few distinct data values, and then scaling up
3397	* by the ratio of the most common value's frequency to the average frequency.
3398	*
3399	* If no statistics are available, use a default estimate of 0.1. This will
3400	* discourage use of a hash rather strongly if the inner relation is large,
3401	* which is what we want. We do not want to hash unless we know that the
3402	* inner rel is well-dispersed (or the alternatives seem much worse).
3403	*
3404	* The caller should also check that the mcv_freq is not so large that the
3405	* most common value would by itself require an impractically large bucket.
3406	* In a hash join, the executor can split buckets if they get too big, but
3407	* obviously that doesn't help for a bucket that contains many duplicates of
3408	* the same value.
3409	*/
3410	void
3411	estimate_hash_bucket_stats(PlannerInfo root, Node hashkey, double nbuckets,
3412	Selectivity *mcv_freq,
3413	Selectivity *bucketsize_frac)
3414	{
3415	VariableStatData vardata;
3416	double estfract,
3417	ndistinct,
3418	stanullfrac,
3419	avgfreq;
3420	bool isdefault;
3421	AttStatsSlot sslot;
3422
3423	examine_variable(root, hashkey, `0`, &vardata);
3424
3425	/ Look up the frequency of the most common value, if available /
3426	*mcv_freq = `0.0`;
3427
3428	if (HeapTupleIsValid(vardata.statsTuple))
3429	{
3430	if (get_attstatsslot(&sslot, vardata.statsTuple,
3431	STATISTIC_KIND_MCV, InvalidOid,
3432	ATTSTATSSLOT_NUMBERS))
3433	{
3434	/*
3435	* The first MCV stat is for the most common value.
3436	*/
3437	if (sslot.nnumbers > `0`)
3438	*mcv_freq = sslot.numbers[`0`];
3439	free_attstatsslot(&sslot);
3440	}
3441	}
3442
3443	/ Get number of distinct values /
3444	ndistinct = get_variable_numdistinct(&vardata, &isdefault);
3445
3446	/*
3447	* If ndistinct isn't real, punt. We normally return 0.1, but if the
3448	* mcv_freq is known to be even higher than that, use it instead.
3449	*/
3450	if (isdefault)
3451	{
3452	bucketsize_frac = (Selectivity) Max(`0.1`, mcv_freq);
3453	ReleaseVariableStats(vardata);
3454	return;
3455	}
3456
3457	/ Get fraction that are null /
3458	if (HeapTupleIsValid(vardata.statsTuple))
3459	{
3460	Form_pg_statistic stats;
3461
3462	stats = (Form_pg_statistic) GETSTRUCT(vardata.statsTuple);
3463	stanullfrac = stats->stanullfrac;
3464	}
3465	else
3466	stanullfrac = `0.0`;
3467
3468	/ Compute avg freq of all distinct data values in raw relation /
3469	avgfreq = (`1.0` - stanullfrac) / ndistinct;
3470
3471	/*
3472	* Adjust ndistinct to account for restriction clauses. Observe we are
3473	* assuming that the data distribution is affected uniformly by the
3474	* restriction clauses!
3475	*
3476	* XXX Possibly better way, but much more expensive: multiply by
3477	* selectivity of rel's restriction clauses that mention the target Var.
3478	*/
3479	if (vardata.rel && vardata.rel->tuples > `0`)
3480	{
3481	ndistinct *= vardata.rel->rows / vardata.rel->tuples;
3482	ndistinct = clamp_row_est(ndistinct);
3483	}
3484
3485	/*
3486	* Initial estimate of bucketsize fraction is 1/nbuckets as long as the
3487	* number of buckets is less than the expected number of distinct values;
3488	* otherwise it is 1/ndistinct.
3489	*/
3490	if (ndistinct > nbuckets)
3491	estfract = `1.0` / nbuckets;
3492	else
3493	estfract = `1.0` / ndistinct;
3494
3495	/*
3496	* Adjust estimated bucketsize upward to account for skewed distribution.
3497	*/
3498	if (avgfreq > `0.0` && *mcv_freq > avgfreq)
3499	estfract = mcv_freq / avgfreq;
3500
3501	/*
3502	* Clamp bucketsize to sane range (the above adjustment could easily
3503	* produce an out-of-range result). We set the lower bound a little above
3504	* zero, since zero isn't a very sane result.
3505	*/
3506	if (estfract < `1.0e-6`)
3507	estfract = `1.0e-6`;
3508	else if (estfract > `1.0`)
3509	estfract = `1.0`;
3510
3511	*bucketsize_frac = (Selectivity) estfract;
3512
3513	ReleaseVariableStats(vardata);
3514	}
3515
3516	/*
3517	* estimate_hashagg_tablesize
3518	* estimate the number of bytes that a hash aggregate hashtable will
3519	* require based on the agg_costs, path width and number of groups.
3520	*
3521	* We return the result as "double" to forestall any possible overflow
3522	* problem in the multiplication by dNumGroups.
3523	*
3524	* XXX this may be over-estimating the size now that hashagg knows to omit
3525	* unneeded columns from the hashtable. Also for mixed-mode grouping sets,
3526	* grouping columns not in the hashed set are counted here even though hashagg
3527	* won't store them. Is this a problem?
3528	*/
3529	double
3530	estimate_hashagg_tablesize(Path path, const* AggClauseCosts *agg_costs,
3531	double dNumGroups)
3532	{
3533	Size hashentrysize;
3534
3535	/ Estimate per-hash-entry space at tuple width... /
3536	hashentrysize = MAXALIGN(path->pathtarget->width) +
3537	MAXALIGN(SizeofMinimalTupleHeader);
3538
3539	/ plus space for pass-by-ref transition values... /
3540	hashentrysize += agg_costs->transitionSpace;
3541	/ plus the per-hash-entry overhead /
3542	hashentrysize += hash_agg_entry_size(agg_costs->numAggs);
3543
3544	/*
3545	* Note that this disregards the effect of fill-factor and growth policy
3546	* of the hash table. That's probably ok, given that the default
3547	* fill-factor is relatively high. It'd be hard to meaningfully factor in
3548	* "double-in-size" growth policies here.
3549	*/
3550	return hashentrysize * dNumGroups;
3551	}
3552
3553
3554	/-------------------------------------------------------------------------*
3555	*
3556	* Support routines
3557	*
3558	*-------------------------------------------------------------------------
3559	*/
3560
3561	/*
3562	* Find applicable ndistinct statistics for the given list of VarInfos (which
3563	* must all belong to the given rel), and update *ndistinct to the estimate of
3564	* the MVNDistinctItem that best matches. If a match it found, *varinfos is
3565	* updated to remove the list of matched varinfos.
3566	*
3567	* Varinfos that aren't for simple Vars are ignored.
3568	*
3569	* Return true if we're able to find a match, false otherwise.
3570	*/
3571	static bool
3572	estimate_multivariate_ndistinct(PlannerInfo root, RelOptInfo rel,
3573	List *varinfos, double* *ndistinct)
3574	{
3575	ListCell *lc;
3576	Bitmapset *attnums = NULL;
3577	int nmatches;
3578	Oid statOid = InvalidOid;
3579	MVNDistinct *stats;
3580	Bitmapset *matched = NULL;
3581
3582	/ bail out immediately if the table has no extended statistics /
3583	if (!rel->statlist)
3584	return false;
3585
3586	/ Determine the attnums we're looking for /
3587	foreach(lc, *varinfos)
3588	{
3589	GroupVarInfo varinfo = (GroupVarInfo ) lfirst(lc);
3590
3591	Assert(varinfo->rel == rel);
3592
3593	if (IsA(varinfo->var, Var))
3594	{
3595	attnums = bms_add_member(attnums,
3596	((Var *) varinfo->var)->varattno);
3597	}
3598	}
3599
3600	/ look for the ndistinct statistics matching the most vars /
3601	nmatches = `1`; / we require at least two matches /
3602	foreach(lc, rel->statlist)
3603	{
3604	StatisticExtInfo info = (StatisticExtInfo ) lfirst(lc);
3605	Bitmapset *shared;
3606	int nshared;
3607
3608	/ skip statistics of other kinds /
3609	if (info->kind != STATS_EXT_NDISTINCT)
3610	continue;
3611
3612	/ compute attnums shared by the vars and the statistics object /
3613	shared = bms_intersect(info->keys, attnums);
3614	nshared = bms_num_members(shared);
3615
3616	/*
3617	* Does this statistics object match more columns than the currently
3618	* best object? If so, use this one instead.
3619	*
3620	* XXX This should break ties using name of the object, or something
3621	* like that, to make the outcome stable.
3622	*/
3623	if (nshared > nmatches)
3624	{
3625	statOid = info->statOid;
3626	nmatches = nshared;
3627	matched = shared;
3628	}
3629	}
3630
3631	/ No match? /
3632	if (statOid == InvalidOid)
3633	return false;
3634	Assert(nmatches > `1` && matched != NULL);
3635
3636	stats = statext_ndistinct_load(statOid);
3637
3638	/*
3639	* If we have a match, search it for the specific item that matches (there
3640	* must be one), and construct the output values.
3641	*/
3642	if (stats)
3643	{
3644	int i;
3645	List *newlist = NIL;
3646	MVNDistinctItem *item = NULL;
3647
3648	/ Find the specific item that exactly matches the combination /
3649	for (i = `0`; i < stats->nitems; i++)
3650	{
3651	MVNDistinctItem *tmpitem = &stats->items[i];
3652
3653	if (bms_subset_compare(tmpitem->attrs, matched) == BMS_EQUAL)
3654	{
3655	item = tmpitem;
3656	break;
3657	}
3658	}
3659
3660	/ make sure we found an item /
3661	if (!item)
3662	elog(ERROR, "corrupt MVNDistinct entry");
3663
3664	/ Form the output varinfo list, keeping only unmatched ones /
3665	foreach(lc, *varinfos)
3666	{
3667	GroupVarInfo varinfo = (GroupVarInfo ) lfirst(lc);
3668	AttrNumber attnum;
3669
3670	if (!IsA(varinfo->var, Var))
3671	{
3672	newlist = lappend(newlist, varinfo);
3673	continue;
3674	}
3675
3676	attnum = ((Var *) varinfo->var)->varattno;
3677	if (!bms_is_member(attnum, matched))
3678	newlist = lappend(newlist, varinfo);
3679	}
3680
3681	*varinfos = newlist;
3682	*ndistinct = item->ndistinct;
3683	return true;
3684	}
3685
3686	return false;
3687	}
3688
3689	/*
3690	* convert_to_scalar
3691	* Convert non-NULL values of the indicated types to the comparison
3692	* scale needed by scalarineqsel().
3693	* Returns "true" if successful.
3694	*
3695	* XXX this routine is a hack: ideally we should look up the conversion
3696	* subroutines in pg_type.
3697	*
3698	* All numeric datatypes are simply converted to their equivalent
3699	* "double" values. (NUMERIC values that are outside the range of "double"
3700	* are clamped to +/- HUGE_VAL.)
3701	*
3702	* String datatypes are converted by convert_string_to_scalar(),
3703	* which is explained below. The reason why this routine deals with
3704	* three values at a time, not just one, is that we need it for strings.
3705	*
3706	* The bytea datatype is just enough different from strings that it has
3707	* to be treated separately.
3708	*
3709	* The several datatypes representing absolute times are all converted
3710	* to Timestamp, which is actually a double, and then we just use that
3711	* double value. Note this will give correct results even for the "special"
3712	* values of Timestamp, since those are chosen to compare correctly;
3713	* see timestamp_cmp.
3714	*
3715	* The several datatypes representing relative times (intervals) are all
3716	* converted to measurements expressed in seconds.
3717	*/
3718	static bool
3719	convert_to_scalar(Datum value, Oid valuetypid, Oid collid, double *scaledvalue,
3720	Datum lobound, Datum hibound, Oid boundstypid,
3721	double scaledlobound, double* *scaledhibound)
3722	{
3723	bool failure = false;
3724
3725	/*
3726	* Both the valuetypid and the boundstypid should exactly match the
3727	* declared input type(s) of the operator we are invoked for. However,
3728	* extensions might try to use scalarineqsel as estimator for operators
3729	* with input type(s) we don't handle here; in such cases, we want to
3730	* return false, not fail. In any case, we mustn't assume that valuetypid
3731	* and boundstypid are identical.
3732	*
3733	* XXX The histogram we are interpolating between points of could belong
3734	* to a column that's only binary-compatible with the declared type. In
3735	* essence we are assuming that the semantics of binary-compatible types
3736	* are enough alike that we can use a histogram generated with one type's
3737	* operators to estimate selectivity for the other's. This is outright
3738	* wrong in some cases --- in particular signed versus unsigned
3739	* interpretation could trip us up. But it's useful enough in the
3740	* majority of cases that we do it anyway. Should think about more
3741	* rigorous ways to do it.
3742	*/
3743	switch (valuetypid)
3744	{
3745	/*
3746	* Built-in numeric types
3747	*/
3748	case BOOLOID:
3749	case INT2OID:
3750	case INT4OID:
3751	case INT8OID:
3752	case FLOAT4OID:
3753	case FLOAT8OID:
3754	case NUMERICOID:
3755	case OIDOID:
3756	case REGPROCOID:
3757	case REGPROCEDUREOID:
3758	case REGOPEROID:
3759	case REGOPERATOROID:
3760	case REGCLASSOID:
3761	case REGTYPEOID:
3762	case REGCONFIGOID:
3763	case REGDICTIONARYOID:
3764	case REGROLEOID:
3765	case REGNAMESPACEOID:
3766	*scaledvalue = convert_numeric_to_scalar(value, valuetypid,
3767	&failure);
3768	*scaledlobound = convert_numeric_to_scalar(lobound, boundstypid,
3769	&failure);
3770	*scaledhibound = convert_numeric_to_scalar(hibound, boundstypid,
3771	&failure);
3772	return !failure;
3773
3774	/*
3775	* Built-in string types
3776	*/
3777	case CHAROID:
3778	case BPCHAROID:
3779	case VARCHAROID:
3780	case TEXTOID:
3781	case NAMEOID:
3782	{
3783	char *valstr = convert_string_datum(value, valuetypid,
3784	collid, &failure);
3785	char *lostr = convert_string_datum(lobound, boundstypid,
3786	collid, &failure);
3787	char *histr = convert_string_datum(hibound, boundstypid,
3788	collid, &failure);
3789
3790	/*
3791	* Bail out if any of the values is not of string type. We
3792	* might leak converted strings for the other value(s), but
3793	* that's not worth troubling over.
3794	*/
3795	if (failure)
3796	return false;
3797
3798	convert_string_to_scalar(valstr, scaledvalue,
3799	lostr, scaledlobound,
3800	histr, scaledhibound);
3801	pfree(valstr);
3802	pfree(lostr);
3803	pfree(histr);
3804	return true;
3805	}
3806
3807	/*
3808	* Built-in bytea type
3809	*/
3810	case BYTEAOID:
3811	{
3812	/ We only support bytea vs bytea comparison /
3813	if (boundstypid != BYTEAOID)
3814	return false;
3815	convert_bytea_to_scalar(value, scaledvalue,
3816	lobound, scaledlobound,
3817	hibound, scaledhibound);
3818	return true;
3819	}
3820
3821	/*
3822	* Built-in time types
3823	*/
3824	case TIMESTAMPOID:
3825	case TIMESTAMPTZOID:
3826	case DATEOID:
3827	case INTERVALOID:
3828	case TIMEOID:
3829	case TIMETZOID:
3830	*scaledvalue = convert_timevalue_to_scalar(value, valuetypid,
3831	&failure);
3832	*scaledlobound = convert_timevalue_to_scalar(lobound, boundstypid,
3833	&failure);
3834	*scaledhibound = convert_timevalue_to_scalar(hibound, boundstypid,
3835	&failure);
3836	return !failure;
3837
3838	/*
3839	* Built-in network types
3840	*/
3841	case INETOID:
3842	case CIDROID:
3843	case MACADDROID:
3844	case MACADDR8OID:
3845	*scaledvalue = convert_network_to_scalar(value, valuetypid,
3846	&failure);
3847	*scaledlobound = convert_network_to_scalar(lobound, boundstypid,
3848	&failure);
3849	*scaledhibound = convert_network_to_scalar(hibound, boundstypid,
3850	&failure);
3851	return !failure;
3852	}
3853	/ Don't know how to convert /
3854	scaledvalue = scaledlobound = *scaledhibound = `0`;
3855	return false;
3856	}
3857
3858	/*
3859	* Do convert_to_scalar()'s work for any numeric data type.
3860	*
3861	* On failure (e.g., unsupported typid), set *failure to true;
3862	* otherwise, that variable is not changed.
3863	*/
3864	static double
3865	convert_numeric_to_scalar(Datum value, Oid typid, bool *failure)
3866	{
3867	switch (typid)
3868	{
3869	case BOOLOID:
3870	return (double) DatumGetBool(value);
3871	case INT2OID:
3872	return (double) DatumGetInt16(value);
3873	case INT4OID:
3874	return (double) DatumGetInt32(value);
3875	case INT8OID:
3876	return (double) DatumGetInt64(value);
3877	case FLOAT4OID:
3878	return (double) DatumGetFloat4(value);
3879	case FLOAT8OID:
3880	return (double) DatumGetFloat8(value);
3881	case NUMERICOID:
3882	/ Note: out-of-range values will be clamped to +-HUGE_VAL /
3883	return (double)
3884	DatumGetFloat8(DirectFunctionCall1(numeric_float8_no_overflow,
3885	value));
3886	case OIDOID:
3887	case REGPROCOID:
3888	case REGPROCEDUREOID:
3889	case REGOPEROID:
3890	case REGOPERATOROID:
3891	case REGCLASSOID:
3892	case REGTYPEOID:
3893	case REGCONFIGOID:
3894	case REGDICTIONARYOID:
3895	case REGROLEOID:
3896	case REGNAMESPACEOID:
3897	/ we can treat OIDs as integers... /
3898	return (double) DatumGetObjectId(value);
3899	}
3900
3901	*failure = true;
3902	return `0`;
3903	}
3904
3905	/*
3906	* Do convert_to_scalar()'s work for any character-string data type.
3907	*
3908	* String datatypes are converted to a scale that ranges from 0 to 1,
3909	* where we visualize the bytes of the string as fractional digits.
3910	*
3911	* We do not want the base to be 256, however, since that tends to
3912	* generate inflated selectivity estimates; few databases will have
3913	* occurrences of all 256 possible byte values at each position.
3914	* Instead, use the smallest and largest byte values seen in the bounds
3915	* as the estimated range for each byte, after some fudging to deal with
3916	* the fact that we probably aren't going to see the full range that way.
3917	*
3918	* An additional refinement is that we discard any common prefix of the
3919	* three strings before computing the scaled values. This allows us to
3920	* "zoom in" when we encounter a narrow data range. An example is a phone
3921	* number database where all the values begin with the same area code.
3922	* (Actually, the bounds will be adjacent histogram-bin-boundary values,
3923	* so this is more likely to happen than you might think.)
3924	*/
3925	static void
3926	convert_string_to_scalar(char *value,
3927	double *scaledvalue,
3928	char *lobound,
3929	double *scaledlobound,
3930	char *hibound,
3931	double *scaledhibound)
3932	{
3933	int rangelo,
3934	rangehi;
3935	char *sptr;
3936
3937	rangelo = rangehi = (unsigned char) hibound[`0`];
3938	for (sptr = lobound; *sptr; sptr++)
3939	{
3940	if (rangelo > (unsigned char) *sptr)
3941	rangelo = (unsigned char) *sptr;
3942	if (rangehi < (unsigned char) *sptr)
3943	rangehi = (unsigned char) *sptr;
3944	}
3945	for (sptr = hibound; *sptr; sptr++)
3946	{
3947	if (rangelo > (unsigned char) *sptr)
3948	rangelo = (unsigned char) *sptr;
3949	if (rangehi < (unsigned char) *sptr)
3950	rangehi = (unsigned char) *sptr;
3951	}
3952	/ If range includes any upper-case ASCII chars, make it include all /
3953	if (rangelo <= `'Z'` && rangehi >= `'A'`)
3954	{
3955	if (rangelo > `'A'`)
3956	rangelo = `'A'`;
3957	if (rangehi < `'Z'`)
3958	rangehi = `'Z'`;
3959	}
3960	/ Ditto lower-case /
3961	if (rangelo <= `'z'` && rangehi >= `'a'`)
3962	{
3963	if (rangelo > `'a'`)
3964	rangelo = `'a'`;
3965	if (rangehi < `'z'`)
3966	rangehi = `'z'`;
3967	}
3968	/ Ditto digits /
3969	if (rangelo <= `'9'` && rangehi >= `'0'`)
3970	{
3971	if (rangelo > `'0'`)
3972	rangelo = `'0'`;
3973	if (rangehi < `'9'`)
3974	rangehi = `'9'`;
3975	}
3976
3977	/*
3978	* If range includes less than 10 chars, assume we have not got enough
3979	* data, and make it include regular ASCII set.
3980	*/
3981	if (rangehi - rangelo < `9`)
3982	{
3983	rangelo = `' '`;
3984	rangehi = `127`;
3985	}
3986
3987	/*
3988	* Now strip any common prefix of the three strings.
3989	*/
3990	while (*lobound)
3991	{
3992	if (lobound != hibound \|\| lobound != value)
3993	break;
3994	lobound++, hibound++, value++;
3995	}
3996
3997	/*
3998	* Now we can do the conversions.
3999	*/
4000	*scaledvalue = convert_one_string_to_scalar(value, rangelo, rangehi);
4001	*scaledlobound = convert_one_string_to_scalar(lobound, rangelo, rangehi);
4002	*scaledhibound = convert_one_string_to_scalar(hibound, rangelo, rangehi);
4003	}
4004
4005	static double
4006	convert_one_string_to_scalar(char value, int* rangelo, int rangehi)
4007	{
4008	int slen = strlen(value);
4009	double num,
4010	denom,
4011	base;
4012
4013	if (slen <= `0`)
4014	return `0.0`; / empty string has scalar value 0 /
4015
4016	/*
4017	* There seems little point in considering more than a dozen bytes from
4018	* the string. Since base is at least 10, that will give us nominal
4019	* resolution of at least 12 decimal digits, which is surely far more
4020	* precision than this estimation technique has got anyway (especially in
4021	* non-C locales). Also, even with the maximum possible base of 256, this
4022	* ensures denom cannot grow larger than 256^13 = 2.03e31, which will not
4023	* overflow on any known machine.
4024	*/
4025	if (slen > `12`)
4026	slen = `12`;
4027
4028	/ Convert initial characters to fraction /
4029	base = rangehi - rangelo + `1`;
4030	num = `0.0`;
4031	denom = base;
4032	while (slen-- > `0`)
4033	{
4034	int ch = (unsigned char) *value++;
4035
4036	if (ch < rangelo)
4037	ch = rangelo - `1`;
4038	else if (ch > rangehi)
4039	ch = rangehi + `1`;
4040	num += ((double) (ch - rangelo)) / denom;
4041	denom *= base;
4042	}
4043
4044	return num;
4045	}
4046
4047	/*
4048	* Convert a string-type Datum into a palloc'd, null-terminated string.
4049	*
4050	* On failure (e.g., unsupported typid), set *failure to true;
4051	* otherwise, that variable is not changed. (We'll return NULL on failure.)
4052	*
4053	* When using a non-C locale, we must pass the string through strxfrm()
4054	* before continuing, so as to generate correct locale-specific results.
4055	*/
4056	static char *
4057	convert_string_datum(Datum value, Oid typid, Oid collid, bool *failure)
4058	{
4059	char *val;
4060
4061	switch (typid)
4062	{
4063	case CHAROID:
4064	val = (char *) palloc(`2`);
4065	val[`0`] = DatumGetChar(value);
4066	val[`1`] = `'\0'`;
4067	break;
4068	case BPCHAROID:
4069	case VARCHAROID:
4070	case TEXTOID:
4071	val = TextDatumGetCString(value);
4072	break;
4073	case NAMEOID:
4074	{
4075	NameData nm = (NameData ) DatumGetPointer(value);
4076
4077	val = pstrdup(NameStr(*nm));
4078	break;
4079	}
4080	default:
4081	*failure = true;
4082	return NULL;
4083	}
4084
4085	if (!lc_collate_is_c(collid))
4086	{
4087	char *xfrmstr;
4088	size_t xfrmlen;
4089	size_t xfrmlen2 PG_USED_FOR_ASSERTS_ONLY;
4090
4091	/*
4092	* XXX: We could guess at a suitable output buffer size and only call
4093	* strxfrm twice if our guess is too small.
4094	*
4095	* XXX: strxfrm doesn't support UTF-8 encoding on Win32, it can return
4096	* bogus data or set an error. This is not really a problem unless it
4097	* crashes since it will only give an estimation error and nothing
4098	* fatal.
4099	*/
4100	#if _MSC_VER == 1400 /* VS.Net 2005 */
4101
4102	/*
4103	*
4104	* http://connect.microsoft.com/VisualStudio/feedback/ViewFeedback.aspx?FeedbackID=99694
4105	*/
4106	{
4107	char x[`1`];
4108
4109	xfrmlen = strxfrm(x, val, `0`);
4110	}
4111	#else
4112	xfrmlen = strxfrm(NULL, val, `0`);
4113	#endif
4114	#ifdef WIN32
4115
4116	/*
4117	* On Windows, strxfrm returns INT_MAX when an error occurs. Instead
4118	* of trying to allocate this much memory (and fail), just return the
4119	* original string unmodified as if we were in the C locale.
4120	*/
4121	if (xfrmlen == INT_MAX)
4122	return val;
4123	#endif
4124	xfrmstr = (char *) palloc(xfrmlen + `1`);
4125	xfrmlen2 = strxfrm(xfrmstr, val, xfrmlen + `1`);
4126
4127	/*
4128	* Some systems (e.g., glibc) can return a smaller value from the
4129	* second call than the first; thus the Assert must be <= not ==.
4130	*/
4131	Assert(xfrmlen2 <= xfrmlen);
4132	pfree(val);
4133	val = xfrmstr;
4134	}
4135
4136	return val;
4137	}
4138
4139	/*
4140	* Do convert_to_scalar()'s work for any bytea data type.
4141	*
4142	* Very similar to convert_string_to_scalar except we can't assume
4143	* null-termination and therefore pass explicit lengths around.
4144	*
4145	* Also, assumptions about likely "normal" ranges of characters have been
4146	* removed - a data range of 0..255 is always used, for now. (Perhaps
4147	* someday we will add information about actual byte data range to
4148	* pg_statistic.)
4149	*/
4150	static void
4151	convert_bytea_to_scalar(Datum value,
4152	double *scaledvalue,
4153	Datum lobound,
4154	double *scaledlobound,
4155	Datum hibound,
4156	double *scaledhibound)
4157	{
4158	bytea *valuep = DatumGetByteaPP(value);
4159	bytea *loboundp = DatumGetByteaPP(lobound);
4160	bytea *hiboundp = DatumGetByteaPP(hibound);
4161	int rangelo,
4162	rangehi,
4163	valuelen = VARSIZE_ANY_EXHDR(valuep),
4164	loboundlen = VARSIZE_ANY_EXHDR(loboundp),
4165	hiboundlen = VARSIZE_ANY_EXHDR(hiboundp),
4166	i,
4167	minlen;
4168	unsigned char valstr = (unsigned* char *) VARDATA_ANY(valuep);
4169	unsigned char lostr = (unsigned* char *) VARDATA_ANY(loboundp);
4170	unsigned char histr = (unsigned* char *) VARDATA_ANY(hiboundp);
4171
4172	/*
4173	* Assume bytea data is uniformly distributed across all byte values.
4174	*/
4175	rangelo = `0`;
4176	rangehi = `255`;
4177
4178	/*
4179	* Now strip any common prefix of the three strings.
4180	*/
4181	minlen = Min(Min(valuelen, loboundlen), hiboundlen);
4182	for (i = `0`; i < minlen; i++)
4183	{
4184	if (lostr != histr \|\| lostr != valstr)
4185	break;
4186	lostr++, histr++, valstr++;
4187	loboundlen--, hiboundlen--, valuelen--;
4188	}
4189
4190	/*
4191	* Now we can do the conversions.
4192	*/
4193	*scaledvalue = convert_one_bytea_to_scalar(valstr, valuelen, rangelo, rangehi);
4194	*scaledlobound = convert_one_bytea_to_scalar(lostr, loboundlen, rangelo, rangehi);
4195	*scaledhibound = convert_one_bytea_to_scalar(histr, hiboundlen, rangelo, rangehi);
4196	}
4197
4198	static double
4199	convert_one_bytea_to_scalar(unsigned char value, int* valuelen,
4200	int rangelo, int rangehi)
4201	{
4202	double num,
4203	denom,
4204	base;
4205
4206	if (valuelen <= `0`)
4207	return `0.0`; / empty string has scalar value 0 /
4208
4209	/*
4210	* Since base is 256, need not consider more than about 10 chars (even
4211	* this many seems like overkill)
4212	*/
4213	if (valuelen > `10`)
4214	valuelen = `10`;
4215
4216	/ Convert initial characters to fraction /
4217	base = rangehi - rangelo + `1`;
4218	num = `0.0`;
4219	denom = base;
4220	while (valuelen-- > `0`)
4221	{
4222	int ch = *value++;
4223
4224	if (ch < rangelo)
4225	ch = rangelo - `1`;
4226	else if (ch > rangehi)
4227	ch = rangehi + `1`;
4228	num += ((double) (ch - rangelo)) / denom;
4229	denom *= base;
4230	}
4231
4232	return num;
4233	}
4234
4235	/*
4236	* Do convert_to_scalar()'s work for any timevalue data type.
4237	*
4238	* On failure (e.g., unsupported typid), set *failure to true;
4239	* otherwise, that variable is not changed.
4240	*/
4241	static double
4242	convert_timevalue_to_scalar(Datum value, Oid typid, bool *failure)
4243	{
4244	switch (typid)
4245	{
4246	case TIMESTAMPOID:
4247	return DatumGetTimestamp(value);
4248	case TIMESTAMPTZOID:
4249	return DatumGetTimestampTz(value);
4250	case DATEOID:
4251	return date2timestamp_no_overflow(DatumGetDateADT(value));
4252	case INTERVALOID:
4253	{
4254	Interval *interval = DatumGetIntervalP(value);
4255
4256	/*
4257	* Convert the month part of Interval to days using assumed
4258	* average month length of 365.25/12.0 days. Not too
4259	* accurate, but plenty good enough for our purposes.
4260	*/
4261	return interval->time + interval->day * (double) USECS_PER_DAY +
4262	interval->month * ((DAYS_PER_YEAR / (double) MONTHS_PER_YEAR) * USECS_PER_DAY);
4263	}
4264	case TIMEOID:
4265	return DatumGetTimeADT(value);
4266	case TIMETZOID:
4267	{
4268	TimeTzADT *timetz = DatumGetTimeTzADTP(value);
4269
4270	/ use GMT-equivalent time /
4271	return (double) (timetz->time + (timetz->zone * `1000000.0`));
4272	}
4273	}
4274
4275	*failure = true;
4276	return `0`;
4277	}
4278
4279
4280	/*
4281	* get_restriction_variable
4282	* Examine the args of a restriction clause to see if it's of the
4283	* form (variable op pseudoconstant) or (pseudoconstant op variable),
4284	* where "variable" could be either a Var or an expression in vars of a
4285	* single relation. If so, extract information about the variable,
4286	* and also indicate which side it was on and the other argument.
4287	*
4288	* Inputs:
4289	* root: the planner info
4290	* args: clause argument list
4291	* varRelid: see specs for restriction selectivity functions
4292	*
4293	* Outputs: (these are valid only if true is returned)
4294	* *vardata: gets information about variable (see examine_variable)
4295	* *other: gets other clause argument, aggressively reduced to a constant
4296	* *varonleft: set true if variable is on the left, false if on the right
4297	*
4298	* Returns true if a variable is identified, otherwise false.
4299	*
4300	* Note: if there are Vars on both sides of the clause, we must fail, because
4301	* callers are expecting that the other side will act like a pseudoconstant.
4302	*/
4303	bool
4304	get_restriction_variable(PlannerInfo root, List args, int varRelid,
4305	VariableStatData vardata, Node *other,
4306	bool *varonleft)
4307	{
4308	Node *left,
4309	*right;
4310	VariableStatData rdata;
4311
4312	/ Fail if not a binary opclause (probably shouldn't happen) /
4313	if (list_length(args) != `2`)
4314	return false;
4315
4316	left = (Node *) linitial(args);
4317	right = (Node *) lsecond(args);
4318
4319	/*
4320	* Examine both sides. Note that when varRelid is nonzero, Vars of other
4321	* relations will be treated as pseudoconstants.
4322	*/
4323	examine_variable(root, left, varRelid, vardata);
4324	examine_variable(root, right, varRelid, &rdata);
4325
4326	/*
4327	* If one side is a variable and the other not, we win.
4328	*/
4329	if (vardata->rel && rdata.rel == NULL)
4330	{
4331	*varonleft = true;
4332	*other = estimate_expression_value(root, rdata.var);
4333	/ Assume we need no ReleaseVariableStats(rdata) here /
4334	return true;
4335	}
4336
4337	if (vardata->rel == NULL && rdata.rel)
4338	{
4339	*varonleft = false;
4340	*other = estimate_expression_value(root, vardata->var);
4341	/ Assume we need no ReleaseVariableStats(vardata) here /*
4342	*vardata = rdata;
4343	return true;
4344	}
4345
4346	/ Oops, clause has wrong structure (probably var op var) /
4347	ReleaseVariableStats(*vardata);
4348	ReleaseVariableStats(rdata);
4349
4350	return false;
4351	}
4352
4353	/*
4354	* get_join_variables
4355	* Apply examine_variable() to each side of a join clause.
4356	* Also, attempt to identify whether the join clause has the same
4357	* or reversed sense compared to the SpecialJoinInfo.
4358	*
4359	* We consider the join clause "normal" if it is "lhs_var OP rhs_var",
4360	* or "reversed" if it is "rhs_var OP lhs_var". In complicated cases
4361	* where we can't tell for sure, we default to assuming it's normal.
4362	*/
4363	void
4364	get_join_variables(PlannerInfo root, List args, SpecialJoinInfo *sjinfo,
4365	VariableStatData vardata1, VariableStatData vardata2,
4366	bool *join_is_reversed)
4367	{
4368	Node *left,
4369	*right;
4370
4371	if (list_length(args) != `2`)
4372	elog(ERROR, "join operator should take two arguments");
4373
4374	left = (Node *) linitial(args);
4375	right = (Node *) lsecond(args);
4376
4377	examine_variable(root, left, `0`, vardata1);
4378	examine_variable(root, right, `0`, vardata2);
4379
4380	if (vardata1->rel &&
4381	bms_is_subset(vardata1->rel->relids, sjinfo->syn_righthand))
4382	join_is_reversed = true; /* var1 is on RHS /
4383	else if (vardata2->rel &&
4384	bms_is_subset(vardata2->rel->relids, sjinfo->syn_lefthand))
4385	join_is_reversed = true; /* var2 is on LHS /
4386	else
4387	*join_is_reversed = false;
4388	}
4389
4390	/*
4391	* examine_variable
4392	* Try to look up statistical data about an expression.
4393	* Fill in a VariableStatData struct to describe the expression.
4394	*
4395	* Inputs:
4396	* root: the planner info
4397	* node: the expression tree to examine
4398	* varRelid: see specs for restriction selectivity functions
4399	*
4400	* Outputs: *vardata is filled as follows:
4401	* var: the input expression (with any binary relabeling stripped, if
4402	* it is or contains a variable; but otherwise the type is preserved)
4403	* rel: RelOptInfo for relation containing variable; NULL if expression
4404	* contains no Vars (NOTE this could point to a RelOptInfo of a
4405	* subquery, not one in the current query).
4406	* statsTuple: the pg_statistic entry for the variable, if one exists;
4407	* otherwise NULL.
4408	* freefunc: pointer to a function to release statsTuple with.
4409	* vartype: exposed type of the expression; this should always match
4410	* the declared input type of the operator we are estimating for.
4411	* atttype, atttypmod: actual type/typmod of the "var" expression. This is
4412	* commonly the same as the exposed type of the variable argument,
4413	* but can be different in binary-compatible-type cases.
4414	* isunique: true if we were able to match the var to a unique index or a
4415	* single-column DISTINCT clause, implying its values are unique for
4416	* this query. (Caution: this should be trusted for statistical
4417	* purposes only, since we do not check indimmediate nor verify that
4418	* the exact same definition of equality applies.)
4419	* acl_ok: true if current user has permission to read the column(s)
4420	* underlying the pg_statistic entry. This is consulted by
4421	* statistic_proc_security_check().
4422	*
4423	* Caller is responsible for doing ReleaseVariableStats() before exiting.
4424	*/
4425	void
4426	examine_variable(PlannerInfo root, Node node, int varRelid,
4427	VariableStatData *vardata)
4428	{
4429	Node *basenode;
4430	Relids varnos;
4431	RelOptInfo *onerel;
4432
4433	/ Make sure we don't return dangling pointers in vardata /
4434	MemSet(vardata, `0`, sizeof(VariableStatData));
4435
4436	/ Save the exposed type of the expression /
4437	vardata->vartype = exprType(node);
4438
4439	/ Look inside any binary-compatible relabeling /
4440
4441	if (IsA(node, RelabelType))
4442	basenode = (Node ) ((RelabelType ) node)->arg;
4443	else
4444	basenode = node;
4445
4446	/ Fast path for a simple Var /
4447
4448	if (IsA(basenode, Var) &&
4449	(varRelid == `0` \|\| varRelid == ((Var *) basenode)->varno))
4450	{
4451	Var var = (Var ) basenode;
4452
4453	/ Set up result fields other than the stats tuple /
4454	vardata->var = basenode; / return Var without relabeling /
4455	vardata->rel = find_base_rel(root, var->varno);
4456	vardata->atttype = var->vartype;
4457	vardata->atttypmod = var->vartypmod;
4458	vardata->isunique = has_unique_index(vardata->rel, var->varattno);
4459
4460	/ Try to locate some stats /
4461	examine_simple_variable(root, var, vardata);
4462
4463	return;
4464	}
4465
4466	/*
4467	* Okay, it's a more complicated expression. Determine variable
4468	* membership. Note that when varRelid isn't zero, only vars of that
4469	* relation are considered "real" vars.
4470	*/
4471	varnos = pull_varnos(basenode);
4472
4473	onerel = NULL;
4474
4475	switch (bms_membership(varnos))
4476	{
4477	case BMS_EMPTY_SET:
4478	/ No Vars at all ... must be pseudo-constant clause /
4479	break;
4480	case BMS_SINGLETON:
4481	if (varRelid == `0` \|\| bms_is_member(varRelid, varnos))
4482	{
4483	onerel = find_base_rel(root,
4484	(varRelid ? varRelid : bms_singleton_member(varnos)));
4485	vardata->rel = onerel;
4486	node = basenode; / strip any relabeling /
4487	}
4488	/ else treat it as a constant /
4489	break;
4490	case BMS_MULTIPLE:
4491	if (varRelid == `0`)
4492	{
4493	/ treat it as a variable of a join relation /
4494	vardata->rel = find_join_rel(root, varnos);
4495	node = basenode; / strip any relabeling /
4496	}
4497	else if (bms_is_member(varRelid, varnos))
4498	{
4499	/ ignore the vars belonging to other relations /
4500	vardata->rel = find_base_rel(root, varRelid);
4501	node = basenode; / strip any relabeling /
4502	/ note: no point in expressional-index search here /
4503	}
4504	/ else treat it as a constant /
4505	break;
4506	}
4507
4508	bms_free(varnos);
4509
4510	vardata->var = node;
4511	vardata->atttype = exprType(node);
4512	vardata->atttypmod = exprTypmod(node);
4513
4514	if (onerel)
4515	{
4516	/*
4517	* We have an expression in vars of a single relation. Try to match
4518	* it to expressional index columns, in hopes of finding some
4519	* statistics.
4520	*
4521	* Note that we consider all index columns including INCLUDE columns,
4522	* since there could be stats for such columns. But the test for
4523	* uniqueness needs to be warier.
4524	*
4525	* XXX it's conceivable that there are multiple matches with different
4526	* index opfamilies; if so, we need to pick one that matches the
4527	* operator we are estimating for. FIXME later.
4528	*/
4529	ListCell *ilist;
4530
4531	foreach(ilist, onerel->indexlist)
4532	{
4533	IndexOptInfo index = (IndexOptInfo ) lfirst(ilist);
4534	ListCell *indexpr_item;
4535	int pos;
4536
4537	indexpr_item = list_head(index->indexprs);
4538	if (indexpr_item == NULL)
4539	continue; / no expressions here... /
4540
4541	for (pos = `0`; pos < index->ncolumns; pos++)
4542	{
4543	if (index->indexkeys[pos] == `0`)
4544	{
4545	Node *indexkey;
4546
4547	if (indexpr_item == NULL)
4548	elog(ERROR, "too few entries in indexprs list");
4549	indexkey = (Node *) lfirst(indexpr_item);
4550	if (indexkey && IsA(indexkey, RelabelType))
4551	indexkey = (Node ) ((RelabelType ) indexkey)->arg;
4552	if (equal(node, indexkey))
4553	{
4554	/*
4555	* Found a match ... is it a unique index? Tests here
4556	* should match has_unique_index().
4557	*/
4558	if (index->unique &&
4559	index->nkeycolumns == `1` &&
4560	pos == `0` &&
4561	(index->indpred == NIL \|\| index->predOK))
4562	vardata->isunique = true;
4563
4564	/*
4565	* Has it got stats? We only consider stats for
4566	* non-partial indexes, since partial indexes probably
4567	* don't reflect whole-relation statistics; the above
4568	* check for uniqueness is the only info we take from
4569	* a partial index.
4570	*
4571	* An index stats hook, however, must make its own
4572	* decisions about what to do with partial indexes.
4573	*/
4574	if (get_index_stats_hook &&
4575	(*get_index_stats_hook) (root, index->indexoid,
4576	pos + `1`, vardata))
4577	{
4578	/*
4579	* The hook took control of acquiring a stats
4580	* tuple. If it did supply a tuple, it'd better
4581	* have supplied a freefunc.
4582	*/
4583	if (HeapTupleIsValid(vardata->statsTuple) &&
4584	!vardata->freefunc)
4585	elog(ERROR, "no function provided to release variable stats with");
4586	}
4587	else if (index->indpred == NIL)
4588	{
4589	vardata->statsTuple =
4590	SearchSysCache3(STATRELATTINH,
4591	ObjectIdGetDatum(index->indexoid),
4592	Int16GetDatum(pos + `1`),
4593	BoolGetDatum(false));
4594	vardata->freefunc = ReleaseSysCache;
4595
4596	if (HeapTupleIsValid(vardata->statsTuple))
4597	{
4598	/ Get index's table for permission check /
4599	RangeTblEntry *rte;
4600	Oid userid;
4601
4602	rte = planner_rt_fetch(index->rel->relid, root);
4603	Assert(rte->rtekind == RTE_RELATION);
4604
4605	/*
4606	* Use checkAsUser if it's set, in case we're
4607	* accessing the table via a view.
4608	*/
4609	userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
4610
4611	/*
4612	* For simplicity, we insist on the whole
4613	* table being selectable, rather than trying
4614	* to identify which column(s) the index
4615	* depends on. Also require all rows to be
4616	* selectable --- there must be no
4617	* securityQuals from security barrier views
4618	* or RLS policies.
4619	*/
4620	vardata->acl_ok =
4621	rte->securityQuals == NIL &&
4622	(pg_class_aclcheck(rte->relid, userid,
4623	ACL_SELECT) == ACLCHECK_OK);
4624	}
4625	else
4626	{
4627	/ suppress leakproofness checks later /
4628	vardata->acl_ok = true;
4629	}
4630	}
4631	if (vardata->statsTuple)
4632	break;
4633	}
4634	indexpr_item = lnext(indexpr_item);
4635	}
4636	}
4637	if (vardata->statsTuple)
4638	break;
4639	}
4640	}
4641	}
4642
4643	/*
4644	* examine_simple_variable
4645	* Handle a simple Var for examine_variable
4646	*
4647	* This is split out as a subroutine so that we can recurse to deal with
4648	* Vars referencing subqueries.
4649	*
4650	* We already filled in all the fields of *vardata except for the stats tuple.
4651	*/
4652	static void
4653	examine_simple_variable(PlannerInfo root, Var var,
4654	VariableStatData *vardata)
4655	{
4656	RangeTblEntry *rte = root->simple_rte_array[var->varno];
4657
4658	Assert(IsA(rte, RangeTblEntry));
4659
4660	if (get_relation_stats_hook &&
4661	(*get_relation_stats_hook) (root, rte, var->varattno, vardata))
4662	{
4663	/*
4664	* The hook took control of acquiring a stats tuple. If it did supply
4665	* a tuple, it'd better have supplied a freefunc.
4666	*/
4667	if (HeapTupleIsValid(vardata->statsTuple) &&
4668	!vardata->freefunc)
4669	elog(ERROR, "no function provided to release variable stats with");
4670	}
4671	else if (rte->rtekind == RTE_RELATION)
4672	{
4673	/*
4674	* Plain table or parent of an inheritance appendrel, so look up the
4675	* column in pg_statistic
4676	*/
4677	vardata->statsTuple = SearchSysCache3(STATRELATTINH,
4678	ObjectIdGetDatum(rte->relid),
4679	Int16GetDatum(var->varattno),
4680	BoolGetDatum(rte->inh));
4681	vardata->freefunc = ReleaseSysCache;
4682
4683	if (HeapTupleIsValid(vardata->statsTuple))
4684	{
4685	Oid userid;
4686
4687	/*
4688	* Check if user has permission to read this column. We require
4689	* all rows to be accessible, so there must be no securityQuals
4690	* from security barrier views or RLS policies. Use checkAsUser
4691	* if it's set, in case we're accessing the table via a view.
4692	*/
4693	userid = rte->checkAsUser ? rte->checkAsUser : GetUserId();
4694
4695	vardata->acl_ok =
4696	rte->securityQuals == NIL &&
4697	((pg_class_aclcheck(rte->relid, userid,
4698	ACL_SELECT) == ACLCHECK_OK) \|\|
4699	(pg_attribute_aclcheck(rte->relid, var->varattno, userid,
4700	ACL_SELECT) == ACLCHECK_OK));
4701	}
4702	else
4703	{
4704	/ suppress any possible leakproofness checks later /
4705	vardata->acl_ok = true;
4706	}
4707	}
4708	else if (rte->rtekind == RTE_SUBQUERY && !rte->inh)
4709	{
4710	/*
4711	* Plain subquery (not one that was converted to an appendrel).
4712	*/
4713	Query *subquery = rte->subquery;
4714	RelOptInfo *rel;
4715	TargetEntry *ste;
4716
4717	/*
4718	* Punt if it's a whole-row var rather than a plain column reference.
4719	*/
4720	if (var->varattno == InvalidAttrNumber)
4721	return;
4722
4723	/*
4724	* Punt if subquery uses set operations or GROUP BY, as these will
4725	* mash underlying columns' stats beyond recognition. (Set ops are
4726	* particularly nasty; if we forged ahead, we would return stats
4727	* relevant to only the leftmost subselect...) DISTINCT is also
4728	* problematic, but we check that later because there is a possibility
4729	* of learning something even with it.
4730	*/
4731	if (subquery->setOperations \|\|
4732	subquery->groupClause)
4733	return;
4734
4735	/*
4736	* OK, fetch RelOptInfo for subquery. Note that we don't change the
4737	* rel returned in vardata, since caller expects it to be a rel of the
4738	* caller's query level. Because we might already be recursing, we
4739	* can't use that rel pointer either, but have to look up the Var's
4740	* rel afresh.
4741	*/
4742	rel = find_base_rel(root, var->varno);
4743
4744	/ If the subquery hasn't been planned yet, we have to punt /
4745	if (rel->subroot == NULL)
4746	return;
4747	Assert(IsA(rel->subroot, PlannerInfo));
4748
4749	/*
4750	* Switch our attention to the subquery as mangled by the planner. It
4751	* was okay to look at the pre-planning version for the tests above,
4752	* but now we need a Var that will refer to the subroot's live
4753	* RelOptInfos. For instance, if any subquery pullup happened during
4754	* planning, Vars in the targetlist might have gotten replaced, and we
4755	* need to see the replacement expressions.
4756	*/
4757	subquery = rel->subroot->parse;
4758	Assert(IsA(subquery, Query));
4759
4760	/ Get the subquery output expression referenced by the upper Var /
4761	ste = get_tle_by_resno(subquery->targetList, var->varattno);
4762	if (ste == NULL \|\| ste->resjunk)
4763	elog(ERROR, "subquery %s does not have attribute %d",
4764	rte->eref->aliasname, var->varattno);
4765	var = (Var *) ste->expr;
4766
4767	/*
4768	* If subquery uses DISTINCT, we can't make use of any stats for the
4769	* variable ... but, if it's the only DISTINCT column, we are entitled
4770	* to consider it unique. We do the test this way so that it works
4771	* for cases involving DISTINCT ON.
4772	*/
4773	if (subquery->distinctClause)
4774	{
4775	if (list_length(subquery->distinctClause) == `1` &&
4776	targetIsInSortList(ste, InvalidOid, subquery->distinctClause))
4777	vardata->isunique = true;
4778	/ cannot go further /
4779	return;
4780	}
4781
4782	/*
4783	* If the sub-query originated from a view with the security_barrier
4784	* attribute, we must not look at the variable's statistics, though it
4785	* seems all right to notice the existence of a DISTINCT clause. So
4786	* stop here.
4787	*
4788	* This is probably a harsher restriction than necessary; it's
4789	* certainly OK for the selectivity estimator (which is a C function,
4790	* and therefore omnipotent anyway) to look at the statistics. But
4791	* many selectivity estimators will happily *invoke the operator
4792	* function* to try to work out a good estimate - and that's not OK.
4793	* So for now, don't dig down for stats.
4794	*/
4795	if (rte->security_barrier)
4796	return;
4797
4798	/ Can only handle a simple Var of subquery's query level /
4799	if (var && IsA(var, Var) &&
4800	var->varlevelsup == `0`)
4801	{
4802	/*
4803	* OK, recurse into the subquery. Note that the original setting
4804	* of vardata->isunique (which will surely be false) is left
4805	* unchanged in this situation. That's what we want, since even
4806	* if the underlying column is unique, the subquery may have
4807	* joined to other tables in a way that creates duplicates.
4808	*/
4809	examine_simple_variable(rel->subroot, var, vardata);
4810	}
4811	}
4812	else
4813	{
4814	/*
4815	* Otherwise, the Var comes from a FUNCTION, VALUES, or CTE RTE. (We
4816	* won't see RTE_JOIN here because join alias Vars have already been
4817	* flattened.) There's not much we can do with function outputs, but
4818	* maybe someday try to be smarter about VALUES and/or CTEs.
4819	*/
4820	}
4821	}
4822
4823	/*
4824	* Check whether it is permitted to call func_oid passing some of the
4825	* pg_statistic data in vardata. We allow this either if the user has SELECT
4826	* privileges on the table or column underlying the pg_statistic data or if
4827	* the function is marked leak-proof.
4828	*/
4829	bool
4830	statistic_proc_security_check(VariableStatData *vardata, Oid func_oid)
4831	{
4832	if (vardata->acl_ok)
4833	return true;
4834
4835	if (!OidIsValid(func_oid))
4836	return false;
4837
4838	if (get_func_leakproof(func_oid))
4839	return true;
4840
4841	ereport(DEBUG2,
4842	(errmsg_internal("not using statistics because function \"%s\" is not leak-proof",
4843	get_func_name(func_oid))));
4844	return false;
4845	}
4846
4847	/*
4848	* get_variable_numdistinct
4849	* Estimate the number of distinct values of a variable.
4850	*
4851	* vardata: results of examine_variable
4852	* *isdefault: set to true if the result is a default rather than based on
4853	* anything meaningful.
4854	*
4855	* NB: be careful to produce a positive integral result, since callers may
4856	* compare the result to exact integer counts, or might divide by it.
4857	*/
4858	double
4859	get_variable_numdistinct(VariableStatData vardata, bool isdefault)
4860	{
4861	double stadistinct;
4862	double stanullfrac = `0.0`;
4863	double ntuples;
4864
4865	*isdefault = false;
4866
4867	/*
4868	* Determine the stadistinct value to use. There are cases where we can
4869	* get an estimate even without a pg_statistic entry, or can get a better
4870	* value than is in pg_statistic. Grab stanullfrac too if we can find it
4871	* (otherwise, assume no nulls, for lack of any better idea).
4872	*/
4873	if (HeapTupleIsValid(vardata->statsTuple))
4874	{
4875	/ Use the pg_statistic entry /
4876	Form_pg_statistic stats;
4877
4878	stats = (Form_pg_statistic) GETSTRUCT(vardata->statsTuple);
4879	stadistinct = stats->stadistinct;
4880	stanullfrac = stats->stanullfrac;
4881	}
4882	else if (vardata->vartype == BOOLOID)
4883	{
4884	/*
4885	* Special-case boolean columns: presumably, two distinct values.
4886	*
4887	* Are there any other datatypes we should wire in special estimates
4888	* for?
4889	*/
4890	stadistinct = `2.0`;
4891	}
4892	else if (vardata->rel && vardata->rel->rtekind == RTE_VALUES)
4893	{
4894	/*
4895	* If the Var represents a column of a VALUES RTE, assume it's unique.
4896	* This could of course be very wrong, but it should tend to be true
4897	* in well-written queries. We could consider examining the VALUES'
4898	* contents to get some real statistics; but that only works if the
4899	* entries are all constants, and it would be pretty expensive anyway.
4900	*/
4901	stadistinct = -`1.0`; / unique (and all non null) /
4902	}
4903	else
4904	{
4905	/*
4906	* We don't keep statistics for system columns, but in some cases we
4907	* can infer distinctness anyway.
4908	*/
4909	if (vardata->var && IsA(vardata->var, Var))
4910	{
4911	switch (((Var *) vardata->var)->varattno)
4912	{
4913	case SelfItemPointerAttributeNumber:
4914	stadistinct = -`1.0`; / unique (and all non null) /
4915	break;
4916	case TableOidAttributeNumber:
4917	stadistinct = `1.0`; / only 1 value /
4918	break;
4919	default:
4920	stadistinct = `0.0`; / means "unknown" /
4921	break;
4922	}
4923	}
4924	else
4925	stadistinct = `0.0`; / means "unknown" /
4926
4927	/*
4928	* XXX consider using estimate_num_groups on expressions?
4929	*/
4930	}
4931
4932	/*
4933	* If there is a unique index or DISTINCT clause for the variable, assume
4934	* it is unique no matter what pg_statistic says; the statistics could be
4935	* out of date, or we might have found a partial unique index that proves
4936	* the var is unique for this query. However, we'd better still believe
4937	* the null-fraction statistic.
4938	*/
4939	if (vardata->isunique)
4940	stadistinct = -`1.0` * (`1.0` - stanullfrac);
4941
4942	/*
4943	* If we had an absolute estimate, use that.
4944	*/
4945	if (stadistinct > `0.0`)
4946	return clamp_row_est(stadistinct);
4947
4948	/*
4949	* Otherwise we need to get the relation size; punt if not available.
4950	*/
4951	if (vardata->rel == NULL)
4952	{
4953	*isdefault = true;
4954	return DEFAULT_NUM_DISTINCT;
4955	}
4956	ntuples = vardata->rel->tuples;
4957	if (ntuples <= `0.0`)
4958	{
4959	*isdefault = true;
4960	return DEFAULT_NUM_DISTINCT;
4961	}
4962
4963	/*
4964	* If we had a relative estimate, use that.
4965	*/
4966	if (stadistinct < `0.0`)
4967	return clamp_row_est(-stadistinct * ntuples);
4968
4969	/*
4970	* With no data, estimate ndistinct = ntuples if the table is small, else
4971	* use default. We use DEFAULT_NUM_DISTINCT as the cutoff for "small" so
4972	* that the behavior isn't discontinuous.
4973	*/
4974	if (ntuples < DEFAULT_NUM_DISTINCT)
4975	return clamp_row_est(ntuples);
4976
4977	*isdefault = true;
4978	return DEFAULT_NUM_DISTINCT;
4979	}
4980
4981	/*
4982	* get_variable_range
4983	* Estimate the minimum and maximum value of the specified variable.
4984	* If successful, store values in min and max, and return true.
4985	* If no data available, return false.
4986	*
4987	* sortop is the "<" comparison operator to use. This should generally
4988	* be "<" not ">", as only the former is likely to be found in pg_statistic.
4989	*/
4990	static bool
4991	get_variable_range(PlannerInfo root, VariableStatData vardata, Oid sortop,
4992	Datum min, Datum max)
4993	{
4994	Datum tmin = `0`;
4995	Datum tmax = `0`;
4996	bool have_data = false;
4997	int16 typLen;
4998	bool typByVal;
4999	Oid opfuncoid;
5000	AttStatsSlot sslot;
5001	int i;
5002
5003	/*
5004	* XXX It's very tempting to try to use the actual column min and max, if
5005	* we can get them relatively-cheaply with an index probe. However, since
5006	* this function is called many times during join planning, that could
5007	* have unpleasant effects on planning speed. Need more investigation
5008	* before enabling this.
5009	*/
5010	#ifdef NOT_USED
5011	if (get_actual_variable_range(root, vardata, sortop, min, max))
5012	return true;
5013	#endif
5014
5015	if (!HeapTupleIsValid(vardata->statsTuple))
5016	{
5017	/ no stats available, so default result /
5018	return false;
5019	}
5020
5021	/*
5022	* If we can't apply the sortop to the stats data, just fail. In
5023	* principle, if there's a histogram and no MCVs, we could return the
5024	* histogram endpoints without ever applying the sortop ... but it's
5025	* probably not worth trying, because whatever the caller wants to do with
5026	* the endpoints would likely fail the security check too.
5027	*/
5028	if (!statistic_proc_security_check(vardata,
5029	(opfuncoid = get_opcode(sortop))))
5030	return false;
5031
5032	get_typlenbyval(vardata->atttype, &typLen, &typByVal);
5033
5034	/*
5035	* If there is a histogram, grab the first and last values.
5036	*
5037	* If there is a histogram that is sorted with some other operator than
5038	* the one we want, fail --- this suggests that there is data we can't
5039	* use.
5040	*/
5041	if (get_attstatsslot(&sslot, vardata->statsTuple,
5042	STATISTIC_KIND_HISTOGRAM, sortop,
5043	ATTSTATSSLOT_VALUES))
5044	{
5045	if (sslot.nvalues > `0`)
5046	{
5047	tmin = datumCopy(sslot.values[`0`], typByVal, typLen);
5048	tmax = datumCopy(sslot.values[sslot.nvalues - `1`], typByVal, typLen);
5049	have_data = true;
5050	}
5051	free_attstatsslot(&sslot);
5052	}
5053	else if (get_attstatsslot(&sslot, vardata->statsTuple,
5054	STATISTIC_KIND_HISTOGRAM, InvalidOid,
5055	`0`))
5056	{
5057	free_attstatsslot(&sslot);
5058	return false;
5059	}
5060
5061	/*
5062	* If we have most-common-values info, look for extreme MCVs. This is
5063	* needed even if we also have a histogram, since the histogram excludes
5064	* the MCVs. However, usually the MCVs will not be the extreme values, so
5065	* avoid unnecessary data copying.
5066	*/
5067	if (get_attstatsslot(&sslot, vardata->statsTuple,
5068	STATISTIC_KIND_MCV, InvalidOid,
5069	ATTSTATSSLOT_VALUES))
5070	{
5071	bool tmin_is_mcv = false;
5072	bool tmax_is_mcv = false;
5073	FmgrInfo opproc;
5074
5075	fmgr_info(opfuncoid, &opproc);
5076
5077	for (i = `0`; i < sslot.nvalues; i++)
5078	{
5079	if (!have_data)
5080	{
5081	tmin = tmax = sslot.values[i];
5082	tmin_is_mcv = tmax_is_mcv = have_data = true;
5083	continue;
5084	}
5085	if (DatumGetBool(FunctionCall2Coll(&opproc,
5086	sslot.stacoll,
5087	sslot.values[i], tmin)))
5088	{
5089	tmin = sslot.values[i];
5090	tmin_is_mcv = true;
5091	}
5092	if (DatumGetBool(FunctionCall2Coll(&opproc,
5093	sslot.stacoll,
5094	tmax, sslot.values[i])))
5095	{
5096	tmax = sslot.values[i];
5097	tmax_is_mcv = true;
5098	}
5099	}
5100	if (tmin_is_mcv)
5101	tmin = datumCopy(tmin, typByVal, typLen);
5102	if (tmax_is_mcv)
5103	tmax = datumCopy(tmax, typByVal, typLen);
5104	free_attstatsslot(&sslot);
5105	}
5106
5107	*min = tmin;
5108	*max = tmax;
5109	return have_data;
5110	}
5111
5112
5113	/*
5114	* get_actual_variable_range
5115	* Attempt to identify the current actual minimum and/or maximum
5116	* of the specified variable, by looking for a suitable btree index
5117	* and fetching its low and/or high values.
5118	* If successful, store values in min and max, and return true.
5119	* (Either pointer can be NULL if that endpoint isn't needed.)
5120	* If no data available, return false.
5121	*
5122	* sortop is the "<" comparison operator to use.
5123	*/
5124	static bool
5125	get_actual_variable_range(PlannerInfo root, VariableStatData vardata,
5126	Oid sortop,
5127	Datum min, Datum max)
5128	{
5129	bool have_data = false;
5130	RelOptInfo *rel = vardata->rel;
5131	RangeTblEntry *rte;
5132	ListCell *lc;
5133
5134	/ No hope if no relation or it doesn't have indexes /
5135	if (rel == NULL \|\| rel->indexlist == NIL)
5136	return false;
5137	/ If it has indexes it must be a plain relation /
5138	rte = root->simple_rte_array[rel->relid];
5139	Assert(rte->rtekind == RTE_RELATION);
5140
5141	/ Search through the indexes to see if any match our problem /
5142	foreach(lc, rel->indexlist)
5143	{
5144	IndexOptInfo index = (IndexOptInfo ) lfirst(lc);
5145	ScanDirection indexscandir;
5146
5147	/ Ignore non-btree indexes /
5148	if (index->relam != BTREE_AM_OID)
5149	continue;
5150
5151	/*
5152	* Ignore partial indexes --- we only want stats that cover the entire
5153	* relation.
5154	*/
5155	if (index->indpred != NIL)
5156	continue;
5157
5158	/*
5159	* The index list might include hypothetical indexes inserted by a
5160	* get_relation_info hook --- don't try to access them.
5161	*/
5162	if (index->hypothetical)
5163	continue;
5164
5165	/*
5166	* The first index column must match the desired variable and sort
5167	* operator --- but we can use a descending-order index.
5168	*/
5169	if (!match_index_to_operand(vardata->var, `0`, index))
5170	continue;
5171	switch (get_op_opfamily_strategy(sortop, index->sortopfamily[`0`]))
5172	{
5173	case BTLessStrategyNumber:
5174	if (index->reverse_sort[`0`])
5175	indexscandir = BackwardScanDirection;
5176	else
5177	indexscandir = ForwardScanDirection;
5178	break;
5179	case BTGreaterStrategyNumber:
5180	if (index->reverse_sort[`0`])
5181	indexscandir = ForwardScanDirection;
5182	else
5183	indexscandir = BackwardScanDirection;
5184	break;
5185	default:
5186	/ index doesn't match the sortop /
5187	continue;
5188	}
5189
5190	/*
5191	* Found a suitable index to extract data from. Set up some data that
5192	* can be used by both invocations of get_actual_variable_endpoint.
5193	*/
5194	{
5195	MemoryContext tmpcontext;
5196	MemoryContext oldcontext;
5197	Relation heapRel;
5198	Relation indexRel;
5199	TupleTableSlot *slot;
5200	int16 typLen;
5201	bool typByVal;
5202	ScanKeyData scankeys[`1`];
5203
5204	/ Make sure any cruft gets recycled when we're done /
5205	tmpcontext = AllocSetContextCreate(CurrentMemoryContext,
5206	"get_actual_variable_range workspace",
5207	ALLOCSET_DEFAULT_SIZES);
5208	oldcontext = MemoryContextSwitchTo(tmpcontext);
5209
5210	/*
5211	* Open the table and index so we can read from them. We should
5212	* already have some type of lock on each.
5213	*/
5214	heapRel = table_open(rte->relid, NoLock);
5215	indexRel = index_open(index->indexoid, NoLock);
5216
5217	/ build some stuff needed for indexscan execution /
5218	slot = table_slot_create(heapRel, NULL);
5219	get_typlenbyval(vardata->atttype, &typLen, &typByVal);
5220
5221	/ set up an IS NOT NULL scan key so that we ignore nulls /
5222	ScanKeyEntryInitialize(&scankeys[`0`],
5223	SK_ISNULL \| SK_SEARCHNOTNULL,
5224	`1`, / index col to scan /
5225	InvalidStrategy, / no strategy /
5226	InvalidOid, / no strategy subtype /
5227	InvalidOid, / no collation /
5228	InvalidOid, / no reg proc for this /
5229	(Datum) `0`); / constant /
5230
5231	/ If min is requested ... /
5232	if (min)
5233	{
5234	have_data = get_actual_variable_endpoint(heapRel,
5235	indexRel,
5236	indexscandir,
5237	scankeys,
5238	typLen,
5239	typByVal,
5240	slot,
5241	oldcontext,
5242	min);
5243	}
5244	else
5245	{
5246	/ If min not requested, assume index is nonempty /
5247	have_data = true;
5248	}
5249
5250	/ If max is requested, and we didn't find the index is empty /
5251	if (max && have_data)
5252	{
5253	/ scan in the opposite direction; all else is the same /
5254	have_data = get_actual_variable_endpoint(heapRel,
5255	indexRel,
5256	-indexscandir,
5257	scankeys,
5258	typLen,
5259	typByVal,
5260	slot,
5261	oldcontext,
5262	max);
5263	}
5264
5265	/ Clean everything up /
5266	ExecDropSingleTupleTableSlot(slot);
5267
5268	index_close(indexRel, NoLock);
5269	table_close(heapRel, NoLock);
5270
5271	MemoryContextSwitchTo(oldcontext);
5272	MemoryContextDelete(tmpcontext);
5273
5274	/ And we're done /
5275	break;
5276	}
5277	}
5278
5279	return have_data;
5280	}
5281
5282	/*
5283	* Get one endpoint datum (min or max depending on indexscandir) from the
5284	* specified index. Return true if successful, false if index is empty.
5285	* On success, endpoint value is stored to *endpointDatum (and copied into
5286	* outercontext).
5287	*
5288	* scankeys is a 1-element scankey array set up to reject nulls.
5289	* typLen/typByVal describe the datatype of the index's first column.
5290	* tableslot is a slot suitable to hold table tuples, in case we need
5291	* to probe the heap.
5292	* (We could compute these values locally, but that would mean computing them
5293	* twice when get_actual_variable_range needs both the min and the max.)
5294	*/
5295	static bool
5296	get_actual_variable_endpoint(Relation heapRel,
5297	Relation indexRel,
5298	ScanDirection indexscandir,
5299	ScanKey scankeys,
5300	int16 typLen,
5301	bool typByVal,
5302	TupleTableSlot *tableslot,
5303	MemoryContext outercontext,
5304	Datum *endpointDatum)
5305	{
5306	bool have_data = false;
5307	SnapshotData SnapshotNonVacuumable;
5308	IndexScanDesc index_scan;
5309	Buffer vmbuffer = InvalidBuffer;
5310	ItemPointer tid;
5311	Datum values[INDEX_MAX_KEYS];
5312	bool isnull[INDEX_MAX_KEYS];
5313	MemoryContext oldcontext;
5314
5315	/*
5316	* We use the index-only-scan machinery for this. With mostly-static
5317	* tables that's a win because it avoids a heap visit. It's also a win
5318	* for dynamic data, but the reason is less obvious; read on for details.
5319	*
5320	* In principle, we should scan the index with our current active
5321	* snapshot, which is the best approximation we've got to what the query
5322	* will see when executed. But that won't be exact if a new snap is taken
5323	* before running the query, and it can be very expensive if a lot of
5324	* recently-dead or uncommitted rows exist at the beginning or end of the
5325	* index (because we'll laboriously fetch each one and reject it).
5326	* Instead, we use SnapshotNonVacuumable. That will accept recently-dead
5327	* and uncommitted rows as well as normal visible rows. On the other
5328	* hand, it will reject known-dead rows, and thus not give a bogus answer
5329	* when the extreme value has been deleted (unless the deletion was quite
5330	* recent); that case motivates not using SnapshotAny here.
5331	*
5332	* A crucial point here is that SnapshotNonVacuumable, with
5333	* RecentGlobalXmin as horizon, yields the inverse of the condition that
5334	* the indexscan will use to decide that index entries are killable (see
5335	* heap_hot_search_buffer()). Therefore, if the snapshot rejects a tuple
5336	* (or more precisely, all tuples of a HOT chain) and we have to continue
5337	* scanning past it, we know that the indexscan will mark that index entry
5338	* killed. That means that the next get_actual_variable_endpoint() call
5339	* will not have to re-consider that index entry. In this way we avoid
5340	* repetitive work when this function is used a lot during planning.
5341	*
5342	* But using SnapshotNonVacuumable creates a hazard of its own. In a
5343	* recently-created index, some index entries may point at "broken" HOT
5344	* chains in which not all the tuple versions contain data matching the
5345	* index entry. The live tuple version(s) certainly do match the index,
5346	* but SnapshotNonVacuumable can accept recently-dead tuple versions that
5347	* don't match. Hence, if we took data from the selected heap tuple, we
5348	* might get a bogus answer that's not close to the index extremal value,
5349	* or could even be NULL. We avoid this hazard because we take the data
5350	* from the index entry not the heap.
5351	*/
5352	InitNonVacuumableSnapshot(SnapshotNonVacuumable, RecentGlobalXmin);
5353
5354	index_scan = index_beginscan(heapRel, indexRel,
5355	&SnapshotNonVacuumable,
5356	`1`, `0`);
5357	/ Set it up for index-only scan /
5358	index_scan->xs_want_itup = true;
5359	index_rescan(index_scan, scankeys, `1`, NULL, `0`);
5360
5361	/ Fetch first/next tuple in specified direction /
5362	while ((tid = index_getnext_tid(index_scan, indexscandir)) != NULL)
5363	{
5364	if (!VM_ALL_VISIBLE(heapRel,
5365	ItemPointerGetBlockNumber(tid),
5366	&vmbuffer))
5367	{
5368	/ Rats, we have to visit the heap to check visibility /
5369	if (!index_fetch_heap(index_scan, tableslot))
5370	continue; / no visible tuple, try next index entry /
5371
5372	/ We don't actually need the heap tuple for anything /
5373	ExecClearTuple(tableslot);
5374
5375	/*
5376	* We don't care whether there's more than one visible tuple in
5377	* the HOT chain; if any are visible, that's good enough.
5378	*/
5379	}
5380
5381	/*
5382	* We expect that btree will return data in IndexTuple not HeapTuple
5383	* format. It's not lossy either.
5384	*/
5385	if (!index_scan->xs_itup)
5386	elog(ERROR, "no data returned for index-only scan");
5387	if (index_scan->xs_recheck)
5388	elog(ERROR, "unexpected recheck indication from btree");
5389
5390	/ OK to deconstruct the index tuple /
5391	index_deform_tuple(index_scan->xs_itup,
5392	index_scan->xs_itupdesc,
5393	values, isnull);
5394
5395	/ Shouldn't have got a null, but be careful /
5396	if (isnull[`0`])
5397	elog(ERROR, "found unexpected null value in index \"%s\"",
5398	RelationGetRelationName(indexRel));
5399
5400	/ Copy the index column value out to caller's context /
5401	oldcontext = MemoryContextSwitchTo(outercontext);
5402	*endpointDatum = datumCopy(values[`0`], typByVal, typLen);
5403	MemoryContextSwitchTo(oldcontext);
5404	have_data = true;
5405	break;
5406	}
5407
5408	if (vmbuffer != InvalidBuffer)
5409	ReleaseBuffer(vmbuffer);
5410	index_endscan(index_scan);
5411
5412	return have_data;
5413	}
5414
5415	/*
5416	* find_join_input_rel
5417	* Look up the input relation for a join.
5418	*
5419	* We assume that the input relation's RelOptInfo must have been constructed
5420	* already.
5421	*/
5422	static RelOptInfo *
5423	find_join_input_rel(PlannerInfo *root, Relids relids)
5424	{
5425	RelOptInfo *rel = NULL;
5426
5427	switch (bms_membership(relids))
5428	{
5429	case BMS_EMPTY_SET:
5430	/ should not happen /
5431	break;
5432	case BMS_SINGLETON:
5433	rel = find_base_rel(root, bms_singleton_member(relids));
5434	break;
5435	case BMS_MULTIPLE:
5436	rel = find_join_rel(root, relids);
5437	break;
5438	}
5439
5440	if (rel == NULL)
5441	elog(ERROR, "could not find RelOptInfo for given relids");
5442
5443	return rel;
5444	}
5445
5446
5447	/-------------------------------------------------------------------------*
5448	*
5449	* Index cost estimation functions
5450	*
5451	*-------------------------------------------------------------------------
5452	*/
5453
5454	/*
5455	* Extract the actual indexquals (as RestrictInfos) from an IndexClause list
5456	*/
5457	List *
5458	get_quals_from_indexclauses(List *indexclauses)
5459	{
5460	List *result = NIL;
5461	ListCell *lc;
5462
5463	foreach(lc, indexclauses)
5464	{
5465	IndexClause *iclause = lfirst_node(IndexClause, lc);
5466	ListCell *lc2;
5467
5468	foreach(lc2, iclause->indexquals)
5469	{
5470	RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
5471
5472	result = lappend(result, rinfo);
5473	}
5474	}
5475	return result;
5476	}
5477
5478	/*
5479	* Compute the total evaluation cost of the comparison operands in a list
5480	* of index qual expressions. Since we know these will be evaluated just
5481	* once per scan, there's no need to distinguish startup from per-row cost.
5482	*
5483	* This can be used either on the result of get_quals_from_indexclauses(),
5484	* or directly on an indexorderbys list. In both cases, we expect that the
5485	* index key expression is on the left side of binary clauses.
5486	*/
5487	Cost
5488	index_other_operands_eval_cost(PlannerInfo root, List indexquals)
5489	{
5490	Cost qual_arg_cost = `0`;
5491	ListCell *lc;
5492
5493	foreach(lc, indexquals)
5494	{
5495	Expr clause = (Expr ) lfirst(lc);
5496	Node *other_operand;
5497	QualCost index_qual_cost;
5498
5499	/*
5500	* Index quals will have RestrictInfos, indexorderbys won't. Look
5501	* through RestrictInfo if present.
5502	*/
5503	if (IsA(clause, RestrictInfo))
5504	clause = ((RestrictInfo *) clause)->clause;
5505
5506	if (IsA(clause, OpExpr))
5507	{
5508	OpExpr op = (OpExpr ) clause;
5509
5510	other_operand = (Node *) lsecond(op->args);
5511	}
5512	else if (IsA(clause, RowCompareExpr))
5513	{
5514	RowCompareExpr rc = (RowCompareExpr ) clause;
5515
5516	other_operand = (Node *) rc->rargs;
5517	}
5518	else if (IsA(clause, ScalarArrayOpExpr))
5519	{
5520	ScalarArrayOpExpr saop = (ScalarArrayOpExpr ) clause;
5521
5522	other_operand = (Node *) lsecond(saop->args);
5523	}
5524	else if (IsA(clause, NullTest))
5525	{
5526	other_operand = NULL;
5527	}
5528	else
5529	{
5530	elog(ERROR, "unsupported indexqual type: %d",
5531	(int) nodeTag(clause));
5532	other_operand = NULL; / keep compiler quiet /
5533	}
5534
5535	cost_qual_eval_node(&index_qual_cost, other_operand, root);
5536	qual_arg_cost += index_qual_cost.startup + index_qual_cost.per_tuple;
5537	}
5538	return qual_arg_cost;
5539	}
5540
5541	void
5542	genericcostestimate(PlannerInfo *root,
5543	IndexPath *path,
5544	double loop_count,
5545	GenericCosts *costs)
5546	{
5547	IndexOptInfo *index = path->indexinfo;
5548	List *indexQuals = get_quals_from_indexclauses(path->indexclauses);
5549	List *indexOrderBys = path->indexorderbys;
5550	Cost indexStartupCost;
5551	Cost indexTotalCost;
5552	Selectivity indexSelectivity;
5553	double indexCorrelation;
5554	double numIndexPages;
5555	double numIndexTuples;
5556	double spc_random_page_cost;
5557	double num_sa_scans;
5558	double num_outer_scans;
5559	double num_scans;
5560	double qual_op_cost;
5561	double qual_arg_cost;
5562	List *selectivityQuals;
5563	ListCell *l;
5564
5565	/*
5566	* If the index is partial, AND the index predicate with the explicitly
5567	* given indexquals to produce a more accurate idea of the index
5568	* selectivity.
5569	*/
5570	selectivityQuals = add_predicate_to_index_quals(index, indexQuals);
5571
5572	/*
5573	* Check for ScalarArrayOpExpr index quals, and estimate the number of
5574	* index scans that will be performed.
5575	*/
5576	num_sa_scans = `1`;
5577	foreach(l, indexQuals)
5578	{
5579	RestrictInfo rinfo = (RestrictInfo ) lfirst(l);
5580
5581	if (IsA(rinfo->clause, ScalarArrayOpExpr))
5582	{
5583	ScalarArrayOpExpr saop = (ScalarArrayOpExpr ) rinfo->clause;
5584	int alength = estimate_array_length(lsecond(saop->args));
5585
5586	if (alength > `1`)
5587	num_sa_scans *= alength;
5588	}
5589	}
5590
5591	/ Estimate the fraction of main-table tuples that will be visited /
5592	indexSelectivity = clauselist_selectivity(root, selectivityQuals,
5593	index->rel->relid,
5594	JOIN_INNER,
5595	NULL);
5596
5597	/*
5598	* If caller didn't give us an estimate, estimate the number of index
5599	* tuples that will be visited. We do it in this rather peculiar-looking
5600	* way in order to get the right answer for partial indexes.
5601	*/
5602	numIndexTuples = costs->numIndexTuples;
5603	if (numIndexTuples <= `0.0`)
5604	{
5605	numIndexTuples = indexSelectivity * index->rel->tuples;
5606
5607	/*
5608	* The above calculation counts all the tuples visited across all
5609	* scans induced by ScalarArrayOpExpr nodes. We want to consider the
5610	* average per-indexscan number, so adjust. This is a handy place to
5611	* round to integer, too. (If caller supplied tuple estimate, it's
5612	* responsible for handling these considerations.)
5613	*/
5614	numIndexTuples = rint(numIndexTuples / num_sa_scans);
5615	}
5616
5617	/*
5618	* We can bound the number of tuples by the index size in any case. Also,
5619	* always estimate at least one tuple is touched, even when
5620	* indexSelectivity estimate is tiny.
5621	*/
5622	if (numIndexTuples > index->tuples)
5623	numIndexTuples = index->tuples;
5624	if (numIndexTuples < `1.0`)
5625	numIndexTuples = `1.0`;
5626
5627	/*
5628	* Estimate the number of index pages that will be retrieved.
5629	*
5630	* We use the simplistic method of taking a pro-rata fraction of the total
5631	* number of index pages. In effect, this counts only leaf pages and not
5632	* any overhead such as index metapage or upper tree levels.
5633	*
5634	* In practice access to upper index levels is often nearly free because
5635	* those tend to stay in cache under load; moreover, the cost involved is
5636	* highly dependent on index type. We therefore ignore such costs here
5637	* and leave it to the caller to add a suitable charge if needed.
5638	*/
5639	if (index->pages > `1` && index->tuples > `1`)
5640	numIndexPages = ceil(numIndexTuples * index->pages / index->tuples);
5641	else
5642	numIndexPages = `1.0`;
5643
5644	/ fetch estimated page cost for tablespace containing index /
5645	get_tablespace_page_costs(index->reltablespace,
5646	&spc_random_page_cost,
5647	NULL);
5648
5649	/*
5650	* Now compute the disk access costs.
5651	*
5652	* The above calculations are all per-index-scan. However, if we are in a
5653	* nestloop inner scan, we can expect the scan to be repeated (with
5654	* different search keys) for each row of the outer relation. Likewise,
5655	* ScalarArrayOpExpr quals result in multiple index scans. This creates
5656	* the potential for cache effects to reduce the number of disk page
5657	* fetches needed. We want to estimate the average per-scan I/O cost in
5658	* the presence of caching.
5659	*
5660	* We use the Mackert-Lohman formula (see costsize.c for details) to
5661	* estimate the total number of page fetches that occur. While this
5662	* wasn't what it was designed for, it seems a reasonable model anyway.
5663	* Note that we are counting pages not tuples anymore, so we take N = T =
5664	* index size, as if there were one "tuple" per page.
5665	*/
5666	num_outer_scans = loop_count;
5667	num_scans = num_sa_scans * num_outer_scans;
5668
5669	if (num_scans > `1`)
5670	{
5671	double pages_fetched;
5672
5673	/ total page fetches ignoring cache effects /
5674	pages_fetched = numIndexPages * num_scans;
5675
5676	/ use Mackert and Lohman formula to adjust for cache effects /
5677	pages_fetched = index_pages_fetched(pages_fetched,
5678	index->pages,
5679	(double) index->pages,
5680	root);
5681
5682	/*
5683	* Now compute the total disk access cost, and then report a pro-rated
5684	* share for each outer scan. (Don't pro-rate for ScalarArrayOpExpr,
5685	* since that's internal to the indexscan.)
5686	*/
5687	indexTotalCost = (pages_fetched * spc_random_page_cost)
5688	/ num_outer_scans;
5689	}
5690	else
5691	{
5692	/*
5693	* For a single index scan, we just charge spc_random_page_cost per
5694	* page touched.
5695	*/
5696	indexTotalCost = numIndexPages * spc_random_page_cost;
5697	}
5698
5699	/*
5700	* CPU cost: any complex expressions in the indexquals will need to be
5701	* evaluated once at the start of the scan to reduce them to runtime keys
5702	* to pass to the index AM (see nodeIndexscan.c). We model the per-tuple
5703	* CPU costs as cpu_index_tuple_cost plus one cpu_operator_cost per
5704	* indexqual operator. Because we have numIndexTuples as a per-scan
5705	* number, we have to multiply by num_sa_scans to get the correct result
5706	* for ScalarArrayOpExpr cases. Similarly add in costs for any index
5707	* ORDER BY expressions.
5708	*
5709	* Note: this neglects the possible costs of rechecking lossy operators.
5710	* Detecting that that might be needed seems more expensive than it's
5711	* worth, though, considering all the other inaccuracies here ...
5712	*/
5713	qual_arg_cost = index_other_operands_eval_cost(root, indexQuals) +
5714	index_other_operands_eval_cost(root, indexOrderBys);
5715	qual_op_cost = cpu_operator_cost *
5716	(list_length(indexQuals) + list_length(indexOrderBys));
5717
5718	indexStartupCost = qual_arg_cost;
5719	indexTotalCost += qual_arg_cost;
5720	indexTotalCost += numIndexTuples * num_sa_scans * (cpu_index_tuple_cost + qual_op_cost);
5721
5722	/*
5723	* Generic assumption about index correlation: there isn't any.
5724	*/
5725	indexCorrelation = `0.0`;
5726
5727	/*
5728	* Return everything to caller.
5729	*/
5730	costs->indexStartupCost = indexStartupCost;
5731	costs->indexTotalCost = indexTotalCost;
5732	costs->indexSelectivity = indexSelectivity;
5733	costs->indexCorrelation = indexCorrelation;
5734	costs->numIndexPages = numIndexPages;
5735	costs->numIndexTuples = numIndexTuples;
5736	costs->spc_random_page_cost = spc_random_page_cost;
5737	costs->num_sa_scans = num_sa_scans;
5738	}
5739
5740	/*
5741	* If the index is partial, add its predicate to the given qual list.
5742	*
5743	* ANDing the index predicate with the explicitly given indexquals produces
5744	* a more accurate idea of the index's selectivity. However, we need to be
5745	* careful not to insert redundant clauses, because clauselist_selectivity()
5746	* is easily fooled into computing a too-low selectivity estimate. Our
5747	* approach is to add only the predicate clause(s) that cannot be proven to
5748	* be implied by the given indexquals. This successfully handles cases such
5749	* as a qual "x = 42" used with a partial index "WHERE x >= 40 AND x < 50".
5750	* There are many other cases where we won't detect redundancy, leading to a
5751	* too-low selectivity estimate, which will bias the system in favor of using
5752	* partial indexes where possible. That is not necessarily bad though.
5753	*
5754	* Note that indexQuals contains RestrictInfo nodes while the indpred
5755	* does not, so the output list will be mixed. This is OK for both
5756	* predicate_implied_by() and clauselist_selectivity(), but might be
5757	* problematic if the result were passed to other things.
5758	*/
5759	List *
5760	add_predicate_to_index_quals(IndexOptInfo index, List indexQuals)
5761	{
5762	List *predExtraQuals = NIL;
5763	ListCell *lc;
5764
5765	if (index->indpred == NIL)
5766	return indexQuals;
5767
5768	foreach(lc, index->indpred)
5769	{
5770	Node predQual = (Node ) lfirst(lc);
5771	List *oneQual = list_make1(predQual);
5772
5773	if (!predicate_implied_by(oneQual, indexQuals, false))
5774	predExtraQuals = list_concat(predExtraQuals, oneQual);
5775	}
5776	/ list_concat avoids modifying the passed-in indexQuals list /
5777	return list_concat(predExtraQuals, indexQuals);
5778	}
5779
5780
5781	void
5782	btcostestimate(PlannerInfo root, IndexPath path, double loop_count,
5783	Cost indexStartupCost, Cost indexTotalCost,
5784	Selectivity indexSelectivity, double* *indexCorrelation,
5785	double *indexPages)
5786	{
5787	IndexOptInfo *index = path->indexinfo;
5788	GenericCosts costs;
5789	Oid relid;
5790	AttrNumber colnum;
5791	VariableStatData vardata;
5792	double numIndexTuples;
5793	Cost descentCost;
5794	List *indexBoundQuals;
5795	int indexcol;
5796	bool eqQualHere;
5797	bool found_saop;
5798	bool found_is_null_op;
5799	double num_sa_scans;
5800	ListCell *lc;
5801
5802	/*
5803	* For a btree scan, only leading '=' quals plus inequality quals for the
5804	* immediately next attribute contribute to index selectivity (these are
5805	* the "boundary quals" that determine the starting and stopping points of
5806	* the index scan). Additional quals can suppress visits to the heap, so
5807	* it's OK to count them in indexSelectivity, but they should not count
5808	* for estimating numIndexTuples. So we must examine the given indexquals
5809	* to find out which ones count as boundary quals. We rely on the
5810	* knowledge that they are given in index column order.
5811	*
5812	* For a RowCompareExpr, we consider only the first column, just as
5813	* rowcomparesel() does.
5814	*
5815	* If there's a ScalarArrayOpExpr in the quals, we'll actually perform N
5816	* index scans not one, but the ScalarArrayOpExpr's operator can be
5817	* considered to act the same as it normally does.
5818	*/
5819	indexBoundQuals = NIL;
5820	indexcol = `0`;
5821	eqQualHere = false;
5822	found_saop = false;
5823	found_is_null_op = false;
5824	num_sa_scans = `1`;
5825	foreach(lc, path->indexclauses)
5826	{
5827	IndexClause *iclause = lfirst_node(IndexClause, lc);
5828	ListCell *lc2;
5829
5830	if (indexcol != iclause->indexcol)
5831	{
5832	/ Beginning of a new column's quals /
5833	if (!eqQualHere)
5834	break; / done if no '=' qual for indexcol /
5835	eqQualHere = false;
5836	indexcol++;
5837	if (indexcol != iclause->indexcol)
5838	break; / no quals at all for indexcol /
5839	}
5840
5841	/ Examine each indexqual associated with this index clause /
5842	foreach(lc2, iclause->indexquals)
5843	{
5844	RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
5845	Expr *clause = rinfo->clause;
5846	Oid clause_op = InvalidOid;
5847	int op_strategy;
5848
5849	if (IsA(clause, OpExpr))
5850	{
5851	OpExpr op = (OpExpr ) clause;
5852
5853	clause_op = op->opno;
5854	}
5855	else if (IsA(clause, RowCompareExpr))
5856	{
5857	RowCompareExpr rc = (RowCompareExpr ) clause;
5858
5859	clause_op = linitial_oid(rc->opnos);
5860	}
5861	else if (IsA(clause, ScalarArrayOpExpr))
5862	{
5863	ScalarArrayOpExpr saop = (ScalarArrayOpExpr ) clause;
5864	Node other_operand = (Node ) lsecond(saop->args);
5865	int alength = estimate_array_length(other_operand);
5866
5867	clause_op = saop->opno;
5868	found_saop = true;
5869	/ count number of SA scans induced by indexBoundQuals only /
5870	if (alength > `1`)
5871	num_sa_scans *= alength;
5872	}
5873	else if (IsA(clause, NullTest))
5874	{
5875	NullTest nt = (NullTest ) clause;
5876
5877	if (nt->nulltesttype == IS_NULL)
5878	{
5879	found_is_null_op = true;
5880	/ IS NULL is like = for selectivity purposes /
5881	eqQualHere = true;
5882	}
5883	}
5884	else
5885	elog(ERROR, "unsupported indexqual type: %d",
5886	(int) nodeTag(clause));
5887
5888	/ check for equality operator /
5889	if (OidIsValid(clause_op))
5890	{
5891	op_strategy = get_op_opfamily_strategy(clause_op,
5892	index->opfamily[indexcol]);
5893	Assert(op_strategy != `0`); / not a member of opfamily?? /
5894	if (op_strategy == BTEqualStrategyNumber)
5895	eqQualHere = true;
5896	}
5897
5898	indexBoundQuals = lappend(indexBoundQuals, rinfo);
5899	}
5900	}
5901
5902	/*
5903	* If index is unique and we found an '=' clause for each column, we can
5904	* just assume numIndexTuples = 1 and skip the expensive
5905	* clauselist_selectivity calculations. However, a ScalarArrayOp or
5906	* NullTest invalidates that theory, even though it sets eqQualHere.
5907	*/
5908	if (index->unique &&
5909	indexcol == index->nkeycolumns - `1` &&
5910	eqQualHere &&
5911	!found_saop &&
5912	!found_is_null_op)
5913	numIndexTuples = `1.0`;
5914	else
5915	{
5916	List *selectivityQuals;
5917	Selectivity btreeSelectivity;
5918
5919	/*
5920	* If the index is partial, AND the index predicate with the
5921	* index-bound quals to produce a more accurate idea of the number of
5922	* rows covered by the bound conditions.
5923	*/
5924	selectivityQuals = add_predicate_to_index_quals(index, indexBoundQuals);
5925
5926	btreeSelectivity = clauselist_selectivity(root, selectivityQuals,
5927	index->rel->relid,
5928	JOIN_INNER,
5929	NULL);
5930	numIndexTuples = btreeSelectivity * index->rel->tuples;
5931
5932	/*
5933	* As in genericcostestimate(), we have to adjust for any
5934	* ScalarArrayOpExpr quals included in indexBoundQuals, and then round
5935	* to integer.
5936	*/
5937	numIndexTuples = rint(numIndexTuples / num_sa_scans);
5938	}
5939
5940	/*
5941	* Now do generic index cost estimation.
5942	*/
5943	MemSet(&costs, `0`, sizeof(costs));
5944	costs.numIndexTuples = numIndexTuples;
5945
5946	genericcostestimate(root, path, loop_count, &costs);
5947
5948	/*
5949	* Add a CPU-cost component to represent the costs of initial btree
5950	* descent. We don't charge any I/O cost for touching upper btree levels,
5951	* since they tend to stay in cache, but we still have to do about log2(N)
5952	* comparisons to descend a btree of N leaf tuples. We charge one
5953	* cpu_operator_cost per comparison.
5954	*
5955	* If there are ScalarArrayOpExprs, charge this once per SA scan. The
5956	* ones after the first one are not startup cost so far as the overall
5957	* plan is concerned, so add them only to "total" cost.
5958	*/
5959	if (index->tuples > `1`) / avoid computing log(0) /
5960	{
5961	descentCost = ceil(log(index->tuples) / log(`2.0`)) * cpu_operator_cost;
5962	costs.indexStartupCost += descentCost;
5963	costs.indexTotalCost += costs.num_sa_scans * descentCost;
5964	}
5965
5966	/*
5967	* Even though we're not charging I/O cost for touching upper btree pages,
5968	* it's still reasonable to charge some CPU cost per page descended
5969	* through. Moreover, if we had no such charge at all, bloated indexes
5970	* would appear to have the same search cost as unbloated ones, at least
5971	* in cases where only a single leaf page is expected to be visited. This
5972	* cost is somewhat arbitrarily set at 50x cpu_operator_cost per page
5973	* touched. The number of such pages is btree tree height plus one (ie,
5974	* we charge for the leaf page too). As above, charge once per SA scan.
5975	*/
5976	descentCost = (index->tree_height + `1`) * `50.0` * cpu_operator_cost;
5977	costs.indexStartupCost += descentCost;
5978	costs.indexTotalCost += costs.num_sa_scans * descentCost;
5979
5980	/*
5981	* If we can get an estimate of the first column's ordering correlation C
5982	* from pg_statistic, estimate the index correlation as C for a
5983	* single-column index, or C * 0.75 for multiple columns. (The idea here
5984	* is that multiple columns dilute the importance of the first column's
5985	* ordering, but don't negate it entirely. Before 8.0 we divided the
5986	* correlation by the number of columns, but that seems too strong.)
5987	*/
5988	MemSet(&vardata, `0`, sizeof(vardata));
5989
5990	if (index->indexkeys[`0`] != `0`)
5991	{
5992	/ Simple variable --- look to stats for the underlying table /
5993	RangeTblEntry *rte = planner_rt_fetch(index->rel->relid, root);
5994
5995	Assert(rte->rtekind == RTE_RELATION);
5996	relid = rte->relid;
5997	Assert(relid != InvalidOid);
5998	colnum = index->indexkeys[`0`];
5999
6000	if (get_relation_stats_hook &&
6001	(*get_relation_stats_hook) (root, rte, colnum, &vardata))
6002	{
6003	/*
6004	* The hook took control of acquiring a stats tuple. If it did
6005	* supply a tuple, it'd better have supplied a freefunc.
6006	*/
6007	if (HeapTupleIsValid(vardata.statsTuple) &&
6008	!vardata.freefunc)
6009	elog(ERROR, "no function provided to release variable stats with");
6010	}
6011	else
6012	{
6013	vardata.statsTuple = SearchSysCache3(STATRELATTINH,
6014	ObjectIdGetDatum(relid),
6015	Int16GetDatum(colnum),
6016	BoolGetDatum(rte->inh));
6017	vardata.freefunc = ReleaseSysCache;
6018	}
6019	}
6020	else
6021	{
6022	/ Expression --- maybe there are stats for the index itself /
6023	relid = index->indexoid;
6024	colnum = `1`;
6025
6026	if (get_index_stats_hook &&
6027	(*get_index_stats_hook) (root, relid, colnum, &vardata))
6028	{
6029	/*
6030	* The hook took control of acquiring a stats tuple. If it did
6031	* supply a tuple, it'd better have supplied a freefunc.
6032	*/
6033	if (HeapTupleIsValid(vardata.statsTuple) &&
6034	!vardata.freefunc)
6035	elog(ERROR, "no function provided to release variable stats with");
6036	}
6037	else
6038	{
6039	vardata.statsTuple = SearchSysCache3(STATRELATTINH,
6040	ObjectIdGetDatum(relid),
6041	Int16GetDatum(colnum),
6042	BoolGetDatum(false));
6043	vardata.freefunc = ReleaseSysCache;
6044	}
6045	}
6046
6047	if (HeapTupleIsValid(vardata.statsTuple))
6048	{
6049	Oid sortop;
6050	AttStatsSlot sslot;
6051
6052	sortop = get_opfamily_member(index->opfamily[`0`],
6053	index->opcintype[`0`],
6054	index->opcintype[`0`],
6055	BTLessStrategyNumber);
6056	if (OidIsValid(sortop) &&
6057	get_attstatsslot(&sslot, vardata.statsTuple,
6058	STATISTIC_KIND_CORRELATION, sortop,
6059	ATTSTATSSLOT_NUMBERS))
6060	{
6061	double varCorrelation;
6062
6063	Assert(sslot.nnumbers == `1`);
6064	varCorrelation = sslot.numbers[`0`];
6065
6066	if (index->reverse_sort[`0`])
6067	varCorrelation = -varCorrelation;
6068
6069	if (index->nkeycolumns > `1`)
6070	costs.indexCorrelation = varCorrelation * `0.75`;
6071	else
6072	costs.indexCorrelation = varCorrelation;
6073
6074	free_attstatsslot(&sslot);
6075	}
6076	}
6077
6078	ReleaseVariableStats(vardata);
6079
6080	*indexStartupCost = costs.indexStartupCost;
6081	*indexTotalCost = costs.indexTotalCost;
6082	*indexSelectivity = costs.indexSelectivity;
6083	*indexCorrelation = costs.indexCorrelation;
6084	*indexPages = costs.numIndexPages;
6085	}
6086
6087	void
6088	hashcostestimate(PlannerInfo root, IndexPath path, double loop_count,
6089	Cost indexStartupCost, Cost indexTotalCost,
6090	Selectivity indexSelectivity, double* *indexCorrelation,
6091	double *indexPages)
6092	{
6093	GenericCosts costs;
6094
6095	MemSet(&costs, `0`, sizeof(costs));
6096
6097	genericcostestimate(root, path, loop_count, &costs);
6098
6099	/*
6100	* A hash index has no descent costs as such, since the index AM can go
6101	* directly to the target bucket after computing the hash value. There
6102	* are a couple of other hash-specific costs that we could conceivably add
6103	* here, though:
6104	*
6105	* Ideally we'd charge spc_random_page_cost for each page in the target
6106	* bucket, not just the numIndexPages pages that genericcostestimate
6107	* thought we'd visit. However in most cases we don't know which bucket
6108	* that will be. There's no point in considering the average bucket size
6109	* because the hash AM makes sure that's always one page.
6110	*
6111	* Likewise, we could consider charging some CPU for each index tuple in
6112	* the bucket, if we knew how many there were. But the per-tuple cost is
6113	* just a hash value comparison, not a general datatype-dependent
6114	* comparison, so any such charge ought to be quite a bit less than
6115	* cpu_operator_cost; which makes it probably not worth worrying about.
6116	*
6117	* A bigger issue is that chance hash-value collisions will result in
6118	* wasted probes into the heap. We don't currently attempt to model this
6119	* cost on the grounds that it's rare, but maybe it's not rare enough.
6120	* (Any fix for this ought to consider the generic lossy-operator problem,
6121	* though; it's not entirely hash-specific.)
6122	*/
6123
6124	*indexStartupCost = costs.indexStartupCost;
6125	*indexTotalCost = costs.indexTotalCost;
6126	*indexSelectivity = costs.indexSelectivity;
6127	*indexCorrelation = costs.indexCorrelation;
6128	*indexPages = costs.numIndexPages;
6129	}
6130
6131	void
6132	gistcostestimate(PlannerInfo root, IndexPath path, double loop_count,
6133	Cost indexStartupCost, Cost indexTotalCost,
6134	Selectivity indexSelectivity, double* *indexCorrelation,
6135	double *indexPages)
6136	{
6137	IndexOptInfo *index = path->indexinfo;
6138	GenericCosts costs;
6139	Cost descentCost;
6140
6141	MemSet(&costs, `0`, sizeof(costs));
6142
6143	genericcostestimate(root, path, loop_count, &costs);
6144
6145	/*
6146	* We model index descent costs similarly to those for btree, but to do
6147	* that we first need an idea of the tree height. We somewhat arbitrarily
6148	* assume that the fanout is 100, meaning the tree height is at most
6149	* log100(index->pages).
6150	*
6151	* Although this computation isn't really expensive enough to require
6152	* caching, we might as well use index->tree_height to cache it.
6153	*/
6154	if (index->tree_height < `0`) / unknown? /
6155	{
6156	if (index->pages > `1`) / avoid computing log(0) /
6157	index->tree_height = (int) (log(index->pages) / log(`100.0`));
6158	else
6159	index->tree_height = `0`;
6160	}
6161
6162	/*
6163	* Add a CPU-cost component to represent the costs of initial descent. We
6164	* just use log(N) here not log2(N) since the branching factor isn't
6165	* necessarily two anyway. As for btree, charge once per SA scan.
6166	*/
6167	if (index->tuples > `1`) / avoid computing log(0) /
6168	{
6169	descentCost = ceil(log(index->tuples)) * cpu_operator_cost;
6170	costs.indexStartupCost += descentCost;
6171	costs.indexTotalCost += costs.num_sa_scans * descentCost;
6172	}
6173
6174	/*
6175	* Likewise add a per-page charge, calculated the same as for btrees.
6176	*/
6177	descentCost = (index->tree_height + `1`) * `50.0` * cpu_operator_cost;
6178	costs.indexStartupCost += descentCost;
6179	costs.indexTotalCost += costs.num_sa_scans * descentCost;
6180
6181	*indexStartupCost = costs.indexStartupCost;
6182	*indexTotalCost = costs.indexTotalCost;
6183	*indexSelectivity = costs.indexSelectivity;
6184	*indexCorrelation = costs.indexCorrelation;
6185	*indexPages = costs.numIndexPages;
6186	}
6187
6188	void
6189	spgcostestimate(PlannerInfo root, IndexPath path, double loop_count,
6190	Cost indexStartupCost, Cost indexTotalCost,
6191	Selectivity indexSelectivity, double* *indexCorrelation,
6192	double *indexPages)
6193	{
6194	IndexOptInfo *index = path->indexinfo;
6195	GenericCosts costs;
6196	Cost descentCost;
6197
6198	MemSet(&costs, `0`, sizeof(costs));
6199
6200	genericcostestimate(root, path, loop_count, &costs);
6201
6202	/*
6203	* We model index descent costs similarly to those for btree, but to do
6204	* that we first need an idea of the tree height. We somewhat arbitrarily
6205	* assume that the fanout is 100, meaning the tree height is at most
6206	* log100(index->pages).
6207	*
6208	* Although this computation isn't really expensive enough to require
6209	* caching, we might as well use index->tree_height to cache it.
6210	*/
6211	if (index->tree_height < `0`) / unknown? /
6212	{
6213	if (index->pages > `1`) / avoid computing log(0) /
6214	index->tree_height = (int) (log(index->pages) / log(`100.0`));
6215	else
6216	index->tree_height = `0`;
6217	}
6218
6219	/*
6220	* Add a CPU-cost component to represent the costs of initial descent. We
6221	* just use log(N) here not log2(N) since the branching factor isn't
6222	* necessarily two anyway. As for btree, charge once per SA scan.
6223	*/
6224	if (index->tuples > `1`) / avoid computing log(0) /
6225	{
6226	descentCost = ceil(log(index->tuples)) * cpu_operator_cost;
6227	costs.indexStartupCost += descentCost;
6228	costs.indexTotalCost += costs.num_sa_scans * descentCost;
6229	}
6230
6231	/*
6232	* Likewise add a per-page charge, calculated the same as for btrees.
6233	*/
6234	descentCost = (index->tree_height + `1`) * `50.0` * cpu_operator_cost;
6235	costs.indexStartupCost += descentCost;
6236	costs.indexTotalCost += costs.num_sa_scans * descentCost;
6237
6238	*indexStartupCost = costs.indexStartupCost;
6239	*indexTotalCost = costs.indexTotalCost;
6240	*indexSelectivity = costs.indexSelectivity;
6241	*indexCorrelation = costs.indexCorrelation;
6242	*indexPages = costs.numIndexPages;
6243	}
6244
6245
6246	/*
6247	* Support routines for gincostestimate
6248	*/
6249
6250	typedef struct
6251	{
6252	bool haveFullScan;
6253	double partialEntries;
6254	double exactEntries;
6255	double searchEntries;
6256	double arrayScans;
6257	} GinQualCounts;
6258
6259	/*
6260	* Estimate the number of index terms that need to be searched for while
6261	* testing the given GIN query, and increment the counts in *counts
6262	* appropriately. If the query is unsatisfiable, return false.
6263	*/
6264	static bool
6265	gincost_pattern(IndexOptInfo index, int* indexcol,
6266	Oid clause_op, Datum query,
6267	GinQualCounts *counts)
6268	{
6269	Oid extractProcOid;
6270	Oid collation;
6271	int strategy_op;
6272	Oid lefttype,
6273	righttype;
6274	int32 nentries = `0`;
6275	bool *partial_matches = NULL;
6276	Pointer *extra_data = NULL;
6277	bool *nullFlags = NULL;
6278	int32 searchMode = GIN_SEARCH_MODE_DEFAULT;
6279	int32 i;
6280
6281	Assert(indexcol < index->nkeycolumns);
6282
6283	/*
6284	* Get the operator's strategy number and declared input data types within
6285	* the index opfamily. (We don't need the latter, but we use
6286	* get_op_opfamily_properties because it will throw error if it fails to
6287	* find a matching pg_amop entry.)
6288	*/
6289	get_op_opfamily_properties(clause_op, index->opfamily[indexcol], false,
6290	&strategy_op, &lefttype, &righttype);
6291
6292	/*
6293	* GIN always uses the "default" support functions, which are those with
6294	* lefttype == righttype == the opclass' opcintype (see
6295	* IndexSupportInitialize in relcache.c).
6296	*/
6297	extractProcOid = get_opfamily_proc(index->opfamily[indexcol],
6298	index->opcintype[indexcol],
6299	index->opcintype[indexcol],
6300	GIN_EXTRACTQUERY_PROC);
6301
6302	if (!OidIsValid(extractProcOid))
6303	{
6304	/ should not happen; throw same error as index_getprocinfo /
6305	elog(ERROR, "missing support function %d for attribute %d of index \"%s\"",
6306	GIN_EXTRACTQUERY_PROC, indexcol + `1`,
6307	get_rel_name(index->indexoid));
6308	}
6309
6310	/*
6311	* Choose collation to pass to extractProc (should match initGinState).
6312	*/
6313	if (OidIsValid(index->indexcollations[indexcol]))
6314	collation = index->indexcollations[indexcol];
6315	else
6316	collation = DEFAULT_COLLATION_OID;
6317
6318	OidFunctionCall7Coll(extractProcOid,
6319	collation,
6320	query,
6321	PointerGetDatum(&nentries),
6322	UInt16GetDatum(strategy_op),
6323	PointerGetDatum(&partial_matches),
6324	PointerGetDatum(&extra_data),
6325	PointerGetDatum(&nullFlags),
6326	PointerGetDatum(&searchMode));
6327
6328	if (nentries <= `0` && searchMode == GIN_SEARCH_MODE_DEFAULT)
6329	{
6330	/ No match is possible /
6331	return false;
6332	}
6333
6334	for (i = `0`; i < nentries; i++)
6335	{
6336	/*
6337	* For partial match we haven't any information to estimate number of
6338	* matched entries in index, so, we just estimate it as 100
6339	*/
6340	if (partial_matches && partial_matches[i])
6341	counts->partialEntries += `100`;
6342	else
6343	counts->exactEntries++;
6344
6345	counts->searchEntries++;
6346	}
6347
6348	if (searchMode == GIN_SEARCH_MODE_INCLUDE_EMPTY)
6349	{
6350	/ Treat "include empty" like an exact-match item /
6351	counts->exactEntries++;
6352	counts->searchEntries++;
6353	}
6354	else if (searchMode != GIN_SEARCH_MODE_DEFAULT)
6355	{
6356	/ It's GIN_SEARCH_MODE_ALL /
6357	counts->haveFullScan = true;
6358	}
6359
6360	return true;
6361	}
6362
6363	/*
6364	* Estimate the number of index terms that need to be searched for while
6365	* testing the given GIN index clause, and increment the counts in *counts
6366	* appropriately. If the query is unsatisfiable, return false.
6367	*/
6368	static bool
6369	gincost_opexpr(PlannerInfo *root,
6370	IndexOptInfo *index,
6371	int indexcol,
6372	OpExpr *clause,
6373	GinQualCounts *counts)
6374	{
6375	Oid clause_op = clause->opno;
6376	Node operand = (Node ) lsecond(clause->args);
6377
6378	/ aggressively reduce to a constant, and look through relabeling /
6379	operand = estimate_expression_value(root, operand);
6380
6381	if (IsA(operand, RelabelType))
6382	operand = (Node ) ((RelabelType ) operand)->arg;
6383
6384	/*
6385	* It's impossible to call extractQuery method for unknown operand. So
6386	* unless operand is a Const we can't do much; just assume there will be
6387	* one ordinary search entry from the operand at runtime.
6388	*/
6389	if (!IsA(operand, Const))
6390	{
6391	counts->exactEntries++;
6392	counts->searchEntries++;
6393	return true;
6394	}
6395
6396	/ If Const is null, there can be no matches /
6397	if (((Const *) operand)->constisnull)
6398	return false;
6399
6400	/ Otherwise, apply extractQuery and get the actual term counts /
6401	return gincost_pattern(index, indexcol, clause_op,
6402	((Const *) operand)->constvalue,
6403	counts);
6404	}
6405
6406	/*
6407	* Estimate the number of index terms that need to be searched for while
6408	* testing the given GIN index clause, and increment the counts in *counts
6409	* appropriately. If the query is unsatisfiable, return false.
6410	*
6411	* A ScalarArrayOpExpr will give rise to N separate indexscans at runtime,
6412	* each of which involves one value from the RHS array, plus all the
6413	* non-array quals (if any). To model this, we average the counts across
6414	* the RHS elements, and add the averages to the counts in *counts (which
6415	* correspond to per-indexscan costs). We also multiply counts->arrayScans
6416	* by N, causing gincostestimate to scale up its estimates accordingly.
6417	*/
6418	static bool
6419	gincost_scalararrayopexpr(PlannerInfo *root,
6420	IndexOptInfo *index,
6421	int indexcol,
6422	ScalarArrayOpExpr *clause,
6423	double numIndexEntries,
6424	GinQualCounts *counts)
6425	{
6426	Oid clause_op = clause->opno;
6427	Node rightop = (Node ) lsecond(clause->args);
6428	ArrayType *arrayval;
6429	int16 elmlen;
6430	bool elmbyval;
6431	char elmalign;
6432	int numElems;
6433	Datum *elemValues;
6434	bool *elemNulls;
6435	GinQualCounts arraycounts;
6436	int numPossible = `0`;
6437	int i;
6438
6439	Assert(clause->useOr);
6440
6441	/ aggressively reduce to a constant, and look through relabeling /
6442	rightop = estimate_expression_value(root, rightop);
6443
6444	if (IsA(rightop, RelabelType))
6445	rightop = (Node ) ((RelabelType ) rightop)->arg;
6446
6447	/*
6448	* It's impossible to call extractQuery method for unknown operand. So
6449	* unless operand is a Const we can't do much; just assume there will be
6450	* one ordinary search entry from each array entry at runtime, and fall
6451	* back on a probably-bad estimate of the number of array entries.
6452	*/
6453	if (!IsA(rightop, Const))
6454	{
6455	counts->exactEntries++;
6456	counts->searchEntries++;
6457	counts->arrayScans *= estimate_array_length(rightop);
6458	return true;
6459	}
6460
6461	/ If Const is null, there can be no matches /
6462	if (((Const *) rightop)->constisnull)
6463	return false;
6464
6465	/ Otherwise, extract the array elements and iterate over them /
6466	arrayval = DatumGetArrayTypeP(((Const *) rightop)->constvalue);
6467	get_typlenbyvalalign(ARR_ELEMTYPE(arrayval),
6468	&elmlen, &elmbyval, &elmalign);
6469	deconstruct_array(arrayval,
6470	ARR_ELEMTYPE(arrayval),
6471	elmlen, elmbyval, elmalign,
6472	&elemValues, &elemNulls, &numElems);
6473
6474	memset(&arraycounts, `0`, sizeof(arraycounts));
6475
6476	for (i = `0`; i < numElems; i++)
6477	{
6478	GinQualCounts elemcounts;
6479
6480	/ NULL can't match anything, so ignore, as the executor will /
6481	if (elemNulls[i])
6482	continue;
6483
6484	/ Otherwise, apply extractQuery and get the actual term counts /
6485	memset(&elemcounts, `0`, sizeof(elemcounts));
6486
6487	if (gincost_pattern(index, indexcol, clause_op, elemValues[i],
6488	&elemcounts))
6489	{
6490	/ We ignore array elements that are unsatisfiable patterns /
6491	numPossible++;
6492
6493	if (elemcounts.haveFullScan)
6494	{
6495	/*
6496	* Full index scan will be required. We treat this as if
6497	* every key in the index had been listed in the query; is
6498	* that reasonable?
6499	*/
6500	elemcounts.partialEntries = `0`;
6501	elemcounts.exactEntries = numIndexEntries;
6502	elemcounts.searchEntries = numIndexEntries;
6503	}
6504	arraycounts.partialEntries += elemcounts.partialEntries;
6505	arraycounts.exactEntries += elemcounts.exactEntries;
6506	arraycounts.searchEntries += elemcounts.searchEntries;
6507	}
6508	}
6509
6510	if (numPossible == `0`)
6511	{
6512	/ No satisfiable patterns in the array /
6513	return false;
6514	}
6515
6516	/*
6517	* Now add the averages to the global counts. This will give us an
6518	* estimate of the average number of terms searched for in each indexscan,
6519	* including contributions from both array and non-array quals.
6520	*/
6521	counts->partialEntries += arraycounts.partialEntries / numPossible;
6522	counts->exactEntries += arraycounts.exactEntries / numPossible;
6523	counts->searchEntries += arraycounts.searchEntries / numPossible;
6524
6525	counts->arrayScans *= numPossible;
6526
6527	return true;
6528	}
6529
6530	/*
6531	* GIN has search behavior completely different from other index types
6532	*/
6533	void
6534	gincostestimate(PlannerInfo root, IndexPath path, double loop_count,
6535	Cost indexStartupCost, Cost indexTotalCost,
6536	Selectivity indexSelectivity, double* *indexCorrelation,
6537	double *indexPages)
6538	{
6539	IndexOptInfo *index = path->indexinfo;
6540	List *indexQuals = get_quals_from_indexclauses(path->indexclauses);
6541	List *selectivityQuals;
6542	double numPages = index->pages,
6543	numTuples = index->tuples;
6544	double numEntryPages,
6545	numDataPages,
6546	numPendingPages,
6547	numEntries;
6548	GinQualCounts counts;
6549	bool matchPossible;
6550	double partialScale;
6551	double entryPagesFetched,
6552	dataPagesFetched,
6553	dataPagesFetchedBySel;
6554	double qual_op_cost,
6555	qual_arg_cost,
6556	spc_random_page_cost,
6557	outer_scans;
6558	Relation indexRel;
6559	GinStatsData ginStats;
6560	ListCell *lc;
6561
6562	/*
6563	* Obtain statistical information from the meta page, if possible. Else
6564	* set ginStats to zeroes, and we'll cope below.
6565	*/
6566	if (!index->hypothetical)
6567	{
6568	/ Lock should have already been obtained in plancat.c /
6569	indexRel = index_open(index->indexoid, NoLock);
6570	ginGetStats(indexRel, &ginStats);
6571	index_close(indexRel, NoLock);
6572	}
6573	else
6574	{
6575	memset(&ginStats, `0`, sizeof(ginStats));
6576	}
6577
6578	/*
6579	* Assuming we got valid (nonzero) stats at all, nPendingPages can be
6580	* trusted, but the other fields are data as of the last VACUUM. We can
6581	* scale them up to account for growth since then, but that method only
6582	* goes so far; in the worst case, the stats might be for a completely
6583	* empty index, and scaling them will produce pretty bogus numbers.
6584	* Somewhat arbitrarily, set the cutoff for doing scaling at 4X growth; if
6585	* it's grown more than that, fall back to estimating things only from the
6586	* assumed-accurate index size. But we'll trust nPendingPages in any case
6587	* so long as it's not clearly insane, ie, more than the index size.
6588	*/
6589	if (ginStats.nPendingPages < numPages)
6590	numPendingPages = ginStats.nPendingPages;
6591	else
6592	numPendingPages = `0`;
6593
6594	if (numPages > `0` && ginStats.nTotalPages <= numPages &&
6595	ginStats.nTotalPages > numPages / `4` &&
6596	ginStats.nEntryPages > `0` && ginStats.nEntries > `0`)
6597	{
6598	/*
6599	* OK, the stats seem close enough to sane to be trusted. But we
6600	* still need to scale them by the ratio numPages / nTotalPages to
6601	* account for growth since the last VACUUM.
6602	*/
6603	double scale = numPages / ginStats.nTotalPages;
6604
6605	numEntryPages = ceil(ginStats.nEntryPages * scale);
6606	numDataPages = ceil(ginStats.nDataPages * scale);
6607	numEntries = ceil(ginStats.nEntries * scale);
6608	/ ensure we didn't round up too much /
6609	numEntryPages = Min(numEntryPages, numPages - numPendingPages);
6610	numDataPages = Min(numDataPages,
6611	numPages - numPendingPages - numEntryPages);
6612	}
6613	else
6614	{
6615	/*
6616	* We might get here because it's a hypothetical index, or an index
6617	* created pre-9.1 and never vacuumed since upgrading (in which case
6618	* its stats would read as zeroes), or just because it's grown too
6619	* much since the last VACUUM for us to put our faith in scaling.
6620	*
6621	* Invent some plausible internal statistics based on the index page
6622	* count (and clamp that to at least 10 pages, just in case). We
6623	* estimate that 90% of the index is entry pages, and the rest is data
6624	* pages. Estimate 100 entries per entry page; this is rather bogus
6625	* since it'll depend on the size of the keys, but it's more robust
6626	* than trying to predict the number of entries per heap tuple.
6627	*/
6628	numPages = Max(numPages, `10`);
6629	numEntryPages = floor((numPages - numPendingPages) * `0.90`);
6630	numDataPages = numPages - numPendingPages - numEntryPages;
6631	numEntries = floor(numEntryPages * `100`);
6632	}
6633
6634	/ In an empty index, numEntries could be zero. Avoid divide-by-zero /
6635	if (numEntries < `1`)
6636	numEntries = `1`;
6637
6638	/*
6639	* If the index is partial, AND the index predicate with the index-bound
6640	* quals to produce a more accurate idea of the number of rows covered by
6641	* the bound conditions.
6642	*/
6643	selectivityQuals = add_predicate_to_index_quals(index, indexQuals);
6644
6645	/ Estimate the fraction of main-table tuples that will be visited /
6646	*indexSelectivity = clauselist_selectivity(root, selectivityQuals,
6647	index->rel->relid,
6648	JOIN_INNER,
6649	NULL);
6650
6651	/ fetch estimated page cost for tablespace containing index /
6652	get_tablespace_page_costs(index->reltablespace,
6653	&spc_random_page_cost,
6654	NULL);
6655
6656	/*
6657	* Generic assumption about index correlation: there isn't any.
6658	*/
6659	*indexCorrelation = `0.0`;
6660
6661	/*
6662	* Examine quals to estimate number of search entries & partial matches
6663	*/
6664	memset(&counts, `0`, sizeof(counts));
6665	counts.arrayScans = `1`;
6666	matchPossible = true;
6667
6668	foreach(lc, path->indexclauses)
6669	{
6670	IndexClause *iclause = lfirst_node(IndexClause, lc);
6671	ListCell *lc2;
6672
6673	foreach(lc2, iclause->indexquals)
6674	{
6675	RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
6676	Expr *clause = rinfo->clause;
6677
6678	if (IsA(clause, OpExpr))
6679	{
6680	matchPossible = gincost_opexpr(root,
6681	index,
6682	iclause->indexcol,
6683	(OpExpr *) clause,
6684	&counts);
6685	if (!matchPossible)
6686	break;
6687	}
6688	else if (IsA(clause, ScalarArrayOpExpr))
6689	{
6690	matchPossible = gincost_scalararrayopexpr(root,
6691	index,
6692	iclause->indexcol,
6693	(ScalarArrayOpExpr *) clause,
6694	numEntries,
6695	&counts);
6696	if (!matchPossible)
6697	break;
6698	}
6699	else
6700	{
6701	/ shouldn't be anything else for a GIN index /
6702	elog(ERROR, "unsupported GIN indexqual type: %d",
6703	(int) nodeTag(clause));
6704	}
6705	}
6706	}
6707
6708	/ Fall out if there were any provably-unsatisfiable quals /
6709	if (!matchPossible)
6710	{
6711	*indexStartupCost = `0`;
6712	*indexTotalCost = `0`;
6713	*indexSelectivity = `0`;
6714	return;
6715	}
6716
6717	if (counts.haveFullScan \|\| indexQuals == NIL)
6718	{
6719	/*
6720	* Full index scan will be required. We treat this as if every key in
6721	* the index had been listed in the query; is that reasonable?
6722	*/
6723	counts.partialEntries = `0`;
6724	counts.exactEntries = numEntries;
6725	counts.searchEntries = numEntries;
6726	}
6727
6728	/ Will we have more than one iteration of a nestloop scan? /
6729	outer_scans = loop_count;
6730
6731	/*
6732	* Compute cost to begin scan, first of all, pay attention to pending
6733	* list.
6734	*/
6735	entryPagesFetched = numPendingPages;
6736
6737	/*
6738	* Estimate number of entry pages read. We need to do
6739	* counts.searchEntries searches. Use a power function as it should be,
6740	* but tuples on leaf pages usually is much greater. Here we include all
6741	* searches in entry tree, including search of first entry in partial
6742	* match algorithm
6743	*/
6744	entryPagesFetched += ceil(counts.searchEntries * rint(pow(numEntryPages, `0.15`)));
6745
6746	/*
6747	* Add an estimate of entry pages read by partial match algorithm. It's a
6748	* scan over leaf pages in entry tree. We haven't any useful stats here,
6749	* so estimate it as proportion. Because counts.partialEntries is really
6750	* pretty bogus (see code above), it's possible that it is more than
6751	* numEntries; clamp the proportion to ensure sanity.
6752	*/
6753	partialScale = counts.partialEntries / numEntries;
6754	partialScale = Min(partialScale, `1.0`);
6755
6756	entryPagesFetched += ceil(numEntryPages * partialScale);
6757
6758	/*
6759	* Partial match algorithm reads all data pages before doing actual scan,
6760	* so it's a startup cost. Again, we haven't any useful stats here, so
6761	* estimate it as proportion.
6762	*/
6763	dataPagesFetched = ceil(numDataPages * partialScale);
6764
6765	/*
6766	* Calculate cache effects if more than one scan due to nestloops or array
6767	* quals. The result is pro-rated per nestloop scan, but the array qual
6768	* factor shouldn't be pro-rated (compare genericcostestimate).
6769	*/
6770	if (outer_scans > `1` \|\| counts.arrayScans > `1`)
6771	{
6772	entryPagesFetched = outer_scans counts.arrayScans;
6773	entryPagesFetched = index_pages_fetched(entryPagesFetched,
6774	(BlockNumber) numEntryPages,
6775	numEntryPages, root);
6776	entryPagesFetched /= outer_scans;
6777	dataPagesFetched = outer_scans counts.arrayScans;
6778	dataPagesFetched = index_pages_fetched(dataPagesFetched,
6779	(BlockNumber) numDataPages,
6780	numDataPages, root);
6781	dataPagesFetched /= outer_scans;
6782	}
6783
6784	/*
6785	* Here we use random page cost because logically-close pages could be far
6786	* apart on disk.
6787	*/
6788	indexStartupCost = (entryPagesFetched + dataPagesFetched) spc_random_page_cost;
6789
6790	/*
6791	* Now compute the number of data pages fetched during the scan.
6792	*
6793	* We assume every entry to have the same number of items, and that there
6794	* is no overlap between them. (XXX: tsvector and array opclasses collect
6795	* statistics on the frequency of individual keys; it would be nice to use
6796	* those here.)
6797	*/
6798	dataPagesFetched = ceil(numDataPages * counts.exactEntries / numEntries);
6799
6800	/*
6801	* If there is a lot of overlap among the entries, in particular if one of
6802	* the entries is very frequent, the above calculation can grossly
6803	* under-estimate. As a simple cross-check, calculate a lower bound based
6804	* on the overall selectivity of the quals. At a minimum, we must read
6805	* one item pointer for each matching entry.
6806	*
6807	* The width of each item pointer varies, based on the level of
6808	* compression. We don't have statistics on that, but an average of
6809	* around 3 bytes per item is fairly typical.
6810	*/
6811	dataPagesFetchedBySel = ceil(indexSelectivity
6812	(numTuples / (BLCKSZ / `3`)));
6813	if (dataPagesFetchedBySel > dataPagesFetched)
6814	dataPagesFetched = dataPagesFetchedBySel;
6815
6816	/ Account for cache effects, the same as above /
6817	if (outer_scans > `1` \|\| counts.arrayScans > `1`)
6818	{
6819	dataPagesFetched = outer_scans counts.arrayScans;
6820	dataPagesFetched = index_pages_fetched(dataPagesFetched,
6821	(BlockNumber) numDataPages,
6822	numDataPages, root);
6823	dataPagesFetched /= outer_scans;
6824	}
6825
6826	/ And apply random_page_cost as the cost per page /
6827	indexTotalCost = indexStartupCost +
6828	dataPagesFetched * spc_random_page_cost;
6829
6830	/*
6831	* Add on index qual eval costs, much as in genericcostestimate. But we
6832	* can disregard indexorderbys, since GIN doesn't support those.
6833	*/
6834	qual_arg_cost = index_other_operands_eval_cost(root, indexQuals);
6835	qual_op_cost = cpu_operator_cost * list_length(indexQuals);
6836
6837	*indexStartupCost += qual_arg_cost;
6838	*indexTotalCost += qual_arg_cost;
6839	indexTotalCost += (numTuples indexSelectivity) (cpu_index_tuple_cost + qual_op_cost);
6840	*indexPages = dataPagesFetched;
6841	}
6842
6843	/*
6844	* BRIN has search behavior completely different from other index types
6845	*/
6846	void
6847	brincostestimate(PlannerInfo root, IndexPath path, double loop_count,
6848	Cost indexStartupCost, Cost indexTotalCost,
6849	Selectivity indexSelectivity, double* *indexCorrelation,
6850	double *indexPages)
6851	{
6852	IndexOptInfo *index = path->indexinfo;
6853	List *indexQuals = get_quals_from_indexclauses(path->indexclauses);
6854	double numPages = index->pages;
6855	RelOptInfo *baserel = index->rel;
6856	RangeTblEntry *rte = planner_rt_fetch(baserel->relid, root);
6857	Cost spc_seq_page_cost;
6858	Cost spc_random_page_cost;
6859	double qual_arg_cost;
6860	double qualSelectivity;
6861	BrinStatsData statsData;
6862	double indexRanges;
6863	double minimalRanges;
6864	double estimatedRanges;
6865	double selec;
6866	Relation indexRel;
6867	ListCell *l;
6868	VariableStatData vardata;
6869
6870	Assert(rte->rtekind == RTE_RELATION);
6871
6872	/ fetch estimated page cost for the tablespace containing the index /
6873	get_tablespace_page_costs(index->reltablespace,
6874	&spc_random_page_cost,
6875	&spc_seq_page_cost);
6876
6877	/*
6878	* Obtain some data from the index itself. A lock should have already
6879	* been obtained on the index in plancat.c.
6880	*/
6881	indexRel = index_open(index->indexoid, NoLock);
6882	brinGetStats(indexRel, &statsData);
6883	index_close(indexRel, NoLock);
6884
6885	/*
6886	* Compute index correlation
6887	*
6888	* Because we can use all index quals equally when scanning, we can use
6889	* the largest correlation (in absolute value) among columns used by the
6890	* query. Start at zero, the worst possible case. If we cannot find any
6891	* correlation statistics, we will keep it as 0.
6892	*/
6893	*indexCorrelation = `0`;
6894
6895	foreach(l, path->indexclauses)
6896	{
6897	IndexClause *iclause = lfirst_node(IndexClause, l);
6898	AttrNumber attnum = index->indexkeys[iclause->indexcol];
6899
6900	/ attempt to lookup stats in relation for this index column /
6901	if (attnum != `0`)
6902	{
6903	/ Simple variable -- look to stats for the underlying table /
6904	if (get_relation_stats_hook &&
6905	(*get_relation_stats_hook) (root, rte, attnum, &vardata))
6906	{
6907	/*
6908	* The hook took control of acquiring a stats tuple. If it
6909	* did supply a tuple, it'd better have supplied a freefunc.
6910	*/
6911	if (HeapTupleIsValid(vardata.statsTuple) && !vardata.freefunc)
6912	elog(ERROR,
6913	"no function provided to release variable stats with");
6914	}
6915	else
6916	{
6917	vardata.statsTuple =
6918	SearchSysCache3(STATRELATTINH,
6919	ObjectIdGetDatum(rte->relid),
6920	Int16GetDatum(attnum),
6921	BoolGetDatum(false));
6922	vardata.freefunc = ReleaseSysCache;
6923	}
6924	}
6925	else
6926	{
6927	/*
6928	* Looks like we've found an expression column in the index. Let's
6929	* see if there's any stats for it.
6930	*/
6931
6932	/ get the attnum from the 0-based index. /
6933	attnum = iclause->indexcol + `1`;
6934
6935	if (get_index_stats_hook &&
6936	(*get_index_stats_hook) (root, index->indexoid, attnum, &vardata))
6937	{
6938	/*
6939	* The hook took control of acquiring a stats tuple. If it
6940	* did supply a tuple, it'd better have supplied a freefunc.
6941	*/
6942	if (HeapTupleIsValid(vardata.statsTuple) &&
6943	!vardata.freefunc)
6944	elog(ERROR, "no function provided to release variable stats with");
6945	}
6946	else
6947	{
6948	vardata.statsTuple = SearchSysCache3(STATRELATTINH,
6949	ObjectIdGetDatum(index->indexoid),
6950	Int16GetDatum(attnum),
6951	BoolGetDatum(false));
6952	vardata.freefunc = ReleaseSysCache;
6953	}
6954	}
6955
6956	if (HeapTupleIsValid(vardata.statsTuple))
6957	{
6958	AttStatsSlot sslot;
6959
6960	if (get_attstatsslot(&sslot, vardata.statsTuple,
6961	STATISTIC_KIND_CORRELATION, InvalidOid,
6962	ATTSTATSSLOT_NUMBERS))
6963	{
6964	double varCorrelation = `0.0`;
6965
6966	if (sslot.nnumbers > `0`)
6967	varCorrelation = Abs(sslot.numbers[`0`]);
6968
6969	if (varCorrelation > *indexCorrelation)
6970	*indexCorrelation = varCorrelation;
6971
6972	free_attstatsslot(&sslot);
6973	}
6974	}
6975
6976	ReleaseVariableStats(vardata);
6977	}
6978
6979	qualSelectivity = clauselist_selectivity(root, indexQuals,
6980	baserel->relid,
6981	JOIN_INNER, NULL);
6982
6983	/ work out the actual number of ranges in the index /
6984	indexRanges = Max(ceil((double) baserel->pages / statsData.pagesPerRange),
6985	`1.0`);
6986
6987	/*
6988	* Now calculate the minimum possible ranges we could match with if all of
6989	* the rows were in the perfect order in the table's heap.
6990	*/
6991	minimalRanges = ceil(indexRanges * qualSelectivity);
6992
6993	/*
6994	* Now estimate the number of ranges that we'll touch by using the
6995	* indexCorrelation from the stats. Careful not to divide by zero (note
6996	* we're using the absolute value of the correlation).
6997	*/
6998	if (*indexCorrelation < `1.0e-10`)
6999	estimatedRanges = indexRanges;
7000	else
7001	estimatedRanges = Min(minimalRanges / *indexCorrelation, indexRanges);
7002
7003	/ we expect to visit this portion of the table /
7004	selec = estimatedRanges / indexRanges;
7005
7006	CLAMP_PROBABILITY(selec);
7007
7008	*indexSelectivity = selec;
7009
7010	/*
7011	* Compute the index qual costs, much as in genericcostestimate, to add to
7012	* the index costs. We can disregard indexorderbys, since BRIN doesn't
7013	* support those.
7014	*/
7015	qual_arg_cost = index_other_operands_eval_cost(root, indexQuals);
7016
7017	/*
7018	* Compute the startup cost as the cost to read the whole revmap
7019	* sequentially, including the cost to execute the index quals.
7020	*/
7021	*indexStartupCost =
7022	spc_seq_page_cost * statsData.revmapNumPages * loop_count;
7023	*indexStartupCost += qual_arg_cost;
7024
7025	/*
7026	* To read a BRIN index there might be a bit of back and forth over
7027	* regular pages, as revmap might point to them out of sequential order;
7028	* calculate the total cost as reading the whole index in random order.
7029	*/
7030	indexTotalCost = indexStartupCost +
7031	spc_random_page_cost * (numPages - statsData.revmapNumPages) * loop_count;
7032
7033	/*
7034	* Charge a small amount per range tuple which we expect to match to. This
7035	* is meant to reflect the costs of manipulating the bitmap. The BRIN scan
7036	* will set a bit for each page in the range when we find a matching
7037	* range, so we must multiply the charge by the number of pages in the
7038	* range.
7039	*/
7040	indexTotalCost += `0.1` cpu_operator_cost * estimatedRanges *
7041	statsData.pagesPerRange;
7042
7043	*indexPages = index->pages;
7044	}
7045

Browse the source code of PostgreSQL/src/backend/utils/adt/selfuncs.c