analyzejoins.c source code [PostgreSQL/src/backend/optimizer/plan/analyzejoins.c]

1	/-------------------------------------------------------------------------*
2	*
3	* analyzejoins.c
4	* Routines for simplifying joins after initial query analysis
5	*
6	* While we do a great deal of join simplification in prep/prepjointree.c,
7	* certain optimizations cannot be performed at that stage for lack of
8	* detailed information about the query. The routines here are invoked
9	* after initsplan.c has done its work, and can do additional join removal
10	* and simplification steps based on the information extracted. The penalty
11	* is that we have to work harder to clean up after ourselves when we modify
12	* the query, since the derived data structures have to be updated too.
13	*
14	* Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
15	* Portions Copyright (c) 1994, Regents of the University of California
16	*
17	*
18	* IDENTIFICATION
19	* src/backend/optimizer/plan/analyzejoins.c
20	*
21	*-------------------------------------------------------------------------
22	*/
23	#include "postgres.h"
24
25	#include "nodes/nodeFuncs.h"
26	#include "optimizer/clauses.h"
27	#include "optimizer/joininfo.h"
28	#include "optimizer/optimizer.h"
29	#include "optimizer/pathnode.h"
30	#include "optimizer/paths.h"
31	#include "optimizer/planmain.h"
32	#include "optimizer/tlist.h"
33	#include "utils/lsyscache.h"
34
35	/ local functions /
36	static bool join_is_removable(PlannerInfo root, SpecialJoinInfo sjinfo);
37	static void remove_rel_from_query(PlannerInfo root, int* relid,
38	Relids joinrelids);
39	static List remove_rel_from_joinlist(List joinlist, int relid, int *nremoved);
40	static bool rel_supports_distinctness(PlannerInfo root, RelOptInfo rel);
41	static bool rel_is_distinct_for(PlannerInfo root, RelOptInfo rel,
42	List *clause_list);
43	static Oid distinct_col_search(int colno, List colnos, List opids);
44	static bool is_innerrel_unique_for(PlannerInfo *root,
45	Relids joinrelids,
46	Relids outerrelids,
47	RelOptInfo *innerrel,
48	JoinType jointype,
49	List *restrictlist);
50
51
52	/*
53	* remove_useless_joins
54	* Check for relations that don't actually need to be joined at all,
55	* and remove them from the query.
56	*
57	* We are passed the current joinlist and return the updated list. Other
58	* data structures that have to be updated are accessible via "root".
59	*/
60	List *
61	remove_useless_joins(PlannerInfo root, List joinlist)
62	{
63	ListCell *lc;
64
65	/*
66	* We are only interested in relations that are left-joined to, so we can
67	* scan the join_info_list to find them easily.
68	*/
69	restart:
70	foreach(lc, root->join_info_list)
71	{
72	SpecialJoinInfo sjinfo = (SpecialJoinInfo ) lfirst(lc);
73	int innerrelid;
74	int nremoved;
75
76	/ Skip if not removable /
77	if (!join_is_removable(root, sjinfo))
78	continue;
79
80	/*
81	* Currently, join_is_removable can only succeed when the sjinfo's
82	* righthand is a single baserel. Remove that rel from the query and
83	* joinlist.
84	*/
85	innerrelid = bms_singleton_member(sjinfo->min_righthand);
86
87	remove_rel_from_query(root, innerrelid,
88	bms_union(sjinfo->min_lefthand,
89	sjinfo->min_righthand));
90
91	/ We verify that exactly one reference gets removed from joinlist /
92	nremoved = `0`;
93	joinlist = remove_rel_from_joinlist(joinlist, innerrelid, &nremoved);
94	if (nremoved != `1`)
95	elog(ERROR, "failed to find relation %d in joinlist", innerrelid);
96
97	/*
98	* We can delete this SpecialJoinInfo from the list too, since it's no
99	* longer of interest.
100	*/
101	root->join_info_list = list_delete_ptr(root->join_info_list, sjinfo);
102
103	/*
104	* Restart the scan. This is necessary to ensure we find all
105	* removable joins independently of ordering of the join_info_list
106	* (note that removal of attr_needed bits may make a join appear
107	* removable that did not before). Also, since we just deleted the
108	* current list cell, we'd have to have some kluge to continue the
109	* list scan anyway.
110	*/
111	goto restart;
112	}
113
114	return joinlist;
115	}
116
117	/*
118	* clause_sides_match_join
119	* Determine whether a join clause is of the right form to use in this join.
120	*
121	* We already know that the clause is a binary opclause referencing only the
122	* rels in the current join. The point here is to check whether it has the
123	* form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
124	* rather than mixing outer and inner vars on either side. If it matches,
125	* we set the transient flag outer_is_left to identify which side is which.
126	*/
127	static inline bool
128	clause_sides_match_join(RestrictInfo *rinfo, Relids outerrelids,
129	Relids innerrelids)
130	{
131	if (bms_is_subset(rinfo->left_relids, outerrelids) &&
132	bms_is_subset(rinfo->right_relids, innerrelids))
133	{
134	/ lefthand side is outer /
135	rinfo->outer_is_left = true;
136	return true;
137	}
138	else if (bms_is_subset(rinfo->left_relids, innerrelids) &&
139	bms_is_subset(rinfo->right_relids, outerrelids))
140	{
141	/ righthand side is outer /
142	rinfo->outer_is_left = false;
143	return true;
144	}
145	return false; / no good for these input relations /
146	}
147
148	/*
149	* join_is_removable
150	* Check whether we need not perform this special join at all, because
151	* it will just duplicate its left input.
152	*
153	* This is true for a left join for which the join condition cannot match
154	* more than one inner-side row. (There are other possibly interesting
155	* cases, but we don't have the infrastructure to prove them.) We also
156	* have to check that the inner side doesn't generate any variables needed
157	* above the join.
158	*/
159	static bool
160	join_is_removable(PlannerInfo root, SpecialJoinInfo sjinfo)
161	{
162	int innerrelid;
163	RelOptInfo *innerrel;
164	Relids joinrelids;
165	List *clause_list = NIL;
166	ListCell *l;
167	int attroff;
168
169	/*
170	* Must be a non-delaying left join to a single baserel, else we aren't
171	* going to be able to do anything with it.
172	*/
173	if (sjinfo->jointype != JOIN_LEFT \|\|
174	sjinfo->delay_upper_joins)
175	return false;
176
177	if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
178	return false;
179
180	innerrel = find_base_rel(root, innerrelid);
181
182	/*
183	* Before we go to the effort of checking whether any innerrel variables
184	* are needed above the join, make a quick check to eliminate cases in
185	* which we will surely be unable to prove uniqueness of the innerrel.
186	*/
187	if (!rel_supports_distinctness(root, innerrel))
188	return false;
189
190	/ Compute the relid set for the join we are considering /
191	joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
192
193	/*
194	* We can't remove the join if any inner-rel attributes are used above the
195	* join.
196	*
197	* Note that this test only detects use of inner-rel attributes in higher
198	* join conditions and the target list. There might be such attributes in
199	* pushed-down conditions at this join, too. We check that case below.
200	*
201	* As a micro-optimization, it seems better to start with max_attr and
202	* count down rather than starting with min_attr and counting up, on the
203	* theory that the system attributes are somewhat less likely to be wanted
204	* and should be tested last.
205	*/
206	for (attroff = innerrel->max_attr - innerrel->min_attr;
207	attroff >= `0`;
208	attroff--)
209	{
210	if (!bms_is_subset(innerrel->attr_needed[attroff], joinrelids))
211	return false;
212	}
213
214	/*
215	* Similarly check that the inner rel isn't needed by any PlaceHolderVars
216	* that will be used above the join. We only need to fail if such a PHV
217	* actually references some inner-rel attributes; but the correct check
218	* for that is relatively expensive, so we first check against ph_eval_at,
219	* which must mention the inner rel if the PHV uses any inner-rel attrs as
220	* non-lateral references. Note that if the PHV's syntactic scope is just
221	* the inner rel, we can't drop the rel even if the PHV is variable-free.
222	*/
223	foreach(l, root->placeholder_list)
224	{
225	PlaceHolderInfo phinfo = (PlaceHolderInfo ) lfirst(l);
226
227	if (bms_overlap(phinfo->ph_lateral, innerrel->relids))
228	return false; / it references innerrel laterally /
229	if (bms_is_subset(phinfo->ph_needed, joinrelids))
230	continue; / PHV is not used above the join /
231	if (!bms_overlap(phinfo->ph_eval_at, innerrel->relids))
232	continue; / it definitely doesn't reference innerrel /
233	if (bms_is_subset(phinfo->ph_eval_at, innerrel->relids))
234	return false; / there isn't any other place to eval PHV /
235	if (bms_overlap(pull_varnos((Node *) phinfo->ph_var->phexpr),
236	innerrel->relids))
237	return false; / it does reference innerrel /
238	}
239
240	/*
241	* Search for mergejoinable clauses that constrain the inner rel against
242	* either the outer rel or a pseudoconstant. If an operator is
243	* mergejoinable then it behaves like equality for some btree opclass, so
244	* it's what we want. The mergejoinability test also eliminates clauses
245	* containing volatile functions, which we couldn't depend on.
246	*/
247	foreach(l, innerrel->joininfo)
248	{
249	RestrictInfo restrictinfo = (RestrictInfo ) lfirst(l);
250
251	/*
252	* If it's not a join clause for this outer join, we can't use it.
253	* Note that if the clause is pushed-down, then it is logically from
254	* above the outer join, even if it references no other rels (it might
255	* be from WHERE, for example).
256	*/
257	if (RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids))
258	{
259	/*
260	* If such a clause actually references the inner rel then join
261	* removal has to be disallowed. We have to check this despite
262	* the previous attr_needed checks because of the possibility of
263	* pushed-down clauses referencing the rel.
264	*/
265	if (bms_is_member(innerrelid, restrictinfo->clause_relids))
266	return false;
267	continue; / else, ignore; not useful here /
268	}
269
270	/ Ignore if it's not a mergejoinable clause /
271	if (!restrictinfo->can_join \|\|
272	restrictinfo->mergeopfamilies == NIL)
273	continue; / not mergejoinable /
274
275	/*
276	* Check if clause has the form "outer op inner" or "inner op outer",
277	* and if so mark which side is inner.
278	*/
279	if (!clause_sides_match_join(restrictinfo, sjinfo->min_lefthand,
280	innerrel->relids))
281	continue; / no good for these input relations /
282
283	/ OK, add to list /
284	clause_list = lappend(clause_list, restrictinfo);
285	}
286
287	/*
288	* Now that we have the relevant equality join clauses, try to prove the
289	* innerrel distinct.
290	*/
291	if (rel_is_distinct_for(root, innerrel, clause_list))
292	return true;
293
294	/*
295	* Some day it would be nice to check for other methods of establishing
296	* distinctness.
297	*/
298	return false;
299	}
300
301
302	/*
303	* Remove the target relid from the planner's data structures, having
304	* determined that there is no need to include it in the query.
305	*
306	* We are not terribly thorough here. We must make sure that the rel is
307	* no longer treated as a baserel, and that attributes of other baserels
308	* are no longer marked as being needed at joins involving this rel.
309	* Also, join quals involving the rel have to be removed from the joininfo
310	* lists, but only if they belong to the outer join identified by joinrelids.
311	*/
312	static void
313	remove_rel_from_query(PlannerInfo root, int* relid, Relids joinrelids)
314	{
315	RelOptInfo *rel = find_base_rel(root, relid);
316	List *joininfos;
317	Index rti;
318	ListCell *l;
319	ListCell *nextl;
320
321	/*
322	* Mark the rel as "dead" to show it is no longer part of the join tree.
323	* (Removing it from the baserel array altogether seems too risky.)
324	*/
325	rel->reloptkind = RELOPT_DEADREL;
326
327	/*
328	* Remove references to the rel from other baserels' attr_needed arrays.
329	*/
330	for (rti = `1`; rti < root->simple_rel_array_size; rti++)
331	{
332	RelOptInfo *otherrel = root->simple_rel_array[rti];
333	int attroff;
334
335	/ there may be empty slots corresponding to non-baserel RTEs /
336	if (otherrel == NULL)
337	continue;
338
339	Assert(otherrel->relid == rti); / sanity check on array /
340
341	/ no point in processing target rel itself /
342	if (otherrel == rel)
343	continue;
344
345	for (attroff = otherrel->max_attr - otherrel->min_attr;
346	attroff >= `0`;
347	attroff--)
348	{
349	otherrel->attr_needed[attroff] =
350	bms_del_member(otherrel->attr_needed[attroff], relid);
351	}
352	}
353
354	/*
355	* Likewise remove references from SpecialJoinInfo data structures.
356	*
357	* This is relevant in case the outer join we're deleting is nested inside
358	* other outer joins: the upper joins' relid sets have to be adjusted. The
359	* RHS of the target outer join will be made empty here, but that's OK
360	* since caller will delete that SpecialJoinInfo entirely.
361	*/
362	foreach(l, root->join_info_list)
363	{
364	SpecialJoinInfo sjinfo = (SpecialJoinInfo ) lfirst(l);
365
366	sjinfo->min_lefthand = bms_del_member(sjinfo->min_lefthand, relid);
367	sjinfo->min_righthand = bms_del_member(sjinfo->min_righthand, relid);
368	sjinfo->syn_lefthand = bms_del_member(sjinfo->syn_lefthand, relid);
369	sjinfo->syn_righthand = bms_del_member(sjinfo->syn_righthand, relid);
370	}
371
372	/*
373	* Likewise remove references from PlaceHolderVar data structures,
374	* removing any no-longer-needed placeholders entirely.
375	*
376	* Removal is a bit tricker than it might seem: we can remove PHVs that
377	* are used at the target rel and/or in the join qual, but not those that
378	* are used at join partner rels or above the join. It's not that easy to
379	* distinguish PHVs used at partner rels from those used in the join qual,
380	* since they will both have ph_needed sets that are subsets of
381	* joinrelids. However, a PHV used at a partner rel could not have the
382	* target rel in ph_eval_at, so we check that while deciding whether to
383	* remove or just update the PHV. There is no corresponding test in
384	* join_is_removable because it doesn't need to distinguish those cases.
385	*/
386	for (l = list_head(root->placeholder_list); l != NULL; l = nextl)
387	{
388	PlaceHolderInfo phinfo = (PlaceHolderInfo ) lfirst(l);
389
390	nextl = lnext(l);
391	Assert(!bms_is_member(relid, phinfo->ph_lateral));
392	if (bms_is_subset(phinfo->ph_needed, joinrelids) &&
393	bms_is_member(relid, phinfo->ph_eval_at))
394	root->placeholder_list = list_delete_ptr(root->placeholder_list,
395	phinfo);
396	else
397	{
398	phinfo->ph_eval_at = bms_del_member(phinfo->ph_eval_at, relid);
399	Assert(!bms_is_empty(phinfo->ph_eval_at));
400	phinfo->ph_needed = bms_del_member(phinfo->ph_needed, relid);
401	}
402	}
403
404	/*
405	* Remove any joinquals referencing the rel from the joininfo lists.
406	*
407	* In some cases, a joinqual has to be put back after deleting its
408	* reference to the target rel. This can occur for pseudoconstant and
409	* outerjoin-delayed quals, which can get marked as requiring the rel in
410	* order to force them to be evaluated at or above the join. We can't
411	* just discard them, though. Only quals that logically belonged to the
412	* outer join being discarded should be removed from the query.
413	*
414	* We must make a copy of the rel's old joininfo list before starting the
415	* loop, because otherwise remove_join_clause_from_rels would destroy the
416	* list while we're scanning it.
417	*/
418	joininfos = list_copy(rel->joininfo);
419	foreach(l, joininfos)
420	{
421	RestrictInfo rinfo = (RestrictInfo ) lfirst(l);
422
423	remove_join_clause_from_rels(root, rinfo, rinfo->required_relids);
424
425	if (RINFO_IS_PUSHED_DOWN(rinfo, joinrelids))
426	{
427	/ Recheck that qual doesn't actually reference the target rel /
428	Assert(!bms_is_member(relid, rinfo->clause_relids));
429
430	/*
431	* The required_relids probably aren't shared with anything else,
432	* but let's copy them just to be sure.
433	*/
434	rinfo->required_relids = bms_copy(rinfo->required_relids);
435	rinfo->required_relids = bms_del_member(rinfo->required_relids,
436	relid);
437	distribute_restrictinfo_to_rels(root, rinfo);
438	}
439	}
440
441	/*
442	* There may be references to the rel in root->fkey_list, but if so,
443	* match_foreign_keys_to_quals() will get rid of them.
444	*/
445	}
446
447	/*
448	* Remove any occurrences of the target relid from a joinlist structure.
449	*
450	* It's easiest to build a whole new list structure, so we handle it that
451	* way. Efficiency is not a big deal here.
452	*
453	* *nremoved is incremented by the number of occurrences removed (there
454	* should be exactly one, but the caller checks that).
455	*/
456	static List *
457	remove_rel_from_joinlist(List joinlist, int* relid, int *nremoved)
458	{
459	List *result = NIL;
460	ListCell *jl;
461
462	foreach(jl, joinlist)
463	{
464	Node jlnode = (Node ) lfirst(jl);
465
466	if (IsA(jlnode, RangeTblRef))
467	{
468	int varno = ((RangeTblRef *) jlnode)->rtindex;
469
470	if (varno == relid)
471	(*nremoved)++;
472	else
473	result = lappend(result, jlnode);
474	}
475	else if (IsA(jlnode, List))
476	{
477	/ Recurse to handle subproblem /
478	List *sublist;
479
480	sublist = remove_rel_from_joinlist((List *) jlnode,
481	relid, nremoved);
482	/ Avoid including empty sub-lists in the result /
483	if (sublist)
484	result = lappend(result, sublist);
485	}
486	else
487	{
488	elog(ERROR, "unrecognized joinlist node type: %d",
489	(int) nodeTag(jlnode));
490	}
491	}
492
493	return result;
494	}
495
496
497	/*
498	* reduce_unique_semijoins
499	* Check for semijoins that can be simplified to plain inner joins
500	* because the inner relation is provably unique for the join clauses.
501	*
502	* Ideally this would happen during reduce_outer_joins, but we don't have
503	* enough information at that point.
504	*
505	* To perform the strength reduction when applicable, we need only delete
506	* the semijoin's SpecialJoinInfo from root->join_info_list. (We don't
507	* bother fixing the join type attributed to it in the query jointree,
508	* since that won't be consulted again.)
509	*/
510	void
511	reduce_unique_semijoins(PlannerInfo *root)
512	{
513	ListCell *lc;
514	ListCell *next;
515
516	/*
517	* Scan the join_info_list to find semijoins. We can't use foreach
518	* because we may delete the current cell.
519	*/
520	for (lc = list_head(root->join_info_list); lc != NULL; lc = next)
521	{
522	SpecialJoinInfo sjinfo = (SpecialJoinInfo ) lfirst(lc);
523	int innerrelid;
524	RelOptInfo *innerrel;
525	Relids joinrelids;
526	List *restrictlist;
527
528	next = lnext(lc);
529
530	/*
531	* Must be a non-delaying semijoin to a single baserel, else we aren't
532	* going to be able to do anything with it. (It's probably not
533	* possible for delay_upper_joins to be set on a semijoin, but we
534	* might as well check.)
535	*/
536	if (sjinfo->jointype != JOIN_SEMI \|\|
537	sjinfo->delay_upper_joins)
538	continue;
539
540	if (!bms_get_singleton_member(sjinfo->min_righthand, &innerrelid))
541	continue;
542
543	innerrel = find_base_rel(root, innerrelid);
544
545	/*
546	* Before we trouble to run generate_join_implied_equalities, make a
547	* quick check to eliminate cases in which we will surely be unable to
548	* prove uniqueness of the innerrel.
549	*/
550	if (!rel_supports_distinctness(root, innerrel))
551	continue;
552
553	/ Compute the relid set for the join we are considering /
554	joinrelids = bms_union(sjinfo->min_lefthand, sjinfo->min_righthand);
555
556	/*
557	* Since we're only considering a single-rel RHS, any join clauses it
558	* has must be clauses linking it to the semijoin's min_lefthand. We
559	* can also consider EC-derived join clauses.
560	*/
561	restrictlist =
562	list_concat(generate_join_implied_equalities(root,
563	joinrelids,
564	sjinfo->min_lefthand,
565	innerrel),
566	innerrel->joininfo);
567
568	/ Test whether the innerrel is unique for those clauses. /
569	if (!innerrel_is_unique(root,
570	joinrelids, sjinfo->min_lefthand, innerrel,
571	JOIN_SEMI, restrictlist, true))
572	continue;
573
574	/ OK, remove the SpecialJoinInfo from the list. /
575	root->join_info_list = list_delete_ptr(root->join_info_list, sjinfo);
576	}
577	}
578
579
580	/*
581	* rel_supports_distinctness
582	* Could the relation possibly be proven distinct on some set of columns?
583	*
584	* This is effectively a pre-checking function for rel_is_distinct_for().
585	* It must return true if rel_is_distinct_for() could possibly return true
586	* with this rel, but it should not expend a lot of cycles. The idea is
587	* that callers can avoid doing possibly-expensive processing to compute
588	* rel_is_distinct_for()'s argument lists if the call could not possibly
589	* succeed.
590	*/
591	static bool
592	rel_supports_distinctness(PlannerInfo root, RelOptInfo rel)
593	{
594	/ We only know about baserels ... /
595	if (rel->reloptkind != RELOPT_BASEREL)
596	return false;
597	if (rel->rtekind == RTE_RELATION)
598	{
599	/*
600	* For a plain relation, we only know how to prove uniqueness by
601	* reference to unique indexes. Make sure there's at least one
602	* suitable unique index. It must be immediately enforced, and if
603	* it's a partial index, it must match the query. (Keep these
604	* conditions in sync with relation_has_unique_index_for!)
605	*/
606	ListCell *lc;
607
608	foreach(lc, rel->indexlist)
609	{
610	IndexOptInfo ind = (IndexOptInfo ) lfirst(lc);
611
612	if (ind->unique && ind->immediate &&
613	(ind->indpred == NIL \|\| ind->predOK))
614	return true;
615	}
616	}
617	else if (rel->rtekind == RTE_SUBQUERY)
618	{
619	Query *subquery = root->simple_rte_array[rel->relid]->subquery;
620
621	/ Check if the subquery has any qualities that support distinctness /
622	if (query_supports_distinctness(subquery))
623	return true;
624	}
625	/ We have no proof rules for any other rtekinds. /
626	return false;
627	}
628
629	/*
630	* rel_is_distinct_for
631	* Does the relation return only distinct rows according to clause_list?
632	*
633	* clause_list is a list of join restriction clauses involving this rel and
634	* some other one. Return true if no two rows emitted by this rel could
635	* possibly join to the same row of the other rel.
636	*
637	* The caller must have already determined that each condition is a
638	* mergejoinable equality with an expression in this relation on one side, and
639	* an expression not involving this relation on the other. The transient
640	* outer_is_left flag is used to identify which side references this relation:
641	* left side if outer_is_left is false, right side if it is true.
642	*
643	* Note that the passed-in clause_list may be destructively modified! This
644	* is OK for current uses, because the clause_list is built by the caller for
645	* the sole purpose of passing to this function.
646	*/
647	static bool
648	rel_is_distinct_for(PlannerInfo root, RelOptInfo rel, List *clause_list)
649	{
650	/*
651	* We could skip a couple of tests here if we assume all callers checked
652	* rel_supports_distinctness first, but it doesn't seem worth taking any
653	* risk for.
654	*/
655	if (rel->reloptkind != RELOPT_BASEREL)
656	return false;
657	if (rel->rtekind == RTE_RELATION)
658	{
659	/*
660	* Examine the indexes to see if we have a matching unique index.
661	* relation_has_unique_index_for automatically adds any usable
662	* restriction clauses for the rel, so we needn't do that here.
663	*/
664	if (relation_has_unique_index_for(root, rel, clause_list, NIL, NIL))
665	return true;
666	}
667	else if (rel->rtekind == RTE_SUBQUERY)
668	{
669	Index relid = rel->relid;
670	Query *subquery = root->simple_rte_array[relid]->subquery;
671	List *colnos = NIL;
672	List *opids = NIL;
673	ListCell *l;
674
675	/*
676	* Build the argument lists for query_is_distinct_for: a list of
677	* output column numbers that the query needs to be distinct over, and
678	* a list of equality operators that the output columns need to be
679	* distinct according to.
680	*
681	* (XXX we are not considering restriction clauses attached to the
682	* subquery; is that worth doing?)
683	*/
684	foreach(l, clause_list)
685	{
686	RestrictInfo *rinfo = lfirst_node(RestrictInfo, l);
687	Oid op;
688	Var *var;
689
690	/*
691	* Get the equality operator we need uniqueness according to.
692	* (This might be a cross-type operator and thus not exactly the
693	* same operator the subquery would consider; that's all right
694	* since query_is_distinct_for can resolve such cases.) The
695	* caller's mergejoinability test should have selected only
696	* OpExprs.
697	*/
698	op = castNode(OpExpr, rinfo->clause)->opno;
699
700	/ caller identified the inner side for us /
701	if (rinfo->outer_is_left)
702	var = (Var *) get_rightop(rinfo->clause);
703	else
704	var = (Var *) get_leftop(rinfo->clause);
705
706	/*
707	* We may ignore any RelabelType node above the operand. (There
708	* won't be more than one, since eval_const_expressions() has been
709	* applied already.)
710	*/
711	if (var && IsA(var, RelabelType))
712	var = (Var ) ((RelabelType ) var)->arg;
713
714	/*
715	* If inner side isn't a Var referencing a subquery output column,
716	* this clause doesn't help us.
717	*/
718	if (!var \|\| !IsA(var, Var) \|\|
719	var->varno != relid \|\| var->varlevelsup != `0`)
720	continue;
721
722	colnos = lappend_int(colnos, var->varattno);
723	opids = lappend_oid(opids, op);
724	}
725
726	if (query_is_distinct_for(subquery, colnos, opids))
727	return true;
728	}
729	return false;
730	}
731
732
733	/*
734	* query_supports_distinctness - could the query possibly be proven distinct
735	* on some set of output columns?
736	*
737	* This is effectively a pre-checking function for query_is_distinct_for().
738	* It must return true if query_is_distinct_for() could possibly return true
739	* with this query, but it should not expend a lot of cycles. The idea is
740	* that callers can avoid doing possibly-expensive processing to compute
741	* query_is_distinct_for()'s argument lists if the call could not possibly
742	* succeed.
743	*/
744	bool
745	query_supports_distinctness(Query *query)
746	{
747	/ SRFs break distinctness except with DISTINCT, see below /
748	if (query->hasTargetSRFs && query->distinctClause == NIL)
749	return false;
750
751	/ check for features we can prove distinctness with /
752	if (query->distinctClause != NIL \|\|
753	query->groupClause != NIL \|\|
754	query->groupingSets != NIL \|\|
755	query->hasAggs \|\|
756	query->havingQual \|\|
757	query->setOperations)
758	return true;
759
760	return false;
761	}
762
763	/*
764	* query_is_distinct_for - does query never return duplicates of the
765	* specified columns?
766	*
767	* query is a not-yet-planned subquery (in current usage, it's always from
768	* a subquery RTE, which the planner avoids scribbling on).
769	*
770	* colnos is an integer list of output column numbers (resno's). We are
771	* interested in whether rows consisting of just these columns are certain
772	* to be distinct. "Distinctness" is defined according to whether the
773	* corresponding upper-level equality operators listed in opids would think
774	* the values are distinct. (Note: the opids entries could be cross-type
775	* operators, and thus not exactly the equality operators that the subquery
776	* would use itself. We use equality_ops_are_compatible() to check
777	* compatibility. That looks at btree or hash opfamily membership, and so
778	* should give trustworthy answers for all operators that we might need
779	* to deal with here.)
780	*/
781	bool
782	query_is_distinct_for(Query query, List colnos, List *opids)
783	{
784	ListCell *l;
785	Oid opid;
786
787	Assert(list_length(colnos) == list_length(opids));
788
789	/*
790	* DISTINCT (including DISTINCT ON) guarantees uniqueness if all the
791	* columns in the DISTINCT clause appear in colnos and operator semantics
792	* match. This is true even if there are SRFs in the DISTINCT columns or
793	* elsewhere in the tlist.
794	*/
795	if (query->distinctClause)
796	{
797	foreach(l, query->distinctClause)
798	{
799	SortGroupClause sgc = (SortGroupClause ) lfirst(l);
800	TargetEntry *tle = get_sortgroupclause_tle(sgc,
801	query->targetList);
802
803	opid = distinct_col_search(tle->resno, colnos, opids);
804	if (!OidIsValid(opid) \|\|
805	!equality_ops_are_compatible(opid, sgc->eqop))
806	break; / exit early if no match /
807	}
808	if (l == NULL) / had matches for all? /
809	return true;
810	}
811
812	/*
813	* Otherwise, a set-returning function in the query's targetlist can
814	* result in returning duplicate rows, despite any grouping that might
815	* occur before tlist evaluation. (If all tlist SRFs are within GROUP BY
816	* columns, it would be safe because they'd be expanded before grouping.
817	* But it doesn't currently seem worth the effort to check for that.)
818	*/
819	if (query->hasTargetSRFs)
820	return false;
821
822	/*
823	* Similarly, GROUP BY without GROUPING SETS guarantees uniqueness if all
824	* the grouped columns appear in colnos and operator semantics match.
825	*/
826	if (query->groupClause && !query->groupingSets)
827	{
828	foreach(l, query->groupClause)
829	{
830	SortGroupClause sgc = (SortGroupClause ) lfirst(l);
831	TargetEntry *tle = get_sortgroupclause_tle(sgc,
832	query->targetList);
833
834	opid = distinct_col_search(tle->resno, colnos, opids);
835	if (!OidIsValid(opid) \|\|
836	!equality_ops_are_compatible(opid, sgc->eqop))
837	break; / exit early if no match /
838	}
839	if (l == NULL) / had matches for all? /
840	return true;
841	}
842	else if (query->groupingSets)
843	{
844	/*
845	* If we have grouping sets with expressions, we probably don't have
846	* uniqueness and analysis would be hard. Punt.
847	*/
848	if (query->groupClause)
849	return false;
850
851	/*
852	* If we have no groupClause (therefore no grouping expressions), we
853	* might have one or many empty grouping sets. If there's just one,
854	* then we're returning only one row and are certainly unique. But
855	* otherwise, we know we're certainly not unique.
856	*/
857	if (list_length(query->groupingSets) == `1` &&
858	((GroupingSet *) linitial(query->groupingSets))->kind == GROUPING_SET_EMPTY)
859	return true;
860	else
861	return false;
862	}
863	else
864	{
865	/*
866	* If we have no GROUP BY, but do have aggregates or HAVING, then the
867	* result is at most one row so it's surely unique, for any operators.
868	*/
869	if (query->hasAggs \|\| query->havingQual)
870	return true;
871	}
872
873	/*
874	* UNION, INTERSECT, EXCEPT guarantee uniqueness of the whole output row,
875	* except with ALL.
876	*/
877	if (query->setOperations)
878	{
879	SetOperationStmt *topop = castNode(SetOperationStmt, query->setOperations);
880
881	Assert(topop->op != SETOP_NONE);
882
883	if (!topop->all)
884	{
885	ListCell *lg;
886
887	/ We're good if all the nonjunk output columns are in colnos /
888	lg = list_head(topop->groupClauses);
889	foreach(l, query->targetList)
890	{
891	TargetEntry tle = (TargetEntry ) lfirst(l);
892	SortGroupClause *sgc;
893
894	if (tle->resjunk)
895	continue; / ignore resjunk columns /
896
897	/ non-resjunk columns should have grouping clauses /
898	Assert(lg != NULL);
899	sgc = (SortGroupClause *) lfirst(lg);
900	lg = lnext(lg);
901
902	opid = distinct_col_search(tle->resno, colnos, opids);
903	if (!OidIsValid(opid) \|\|
904	!equality_ops_are_compatible(opid, sgc->eqop))
905	break; / exit early if no match /
906	}
907	if (l == NULL) / had matches for all? /
908	return true;
909	}
910	}
911
912	/*
913	* XXX Are there any other cases in which we can easily see the result
914	* must be distinct?
915	*
916	* If you do add more smarts to this function, be sure to update
917	* query_supports_distinctness() to match.
918	*/
919
920	return false;
921	}
922
923	/*
924	* distinct_col_search - subroutine for query_is_distinct_for
925	*
926	* If colno is in colnos, return the corresponding element of opids,
927	* else return InvalidOid. (Ordinarily colnos would not contain duplicates,
928	* but if it does, we arbitrarily select the first match.)
929	*/
930	static Oid
931	distinct_col_search(int colno, List colnos, List opids)
932	{
933	ListCell *lc1,
934	*lc2;
935
936	forboth(lc1, colnos, lc2, opids)
937	{
938	if (colno == lfirst_int(lc1))
939	return lfirst_oid(lc2);
940	}
941	return InvalidOid;
942	}
943
944
945	/*
946	* innerrel_is_unique
947	* Check if the innerrel provably contains at most one tuple matching any
948	* tuple from the outerrel, based on join clauses in the 'restrictlist'.
949	*
950	* We need an actual RelOptInfo for the innerrel, but it's sufficient to
951	* identify the outerrel by its Relids. This asymmetry supports use of this
952	* function before joinrels have been built. (The caller is expected to
953	* also supply the joinrelids, just to save recalculating that.)
954	*
955	* The proof must be made based only on clauses that will be "joinquals"
956	* rather than "otherquals" at execution. For an inner join there's no
957	* difference; but if the join is outer, we must ignore pushed-down quals,
958	* as those will become "otherquals". Note that this means the answer might
959	* vary depending on whether IS_OUTER_JOIN(jointype); since we cache the
960	* answer without regard to that, callers must take care not to call this
961	* with jointypes that would be classified differently by IS_OUTER_JOIN().
962	*
963	* The actual proof is undertaken by is_innerrel_unique_for(); this function
964	* is a frontend that is mainly concerned with caching the answers.
965	* In particular, the force_cache argument allows overriding the internal
966	* heuristic about whether to cache negative answers; it should be "true"
967	* if making an inquiry that is not part of the normal bottom-up join search
968	* sequence.
969	*/
970	bool
971	innerrel_is_unique(PlannerInfo *root,
972	Relids joinrelids,
973	Relids outerrelids,
974	RelOptInfo *innerrel,
975	JoinType jointype,
976	List *restrictlist,
977	bool force_cache)
978	{
979	MemoryContext old_context;
980	ListCell *lc;
981
982	/ Certainly can't prove uniqueness when there are no joinclauses /
983	if (restrictlist == NIL)
984	return false;
985
986	/*
987	* Make a quick check to eliminate cases in which we will surely be unable
988	* to prove uniqueness of the innerrel.
989	*/
990	if (!rel_supports_distinctness(root, innerrel))
991	return false;
992
993	/*
994	* Query the cache to see if we've managed to prove that innerrel is
995	* unique for any subset of this outerrel. We don't need an exact match,
996	* as extra outerrels can't make the innerrel any less unique (or more
997	* formally, the restrictlist for a join to a superset outerrel must be a
998	* superset of the conditions we successfully used before).
999	*/
1000	foreach(lc, innerrel->unique_for_rels)
1001	{
1002	Relids unique_for_rels = (Relids) lfirst(lc);
1003
1004	if (bms_is_subset(unique_for_rels, outerrelids))
1005	return true; / Success! /
1006	}
1007
1008	/*
1009	* Conversely, we may have already determined that this outerrel, or some
1010	* superset thereof, cannot prove this innerrel to be unique.
1011	*/
1012	foreach(lc, innerrel->non_unique_for_rels)
1013	{
1014	Relids unique_for_rels = (Relids) lfirst(lc);
1015
1016	if (bms_is_subset(outerrelids, unique_for_rels))
1017	return false;
1018	}
1019
1020	/ No cached information, so try to make the proof. /
1021	if (is_innerrel_unique_for(root, joinrelids, outerrelids, innerrel,
1022	jointype, restrictlist))
1023	{
1024	/*
1025	* Cache the positive result for future probes, being sure to keep it
1026	* in the planner_cxt even if we are working in GEQO.
1027	*
1028	* Note: one might consider trying to isolate the minimal subset of
1029	* the outerrels that proved the innerrel unique. But it's not worth
1030	* the trouble, because the planner builds up joinrels incrementally
1031	* and so we'll see the minimally sufficient outerrels before any
1032	* supersets of them anyway.
1033	*/
1034	old_context = MemoryContextSwitchTo(root->planner_cxt);
1035	innerrel->unique_for_rels = lappend(innerrel->unique_for_rels,
1036	bms_copy(outerrelids));
1037	MemoryContextSwitchTo(old_context);
1038
1039	return true; / Success! /
1040	}
1041	else
1042	{
1043	/*
1044	* None of the join conditions for outerrel proved innerrel unique, so
1045	* we can safely reject this outerrel or any subset of it in future
1046	* checks.
1047	*
1048	* However, in normal planning mode, caching this knowledge is totally
1049	* pointless; it won't be queried again, because we build up joinrels
1050	* from smaller to larger. It is useful in GEQO mode, where the
1051	* knowledge can be carried across successive planning attempts; and
1052	* it's likely to be useful when using join-search plugins, too. Hence
1053	* cache when join_search_private is non-NULL. (Yeah, that's a hack,
1054	* but it seems reasonable.)
1055	*
1056	* Also, allow callers to override that heuristic and force caching;
1057	* that's useful for reduce_unique_semijoins, which calls here before
1058	* the normal join search starts.
1059	*/
1060	if (force_cache \|\| root->join_search_private)
1061	{
1062	old_context = MemoryContextSwitchTo(root->planner_cxt);
1063	innerrel->non_unique_for_rels =
1064	lappend(innerrel->non_unique_for_rels,
1065	bms_copy(outerrelids));
1066	MemoryContextSwitchTo(old_context);
1067	}
1068
1069	return false;
1070	}
1071	}
1072
1073	/*
1074	* is_innerrel_unique_for
1075	* Check if the innerrel provably contains at most one tuple matching any
1076	* tuple from the outerrel, based on join clauses in the 'restrictlist'.
1077	*/
1078	static bool
1079	is_innerrel_unique_for(PlannerInfo *root,
1080	Relids joinrelids,
1081	Relids outerrelids,
1082	RelOptInfo *innerrel,
1083	JoinType jointype,
1084	List *restrictlist)
1085	{
1086	List *clause_list = NIL;
1087	ListCell *lc;
1088
1089	/*
1090	* Search for mergejoinable clauses that constrain the inner rel against
1091	* the outer rel. If an operator is mergejoinable then it behaves like
1092	* equality for some btree opclass, so it's what we want. The
1093	* mergejoinability test also eliminates clauses containing volatile
1094	* functions, which we couldn't depend on.
1095	*/
1096	foreach(lc, restrictlist)
1097	{
1098	RestrictInfo restrictinfo = (RestrictInfo ) lfirst(lc);
1099
1100	/*
1101	* As noted above, if it's a pushed-down clause and we're at an outer
1102	* join, we can't use it.
1103	*/
1104	if (IS_OUTER_JOIN(jointype) &&
1105	RINFO_IS_PUSHED_DOWN(restrictinfo, joinrelids))
1106	continue;
1107
1108	/ Ignore if it's not a mergejoinable clause /
1109	if (!restrictinfo->can_join \|\|
1110	restrictinfo->mergeopfamilies == NIL)
1111	continue; / not mergejoinable /
1112
1113	/*
1114	* Check if clause has the form "outer op inner" or "inner op outer",
1115	* and if so mark which side is inner.
1116	*/
1117	if (!clause_sides_match_join(restrictinfo, outerrelids,
1118	innerrel->relids))
1119	continue; / no good for these input relations /
1120
1121	/ OK, add to list /
1122	clause_list = lappend(clause_list, restrictinfo);
1123	}
1124
1125	/ Let rel_is_distinct_for() do the hard work /
1126	return rel_is_distinct_for(root, innerrel, clause_list);
1127	}
1128

Browse the source code of PostgreSQL/src/backend/optimizer/plan/analyzejoins.c