joinrels.c source code [PostgreSQL/src/backend/optimizer/path/joinrels.c]

1	/-------------------------------------------------------------------------*
2	*
3	* joinrels.c
4	* Routines to determine which relations should be joined
5	*
6	* Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7	* Portions Copyright (c) 1994, Regents of the University of California
8	*
9	*
10	* IDENTIFICATION
11	* src/backend/optimizer/path/joinrels.c
12	*
13	*-------------------------------------------------------------------------
14	*/
15	#include "postgres.h"
16
17	#include "miscadmin.h"
18	#include "optimizer/appendinfo.h"
19	#include "optimizer/joininfo.h"
20	#include "optimizer/pathnode.h"
21	#include "optimizer/paths.h"
22	#include "partitioning/partbounds.h"
23	#include "utils/lsyscache.h"
24	#include "utils/memutils.h"
25
26
27	static void make_rels_by_clause_joins(PlannerInfo *root,
28	RelOptInfo *old_rel,
29	ListCell *other_rels);
30	static void make_rels_by_clauseless_joins(PlannerInfo *root,
31	RelOptInfo *old_rel,
32	ListCell *other_rels);
33	static bool has_join_restriction(PlannerInfo root, RelOptInfo rel);
34	static bool has_legal_joinclause(PlannerInfo root, RelOptInfo rel);
35	static bool restriction_is_constant_false(List *restrictlist,
36	RelOptInfo *joinrel,
37	bool only_pushed_down);
38	static void populate_joinrel_with_paths(PlannerInfo root, RelOptInfo rel1,
39	RelOptInfo rel2, RelOptInfo joinrel,
40	SpecialJoinInfo sjinfo, List restrictlist);
41	static void try_partitionwise_join(PlannerInfo root, RelOptInfo rel1,
42	RelOptInfo rel2, RelOptInfo joinrel,
43	SpecialJoinInfo *parent_sjinfo,
44	List *parent_restrictlist);
45	static SpecialJoinInfo build_child_join_sjinfo(PlannerInfo root,
46	SpecialJoinInfo *parent_sjinfo,
47	Relids left_relids, Relids right_relids);
48	static int match_expr_to_partition_keys(Expr expr, RelOptInfo rel,
49	bool strict_op);
50
51
52	/*
53	* join_search_one_level
54	* Consider ways to produce join relations containing exactly 'level'
55	* jointree items. (This is one step of the dynamic-programming method
56	* embodied in standard_join_search.) Join rel nodes for each feasible
57	* combination of lower-level rels are created and returned in a list.
58	* Implementation paths are created for each such joinrel, too.
59	*
60	* level: level of rels we want to make this time
61	* root->join_rel_level[j], 1 <= j < level, is a list of rels containing j items
62	*
63	* The result is returned in root->join_rel_level[level].
64	*/
65	void
66	join_search_one_level(PlannerInfo root, int* level)
67	{
68	List **joinrels = root->join_rel_level;
69	ListCell *r;
70	int k;
71
72	Assert(joinrels[level] == NIL);
73
74	/ Set join_cur_level so that new joinrels are added to proper list /
75	root->join_cur_level = level;
76
77	/*
78	* First, consider left-sided and right-sided plans, in which rels of
79	* exactly level-1 member relations are joined against initial relations.
80	* We prefer to join using join clauses, but if we find a rel of level-1
81	* members that has no join clauses, we will generate Cartesian-product
82	* joins against all initial rels not already contained in it.
83	*/
84	foreach(r, joinrels[level - `1`])
85	{
86	RelOptInfo old_rel = (RelOptInfo ) lfirst(r);
87
88	if (old_rel->joininfo != NIL \|\| old_rel->has_eclass_joins \|\|
89	has_join_restriction(root, old_rel))
90	{
91	/*
92	* There are join clauses or join order restrictions relevant to
93	* this rel, so consider joins between this rel and (only) those
94	* initial rels it is linked to by a clause or restriction.
95	*
96	* At level 2 this condition is symmetric, so there is no need to
97	* look at initial rels before this one in the list; we already
98	* considered such joins when we were at the earlier rel. (The
99	* mirror-image joins are handled automatically by make_join_rel.)
100	* In later passes (level > 2), we join rels of the previous level
101	* to each initial rel they don't already include but have a join
102	* clause or restriction with.
103	*/
104	ListCell *other_rels;
105
106	if (level == `2`) / consider remaining initial rels /
107	other_rels = lnext(r);
108	else / consider all initial rels /
109	other_rels = list_head(joinrels[`1`]);
110
111	make_rels_by_clause_joins(root,
112	old_rel,
113	other_rels);
114	}
115	else
116	{
117	/*
118	* Oops, we have a relation that is not joined to any other
119	* relation, either directly or by join-order restrictions.
120	* Cartesian product time.
121	*
122	* We consider a cartesian product with each not-already-included
123	* initial rel, whether it has other join clauses or not. At
124	* level 2, if there are two or more clauseless initial rels, we
125	* will redundantly consider joining them in both directions; but
126	* such cases aren't common enough to justify adding complexity to
127	* avoid the duplicated effort.
128	*/
129	make_rels_by_clauseless_joins(root,
130	old_rel,
131	list_head(joinrels[`1`]));
132	}
133	}
134
135	/*
136	* Now, consider "bushy plans" in which relations of k initial rels are
137	* joined to relations of level-k initial rels, for 2 <= k <= level-2.
138	*
139	* We only consider bushy-plan joins for pairs of rels where there is a
140	* suitable join clause (or join order restriction), in order to avoid
141	* unreasonable growth of planning time.
142	*/
143	for (k = `2`;; k++)
144	{
145	int other_level = level - k;
146
147	/*
148	* Since make_join_rel(x, y) handles both x,y and y,x cases, we only
149	* need to go as far as the halfway point.
150	*/
151	if (k > other_level)
152	break;
153
154	foreach(r, joinrels[k])
155	{
156	RelOptInfo old_rel = (RelOptInfo ) lfirst(r);
157	ListCell *other_rels;
158	ListCell *r2;
159
160	/*
161	* We can ignore relations without join clauses here, unless they
162	* participate in join-order restrictions --- then we might have
163	* to force a bushy join plan.
164	*/
165	if (old_rel->joininfo == NIL && !old_rel->has_eclass_joins &&
166	!has_join_restriction(root, old_rel))
167	continue;
168
169	if (k == other_level)
170	other_rels = lnext(r); / only consider remaining rels /
171	else
172	other_rels = list_head(joinrels[other_level]);
173
174	for_each_cell(r2, other_rels)
175	{
176	RelOptInfo new_rel = (RelOptInfo ) lfirst(r2);
177
178	if (!bms_overlap(old_rel->relids, new_rel->relids))
179	{
180	/*
181	* OK, we can build a rel of the right level from this
182	* pair of rels. Do so if there is at least one relevant
183	* join clause or join order restriction.
184	*/
185	if (have_relevant_joinclause(root, old_rel, new_rel) \|\|
186	have_join_order_restriction(root, old_rel, new_rel))
187	{
188	(void) make_join_rel(root, old_rel, new_rel);
189	}
190	}
191	}
192	}
193	}
194
195	/----------*
196	* Last-ditch effort: if we failed to find any usable joins so far, force
197	* a set of cartesian-product joins to be generated. This handles the
198	* special case where all the available rels have join clauses but we
199	* cannot use any of those clauses yet. This can only happen when we are
200	* considering a join sub-problem (a sub-joinlist) and all the rels in the
201	* sub-problem have only join clauses with rels outside the sub-problem.
202	* An example is
203	*
204	* SELECT ... FROM a INNER JOIN b ON TRUE, c, d, ...
205	* WHERE a.w = c.x and b.y = d.z;
206	*
207	* If the "a INNER JOIN b" sub-problem does not get flattened into the
208	* upper level, we must be willing to make a cartesian join of a and b;
209	* but the code above will not have done so, because it thought that both
210	* a and b have joinclauses. We consider only left-sided and right-sided
211	* cartesian joins in this case (no bushy).
212	*----------
213	*/
214	if (joinrels[level] == NIL)
215	{
216	/*
217	* This loop is just like the first one, except we always call
218	* make_rels_by_clauseless_joins().
219	*/
220	foreach(r, joinrels[level - `1`])
221	{
222	RelOptInfo old_rel = (RelOptInfo ) lfirst(r);
223
224	make_rels_by_clauseless_joins(root,
225	old_rel,
226	list_head(joinrels[`1`]));
227	}
228
229	/----------*
230	* When special joins are involved, there may be no legal way
231	* to make an N-way join for some values of N. For example consider
232	*
233	* SELECT ... FROM t1 WHERE
234	* x IN (SELECT ... FROM t2,t3 WHERE ...) AND
235	* y IN (SELECT ... FROM t4,t5 WHERE ...)
236	*
237	* We will flatten this query to a 5-way join problem, but there are
238	* no 4-way joins that join_is_legal() will consider legal. We have
239	* to accept failure at level 4 and go on to discover a workable
240	* bushy plan at level 5.
241	*
242	* However, if there are no special joins and no lateral references
243	* then join_is_legal() should never fail, and so the following sanity
244	* check is useful.
245	*----------
246	*/
247	if (joinrels[level] == NIL &&
248	root->join_info_list == NIL &&
249	!root->hasLateralRTEs)
250	elog(ERROR, "failed to build any %d-way joins", level);
251	}
252	}
253
254	/*
255	* make_rels_by_clause_joins
256	* Build joins between the given relation 'old_rel' and other relations
257	* that participate in join clauses that 'old_rel' also participates in
258	* (or participate in join-order restrictions with it).
259	* The join rels are returned in root->join_rel_level[join_cur_level].
260	*
261	* Note: at levels above 2 we will generate the same joined relation in
262	* multiple ways --- for example (a join b) join c is the same RelOptInfo as
263	* (b join c) join a, though the second case will add a different set of Paths
264	* to it. This is the reason for using the join_rel_level mechanism, which
265	* automatically ensures that each new joinrel is only added to the list once.
266	*
267	* 'old_rel' is the relation entry for the relation to be joined
268	* 'other_rels': the first cell in a linked list containing the other
269	* rels to be considered for joining
270	*
271	* Currently, this is only used with initial rels in other_rels, but it
272	* will work for joining to joinrels too.
273	*/
274	static void
275	make_rels_by_clause_joins(PlannerInfo *root,
276	RelOptInfo *old_rel,
277	ListCell *other_rels)
278	{
279	ListCell *l;
280
281	for_each_cell(l, other_rels)
282	{
283	RelOptInfo other_rel = (RelOptInfo ) lfirst(l);
284
285	if (!bms_overlap(old_rel->relids, other_rel->relids) &&
286	(have_relevant_joinclause(root, old_rel, other_rel) \|\|
287	have_join_order_restriction(root, old_rel, other_rel)))
288	{
289	(void) make_join_rel(root, old_rel, other_rel);
290	}
291	}
292	}
293
294	/*
295	* make_rels_by_clauseless_joins
296	* Given a relation 'old_rel' and a list of other relations
297	* 'other_rels', create a join relation between 'old_rel' and each
298	* member of 'other_rels' that isn't already included in 'old_rel'.
299	* The join rels are returned in root->join_rel_level[join_cur_level].
300	*
301	* 'old_rel' is the relation entry for the relation to be joined
302	* 'other_rels': the first cell of a linked list containing the
303	* other rels to be considered for joining
304	*
305	* Currently, this is only used with initial rels in other_rels, but it would
306	* work for joining to joinrels too.
307	*/
308	static void
309	make_rels_by_clauseless_joins(PlannerInfo *root,
310	RelOptInfo *old_rel,
311	ListCell *other_rels)
312	{
313	ListCell *l;
314
315	for_each_cell(l, other_rels)
316	{
317	RelOptInfo other_rel = (RelOptInfo ) lfirst(l);
318
319	if (!bms_overlap(other_rel->relids, old_rel->relids))
320	{
321	(void) make_join_rel(root, old_rel, other_rel);
322	}
323	}
324	}
325
326
327	/*
328	* join_is_legal
329	* Determine whether a proposed join is legal given the query's
330	* join order constraints; and if it is, determine the join type.
331	*
332	* Caller must supply not only the two rels, but the union of their relids.
333	* (We could simplify the API by computing joinrelids locally, but this
334	* would be redundant work in the normal path through make_join_rel.)
335	*
336	* On success, *sjinfo_p is set to NULL if this is to be a plain inner join,
337	* else it's set to point to the associated SpecialJoinInfo node. Also,
338	* *reversed_p is set true if the given relations need to be swapped to
339	* match the SpecialJoinInfo node.
340	*/
341	static bool
342	join_is_legal(PlannerInfo root, RelOptInfo rel1, RelOptInfo *rel2,
343	Relids joinrelids,
344	SpecialJoinInfo *sjinfo_p, bool reversed_p)
345	{
346	SpecialJoinInfo *match_sjinfo;
347	bool reversed;
348	bool unique_ified;
349	bool must_be_leftjoin;
350	ListCell *l;
351
352	/*
353	* Ensure output params are set on failure return. This is just to
354	* suppress uninitialized-variable warnings from overly anal compilers.
355	*/
356	*sjinfo_p = NULL;
357	*reversed_p = false;
358
359	/*
360	* If we have any special joins, the proposed join might be illegal; and
361	* in any case we have to determine its join type. Scan the join info
362	* list for matches and conflicts.
363	*/
364	match_sjinfo = NULL;
365	reversed = false;
366	unique_ified = false;
367	must_be_leftjoin = false;
368
369	foreach(l, root->join_info_list)
370	{
371	SpecialJoinInfo sjinfo = (SpecialJoinInfo ) lfirst(l);
372
373	/*
374	* This special join is not relevant unless its RHS overlaps the
375	* proposed join. (Check this first as a fast path for dismissing
376	* most irrelevant SJs quickly.)
377	*/
378	if (!bms_overlap(sjinfo->min_righthand, joinrelids))
379	continue;
380
381	/*
382	* Also, not relevant if proposed join is fully contained within RHS
383	* (ie, we're still building up the RHS).
384	*/
385	if (bms_is_subset(joinrelids, sjinfo->min_righthand))
386	continue;
387
388	/*
389	* Also, not relevant if SJ is already done within either input.
390	*/
391	if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
392	bms_is_subset(sjinfo->min_righthand, rel1->relids))
393	continue;
394	if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) &&
395	bms_is_subset(sjinfo->min_righthand, rel2->relids))
396	continue;
397
398	/*
399	* If it's a semijoin and we already joined the RHS to any other rels
400	* within either input, then we must have unique-ified the RHS at that
401	* point (see below). Therefore the semijoin is no longer relevant in
402	* this join path.
403	*/
404	if (sjinfo->jointype == JOIN_SEMI)
405	{
406	if (bms_is_subset(sjinfo->syn_righthand, rel1->relids) &&
407	!bms_equal(sjinfo->syn_righthand, rel1->relids))
408	continue;
409	if (bms_is_subset(sjinfo->syn_righthand, rel2->relids) &&
410	!bms_equal(sjinfo->syn_righthand, rel2->relids))
411	continue;
412	}
413
414	/*
415	* If one input contains min_lefthand and the other contains
416	* min_righthand, then we can perform the SJ at this join.
417	*
418	* Reject if we get matches to more than one SJ; that implies we're
419	* considering something that's not really valid.
420	*/
421	if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
422	bms_is_subset(sjinfo->min_righthand, rel2->relids))
423	{
424	if (match_sjinfo)
425	return false; / invalid join path /
426	match_sjinfo = sjinfo;
427	reversed = false;
428	}
429	else if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) &&
430	bms_is_subset(sjinfo->min_righthand, rel1->relids))
431	{
432	if (match_sjinfo)
433	return false; / invalid join path /
434	match_sjinfo = sjinfo;
435	reversed = true;
436	}
437	else if (sjinfo->jointype == JOIN_SEMI &&
438	bms_equal(sjinfo->syn_righthand, rel2->relids) &&
439	create_unique_path(root, rel2, rel2->cheapest_total_path,
440	sjinfo) != NULL)
441	{
442	/----------*
443	* For a semijoin, we can join the RHS to anything else by
444	* unique-ifying the RHS (if the RHS can be unique-ified).
445	* We will only get here if we have the full RHS but less
446	* than min_lefthand on the LHS.
447	*
448	* The reason to consider such a join path is exemplified by
449	* SELECT ... FROM a,b WHERE (a.x,b.y) IN (SELECT c1,c2 FROM c)
450	* If we insist on doing this as a semijoin we will first have
451	* to form the cartesian product of A*B. But if we unique-ify
452	* C then the semijoin becomes a plain innerjoin and we can join
453	* in any order, eg C to A and then to B. When C is much smaller
454	* than A and B this can be a huge win. So we allow C to be
455	* joined to just A or just B here, and then make_join_rel has
456	* to handle the case properly.
457	*
458	* Note that actually we'll allow unique-ified C to be joined to
459	* some other relation D here, too. That is legal, if usually not
460	* very sane, and this routine is only concerned with legality not
461	* with whether the join is good strategy.
462	*----------
463	*/
464	if (match_sjinfo)
465	return false; / invalid join path /
466	match_sjinfo = sjinfo;
467	reversed = false;
468	unique_ified = true;
469	}
470	else if (sjinfo->jointype == JOIN_SEMI &&
471	bms_equal(sjinfo->syn_righthand, rel1->relids) &&
472	create_unique_path(root, rel1, rel1->cheapest_total_path,
473	sjinfo) != NULL)
474	{
475	/ Reversed semijoin case /
476	if (match_sjinfo)
477	return false; / invalid join path /
478	match_sjinfo = sjinfo;
479	reversed = true;
480	unique_ified = true;
481	}
482	else
483	{
484	/*
485	* Otherwise, the proposed join overlaps the RHS but isn't a valid
486	* implementation of this SJ. But don't panic quite yet: the RHS
487	* violation might have occurred previously, in one or both input
488	* relations, in which case we must have previously decided that
489	* it was OK to commute some other SJ with this one. If we need
490	* to perform this join to finish building up the RHS, rejecting
491	* it could lead to not finding any plan at all. (This can occur
492	* because of the heuristics elsewhere in this file that postpone
493	* clauseless joins: we might not consider doing a clauseless join
494	* within the RHS until after we've performed other, validly
495	* commutable SJs with one or both sides of the clauseless join.)
496	* This consideration boils down to the rule that if both inputs
497	* overlap the RHS, we can allow the join --- they are either
498	* fully within the RHS, or represent previously-allowed joins to
499	* rels outside it.
500	*/
501	if (bms_overlap(rel1->relids, sjinfo->min_righthand) &&
502	bms_overlap(rel2->relids, sjinfo->min_righthand))
503	continue; / assume valid previous violation of RHS /
504
505	/*
506	* The proposed join could still be legal, but only if we're
507	* allowed to associate it into the RHS of this SJ. That means
508	* this SJ must be a LEFT join (not SEMI or ANTI, and certainly
509	* not FULL) and the proposed join must not overlap the LHS.
510	*/
511	if (sjinfo->jointype != JOIN_LEFT \|\|
512	bms_overlap(joinrelids, sjinfo->min_lefthand))
513	return false; / invalid join path /
514
515	/*
516	* To be valid, the proposed join must be a LEFT join; otherwise
517	* it can't associate into this SJ's RHS. But we may not yet have
518	* found the SpecialJoinInfo matching the proposed join, so we
519	* can't test that yet. Remember the requirement for later.
520	*/
521	must_be_leftjoin = true;
522	}
523	}
524
525	/*
526	* Fail if violated any SJ's RHS and didn't match to a LEFT SJ: the
527	* proposed join can't associate into an SJ's RHS.
528	*
529	* Also, fail if the proposed join's predicate isn't strict; we're
530	* essentially checking to see if we can apply outer-join identity 3, and
531	* that's a requirement. (This check may be redundant with checks in
532	* make_outerjoininfo, but I'm not quite sure, and it's cheap to test.)
533	*/
534	if (must_be_leftjoin &&
535	(match_sjinfo == NULL \|\|
536	match_sjinfo->jointype != JOIN_LEFT \|\|
537	!match_sjinfo->lhs_strict))
538	return false; / invalid join path /
539
540	/*
541	* We also have to check for constraints imposed by LATERAL references.
542	*/
543	if (root->hasLateralRTEs)
544	{
545	bool lateral_fwd;
546	bool lateral_rev;
547	Relids join_lateral_rels;
548
549	/*
550	* The proposed rels could each contain lateral references to the
551	* other, in which case the join is impossible. If there are lateral
552	* references in just one direction, then the join has to be done with
553	* a nestloop with the lateral referencer on the inside. If the join
554	* matches an SJ that cannot be implemented by such a nestloop, the
555	* join is impossible.
556	*
557	* Also, if the lateral reference is only indirect, we should reject
558	* the join; whatever rel(s) the reference chain goes through must be
559	* joined to first.
560	*
561	* Another case that might keep us from building a valid plan is the
562	* implementation restriction described by have_dangerous_phv().
563	*/
564	lateral_fwd = bms_overlap(rel1->relids, rel2->lateral_relids);
565	lateral_rev = bms_overlap(rel2->relids, rel1->lateral_relids);
566	if (lateral_fwd && lateral_rev)
567	return false; / have lateral refs in both directions /
568	if (lateral_fwd)
569	{
570	/ has to be implemented as nestloop with rel1 on left /
571	if (match_sjinfo &&
572	(reversed \|\|
573	unique_ified \|\|
574	match_sjinfo->jointype == JOIN_FULL))
575	return false; / not implementable as nestloop /
576	/ check there is a direct reference from rel2 to rel1 /
577	if (!bms_overlap(rel1->relids, rel2->direct_lateral_relids))
578	return false; / only indirect refs, so reject /
579	/ check we won't have a dangerous PHV /
580	if (have_dangerous_phv(root, rel1->relids, rel2->lateral_relids))
581	return false; / might be unable to handle required PHV /
582	}
583	else if (lateral_rev)
584	{
585	/ has to be implemented as nestloop with rel2 on left /
586	if (match_sjinfo &&
587	(!reversed \|\|
588	unique_ified \|\|
589	match_sjinfo->jointype == JOIN_FULL))
590	return false; / not implementable as nestloop /
591	/ check there is a direct reference from rel1 to rel2 /
592	if (!bms_overlap(rel2->relids, rel1->direct_lateral_relids))
593	return false; / only indirect refs, so reject /
594	/ check we won't have a dangerous PHV /
595	if (have_dangerous_phv(root, rel2->relids, rel1->lateral_relids))
596	return false; / might be unable to handle required PHV /
597	}
598
599	/*
600	* LATERAL references could also cause problems later on if we accept
601	* this join: if the join's minimum parameterization includes any rels
602	* that would have to be on the inside of an outer join with this join
603	* rel, then it's never going to be possible to build the complete
604	* query using this join. We should reject this join not only because
605	* it'll save work, but because if we don't, the clauseless-join
606	* heuristics might think that legality of this join means that some
607	* other join rel need not be formed, and that could lead to failure
608	* to find any plan at all. We have to consider not only rels that
609	* are directly on the inner side of an OJ with the joinrel, but also
610	* ones that are indirectly so, so search to find all such rels.
611	*/
612	join_lateral_rels = min_join_parameterization(root, joinrelids,
613	rel1, rel2);
614	if (join_lateral_rels)
615	{
616	Relids join_plus_rhs = bms_copy(joinrelids);
617	bool more;
618
619	do
620	{
621	more = false;
622	foreach(l, root->join_info_list)
623	{
624	SpecialJoinInfo sjinfo = (SpecialJoinInfo ) lfirst(l);
625
626	/ ignore full joins --- their ordering is predetermined /
627	if (sjinfo->jointype == JOIN_FULL)
628	continue;
629
630	if (bms_overlap(sjinfo->min_lefthand, join_plus_rhs) &&
631	!bms_is_subset(sjinfo->min_righthand, join_plus_rhs))
632	{
633	join_plus_rhs = bms_add_members(join_plus_rhs,
634	sjinfo->min_righthand);
635	more = true;
636	}
637	}
638	} while (more);
639	if (bms_overlap(join_plus_rhs, join_lateral_rels))
640	return false; / will not be able to join to some RHS rel /
641	}
642	}
643
644	/ Otherwise, it's a valid join /
645	*sjinfo_p = match_sjinfo;
646	*reversed_p = reversed;
647	return true;
648	}
649
650
651	/*
652	* make_join_rel
653	* Find or create a join RelOptInfo that represents the join of
654	* the two given rels, and add to it path information for paths
655	* created with the two rels as outer and inner rel.
656	* (The join rel may already contain paths generated from other
657	* pairs of rels that add up to the same set of base rels.)
658	*
659	* NB: will return NULL if attempted join is not valid. This can happen
660	* when working with outer joins, or with IN or EXISTS clauses that have been
661	* turned into joins.
662	*/
663	RelOptInfo *
664	make_join_rel(PlannerInfo root, RelOptInfo rel1, RelOptInfo *rel2)
665	{
666	Relids joinrelids;
667	SpecialJoinInfo *sjinfo;
668	bool reversed;
669	SpecialJoinInfo sjinfo_data;
670	RelOptInfo *joinrel;
671	List *restrictlist;
672
673	/ We should never try to join two overlapping sets of rels. /
674	Assert(!bms_overlap(rel1->relids, rel2->relids));
675
676	/ Construct Relids set that identifies the joinrel. /
677	joinrelids = bms_union(rel1->relids, rel2->relids);
678
679	/ Check validity and determine join type. /
680	if (!join_is_legal(root, rel1, rel2, joinrelids,
681	&sjinfo, &reversed))
682	{
683	/ invalid join path /
684	bms_free(joinrelids);
685	return NULL;
686	}
687
688	/ Swap rels if needed to match the join info. /
689	if (reversed)
690	{
691	RelOptInfo *trel = rel1;
692
693	rel1 = rel2;
694	rel2 = trel;
695	}
696
697	/*
698	* If it's a plain inner join, then we won't have found anything in
699	* join_info_list. Make up a SpecialJoinInfo so that selectivity
700	* estimation functions will know what's being joined.
701	*/
702	if (sjinfo == NULL)
703	{
704	sjinfo = &sjinfo_data;
705	sjinfo->type = T_SpecialJoinInfo;
706	sjinfo->min_lefthand = rel1->relids;
707	sjinfo->min_righthand = rel2->relids;
708	sjinfo->syn_lefthand = rel1->relids;
709	sjinfo->syn_righthand = rel2->relids;
710	sjinfo->jointype = JOIN_INNER;
711	/ we don't bother trying to make the remaining fields valid /
712	sjinfo->lhs_strict = false;
713	sjinfo->delay_upper_joins = false;
714	sjinfo->semi_can_btree = false;
715	sjinfo->semi_can_hash = false;
716	sjinfo->semi_operators = NIL;
717	sjinfo->semi_rhs_exprs = NIL;
718	}
719
720	/*
721	* Find or build the join RelOptInfo, and compute the restrictlist that
722	* goes with this particular joining.
723	*/
724	joinrel = build_join_rel(root, joinrelids, rel1, rel2, sjinfo,
725	&restrictlist);
726
727	/*
728	* If we've already proven this join is empty, we needn't consider any
729	* more paths for it.
730	*/
731	if (is_dummy_rel(joinrel))
732	{
733	bms_free(joinrelids);
734	return joinrel;
735	}
736
737	/ Add paths to the join relation. /
738	populate_joinrel_with_paths(root, rel1, rel2, joinrel, sjinfo,
739	restrictlist);
740
741	bms_free(joinrelids);
742
743	return joinrel;
744	}
745
746	/*
747	* populate_joinrel_with_paths
748	* Add paths to the given joinrel for given pair of joining relations. The
749	* SpecialJoinInfo provides details about the join and the restrictlist
750	* contains the join clauses and the other clauses applicable for given pair
751	* of the joining relations.
752	*/
753	static void
754	populate_joinrel_with_paths(PlannerInfo root, RelOptInfo rel1,
755	RelOptInfo rel2, RelOptInfo joinrel,
756	SpecialJoinInfo sjinfo, List restrictlist)
757	{
758	/*
759	* Consider paths using each rel as both outer and inner. Depending on
760	* the join type, a provably empty outer or inner rel might mean the join
761	* is provably empty too; in which case throw away any previously computed
762	* paths and mark the join as dummy. (We do it this way since it's
763	* conceivable that dummy-ness of a multi-element join might only be
764	* noticeable for certain construction paths.)
765	*
766	* Also, a provably constant-false join restriction typically means that
767	* we can skip evaluating one or both sides of the join. We do this by
768	* marking the appropriate rel as dummy. For outer joins, a
769	* constant-false restriction that is pushed down still means the whole
770	* join is dummy, while a non-pushed-down one means that no inner rows
771	* will join so we can treat the inner rel as dummy.
772	*
773	* We need only consider the jointypes that appear in join_info_list, plus
774	* JOIN_INNER.
775	*/
776	switch (sjinfo->jointype)
777	{
778	case JOIN_INNER:
779	if (is_dummy_rel(rel1) \|\| is_dummy_rel(rel2) \|\|
780	restriction_is_constant_false(restrictlist, joinrel, false))
781	{
782	mark_dummy_rel(joinrel);
783	break;
784	}
785	add_paths_to_joinrel(root, joinrel, rel1, rel2,
786	JOIN_INNER, sjinfo,
787	restrictlist);
788	add_paths_to_joinrel(root, joinrel, rel2, rel1,
789	JOIN_INNER, sjinfo,
790	restrictlist);
791	break;
792	case JOIN_LEFT:
793	if (is_dummy_rel(rel1) \|\|
794	restriction_is_constant_false(restrictlist, joinrel, true))
795	{
796	mark_dummy_rel(joinrel);
797	break;
798	}
799	if (restriction_is_constant_false(restrictlist, joinrel, false) &&
800	bms_is_subset(rel2->relids, sjinfo->syn_righthand))
801	mark_dummy_rel(rel2);
802	add_paths_to_joinrel(root, joinrel, rel1, rel2,
803	JOIN_LEFT, sjinfo,
804	restrictlist);
805	add_paths_to_joinrel(root, joinrel, rel2, rel1,
806	JOIN_RIGHT, sjinfo,
807	restrictlist);
808	break;
809	case JOIN_FULL:
810	if ((is_dummy_rel(rel1) && is_dummy_rel(rel2)) \|\|
811	restriction_is_constant_false(restrictlist, joinrel, true))
812	{
813	mark_dummy_rel(joinrel);
814	break;
815	}
816	add_paths_to_joinrel(root, joinrel, rel1, rel2,
817	JOIN_FULL, sjinfo,
818	restrictlist);
819	add_paths_to_joinrel(root, joinrel, rel2, rel1,
820	JOIN_FULL, sjinfo,
821	restrictlist);
822
823	/*
824	* If there are join quals that aren't mergeable or hashable, we
825	* may not be able to build any valid plan. Complain here so that
826	* we can give a somewhat-useful error message. (Since we have no
827	* flexibility of planning for a full join, there's no chance of
828	* succeeding later with another pair of input rels.)
829	*/
830	if (joinrel->pathlist == NIL)
831	ereport(ERROR,
832	(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
833	errmsg("FULL JOIN is only supported with merge-joinable or hash-joinable join conditions")));
834	break;
835	case JOIN_SEMI:
836
837	/*
838	* We might have a normal semijoin, or a case where we don't have
839	* enough rels to do the semijoin but can unique-ify the RHS and
840	* then do an innerjoin (see comments in join_is_legal). In the
841	* latter case we can't apply JOIN_SEMI joining.
842	*/
843	if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
844	bms_is_subset(sjinfo->min_righthand, rel2->relids))
845	{
846	if (is_dummy_rel(rel1) \|\| is_dummy_rel(rel2) \|\|
847	restriction_is_constant_false(restrictlist, joinrel, false))
848	{
849	mark_dummy_rel(joinrel);
850	break;
851	}
852	add_paths_to_joinrel(root, joinrel, rel1, rel2,
853	JOIN_SEMI, sjinfo,
854	restrictlist);
855	}
856
857	/*
858	* If we know how to unique-ify the RHS and one input rel is
859	* exactly the RHS (not a superset) we can consider unique-ifying
860	* it and then doing a regular join. (The create_unique_path
861	* check here is probably redundant with what join_is_legal did,
862	* but if so the check is cheap because it's cached. So test
863	* anyway to be sure.)
864	*/
865	if (bms_equal(sjinfo->syn_righthand, rel2->relids) &&
866	create_unique_path(root, rel2, rel2->cheapest_total_path,
867	sjinfo) != NULL)
868	{
869	if (is_dummy_rel(rel1) \|\| is_dummy_rel(rel2) \|\|
870	restriction_is_constant_false(restrictlist, joinrel, false))
871	{
872	mark_dummy_rel(joinrel);
873	break;
874	}
875	add_paths_to_joinrel(root, joinrel, rel1, rel2,
876	JOIN_UNIQUE_INNER, sjinfo,
877	restrictlist);
878	add_paths_to_joinrel(root, joinrel, rel2, rel1,
879	JOIN_UNIQUE_OUTER, sjinfo,
880	restrictlist);
881	}
882	break;
883	case JOIN_ANTI:
884	if (is_dummy_rel(rel1) \|\|
885	restriction_is_constant_false(restrictlist, joinrel, true))
886	{
887	mark_dummy_rel(joinrel);
888	break;
889	}
890	if (restriction_is_constant_false(restrictlist, joinrel, false) &&
891	bms_is_subset(rel2->relids, sjinfo->syn_righthand))
892	mark_dummy_rel(rel2);
893	add_paths_to_joinrel(root, joinrel, rel1, rel2,
894	JOIN_ANTI, sjinfo,
895	restrictlist);
896	break;
897	default:
898	/ other values not expected here /
899	elog(ERROR, "unrecognized join type: %d", (int) sjinfo->jointype);
900	break;
901	}
902
903	/ Apply partitionwise join technique, if possible. /
904	try_partitionwise_join(root, rel1, rel2, joinrel, sjinfo, restrictlist);
905	}
906
907
908	/*
909	* have_join_order_restriction
910	* Detect whether the two relations should be joined to satisfy
911	* a join-order restriction arising from special or lateral joins.
912	*
913	* In practice this is always used with have_relevant_joinclause(), and so
914	* could be merged with that function, but it seems clearer to separate the
915	* two concerns. We need this test because there are degenerate cases where
916	* a clauseless join must be performed to satisfy join-order restrictions.
917	* Also, if one rel has a lateral reference to the other, or both are needed
918	* to compute some PHV, we should consider joining them even if the join would
919	* be clauseless.
920	*
921	* Note: this is only a problem if one side of a degenerate outer join
922	* contains multiple rels, or a clauseless join is required within an
923	* IN/EXISTS RHS; else we will find a join path via the "last ditch" case in
924	* join_search_one_level(). We could dispense with this test if we were
925	* willing to try bushy plans in the "last ditch" case, but that seems much
926	* less efficient.
927	*/
928	bool
929	have_join_order_restriction(PlannerInfo *root,
930	RelOptInfo rel1, RelOptInfo rel2)
931	{
932	bool result = false;
933	ListCell *l;
934
935	/*
936	* If either side has a direct lateral reference to the other, attempt the
937	* join regardless of outer-join considerations.
938	*/
939	if (bms_overlap(rel1->relids, rel2->direct_lateral_relids) \|\|
940	bms_overlap(rel2->relids, rel1->direct_lateral_relids))
941	return true;
942
943	/*
944	* Likewise, if both rels are needed to compute some PlaceHolderVar,
945	* attempt the join regardless of outer-join considerations. (This is not
946	* very desirable, because a PHV with a large eval_at set will cause a lot
947	* of probably-useless joins to be considered, but failing to do this can
948	* cause us to fail to construct a plan at all.)
949	*/
950	foreach(l, root->placeholder_list)
951	{
952	PlaceHolderInfo phinfo = (PlaceHolderInfo ) lfirst(l);
953
954	if (bms_is_subset(rel1->relids, phinfo->ph_eval_at) &&
955	bms_is_subset(rel2->relids, phinfo->ph_eval_at))
956	return true;
957	}
958
959	/*
960	* It's possible that the rels correspond to the left and right sides of a
961	* degenerate outer join, that is, one with no joinclause mentioning the
962	* non-nullable side; in which case we should force the join to occur.
963	*
964	* Also, the two rels could represent a clauseless join that has to be
965	* completed to build up the LHS or RHS of an outer join.
966	*/
967	foreach(l, root->join_info_list)
968	{
969	SpecialJoinInfo sjinfo = (SpecialJoinInfo ) lfirst(l);
970
971	/ ignore full joins --- other mechanisms handle them /
972	if (sjinfo->jointype == JOIN_FULL)
973	continue;
974
975	/ Can we perform the SJ with these rels? /
976	if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
977	bms_is_subset(sjinfo->min_righthand, rel2->relids))
978	{
979	result = true;
980	break;
981	}
982	if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) &&
983	bms_is_subset(sjinfo->min_righthand, rel1->relids))
984	{
985	result = true;
986	break;
987	}
988
989	/*
990	* Might we need to join these rels to complete the RHS? We have to
991	* use "overlap" tests since either rel might include a lower SJ that
992	* has been proven to commute with this one.
993	*/
994	if (bms_overlap(sjinfo->min_righthand, rel1->relids) &&
995	bms_overlap(sjinfo->min_righthand, rel2->relids))
996	{
997	result = true;
998	break;
999	}
1000
1001	/ Likewise for the LHS. /
1002	if (bms_overlap(sjinfo->min_lefthand, rel1->relids) &&
1003	bms_overlap(sjinfo->min_lefthand, rel2->relids))
1004	{
1005	result = true;
1006	break;
1007	}
1008	}
1009
1010	/*
1011	* We do not force the join to occur if either input rel can legally be
1012	* joined to anything else using joinclauses. This essentially means that
1013	* clauseless bushy joins are put off as long as possible. The reason is
1014	* that when there is a join order restriction high up in the join tree
1015	* (that is, with many rels inside the LHS or RHS), we would otherwise
1016	* expend lots of effort considering very stupid join combinations within
1017	* its LHS or RHS.
1018	*/
1019	if (result)
1020	{
1021	if (has_legal_joinclause(root, rel1) \|\|
1022	has_legal_joinclause(root, rel2))
1023	result = false;
1024	}
1025
1026	return result;
1027	}
1028
1029
1030	/*
1031	* has_join_restriction
1032	* Detect whether the specified relation has join-order restrictions,
1033	* due to being inside an outer join or an IN (sub-SELECT),
1034	* or participating in any LATERAL references or multi-rel PHVs.
1035	*
1036	* Essentially, this tests whether have_join_order_restriction() could
1037	* succeed with this rel and some other one. It's OK if we sometimes
1038	* say "true" incorrectly. (Therefore, we don't bother with the relatively
1039	* expensive has_legal_joinclause test.)
1040	*/
1041	static bool
1042	has_join_restriction(PlannerInfo root, RelOptInfo rel)
1043	{
1044	ListCell *l;
1045
1046	if (rel->lateral_relids != NULL \|\| rel->lateral_referencers != NULL)
1047	return true;
1048
1049	foreach(l, root->placeholder_list)
1050	{
1051	PlaceHolderInfo phinfo = (PlaceHolderInfo ) lfirst(l);
1052
1053	if (bms_is_subset(rel->relids, phinfo->ph_eval_at) &&
1054	!bms_equal(rel->relids, phinfo->ph_eval_at))
1055	return true;
1056	}
1057
1058	foreach(l, root->join_info_list)
1059	{
1060	SpecialJoinInfo sjinfo = (SpecialJoinInfo ) lfirst(l);
1061
1062	/ ignore full joins --- other mechanisms preserve their ordering /
1063	if (sjinfo->jointype == JOIN_FULL)
1064	continue;
1065
1066	/ ignore if SJ is already contained in rel /
1067	if (bms_is_subset(sjinfo->min_lefthand, rel->relids) &&
1068	bms_is_subset(sjinfo->min_righthand, rel->relids))
1069	continue;
1070
1071	/ restricted if it overlaps LHS or RHS, but doesn't contain SJ /
1072	if (bms_overlap(sjinfo->min_lefthand, rel->relids) \|\|
1073	bms_overlap(sjinfo->min_righthand, rel->relids))
1074	return true;
1075	}
1076
1077	return false;
1078	}
1079
1080
1081	/*
1082	* has_legal_joinclause
1083	* Detect whether the specified relation can legally be joined
1084	* to any other rels using join clauses.
1085	*
1086	* We consider only joins to single other relations in the current
1087	* initial_rels list. This is sufficient to get a "true" result in most real
1088	* queries, and an occasional erroneous "false" will only cost a bit more
1089	* planning time. The reason for this limitation is that considering joins to
1090	* other joins would require proving that the other join rel can legally be
1091	* formed, which seems like too much trouble for something that's only a
1092	* heuristic to save planning time. (Note: we must look at initial_rels
1093	* and not all of the query, since when we are planning a sub-joinlist we
1094	* may be forced to make clauseless joins within initial_rels even though
1095	* there are join clauses linking to other parts of the query.)
1096	*/
1097	static bool
1098	has_legal_joinclause(PlannerInfo root, RelOptInfo rel)
1099	{
1100	ListCell *lc;
1101
1102	foreach(lc, root->initial_rels)
1103	{
1104	RelOptInfo rel2 = (RelOptInfo ) lfirst(lc);
1105
1106	/ ignore rels that are already in "rel" /
1107	if (bms_overlap(rel->relids, rel2->relids))
1108	continue;
1109
1110	if (have_relevant_joinclause(root, rel, rel2))
1111	{
1112	Relids joinrelids;
1113	SpecialJoinInfo *sjinfo;
1114	bool reversed;
1115
1116	/ join_is_legal needs relids of the union /
1117	joinrelids = bms_union(rel->relids, rel2->relids);
1118
1119	if (join_is_legal(root, rel, rel2, joinrelids,
1120	&sjinfo, &reversed))
1121	{
1122	/ Yes, this will work /
1123	bms_free(joinrelids);
1124	return true;
1125	}
1126
1127	bms_free(joinrelids);
1128	}
1129	}
1130
1131	return false;
1132	}
1133
1134
1135	/*
1136	* There's a pitfall for creating parameterized nestloops: suppose the inner
1137	* rel (call it A) has a parameter that is a PlaceHolderVar, and that PHV's
1138	* minimum eval_at set includes the outer rel (B) and some third rel (C).
1139	* We might think we could create a B/A nestloop join that's parameterized by
1140	* C. But we would end up with a plan in which the PHV's expression has to be
1141	* evaluated as a nestloop parameter at the B/A join; and the executor is only
1142	* set up to handle simple Vars as NestLoopParams. Rather than add complexity
1143	* and overhead to the executor for such corner cases, it seems better to
1144	* forbid the join. (Note that we can still make use of A's parameterized
1145	* path with pre-joined B+C as the outer rel. have_join_order_restriction()
1146	* ensures that we will consider making such a join even if there are not
1147	* other reasons to do so.)
1148	*
1149	* So we check whether any PHVs used in the query could pose such a hazard.
1150	* We don't have any simple way of checking whether a risky PHV would actually
1151	* be used in the inner plan, and the case is so unusual that it doesn't seem
1152	* worth working very hard on it.
1153	*
1154	* This needs to be checked in two places. If the inner rel's minimum
1155	* parameterization would trigger the restriction, then join_is_legal() should
1156	* reject the join altogether, because there will be no workable paths for it.
1157	* But joinpath.c has to check again for every proposed nestloop path, because
1158	* the inner path might have more than the minimum parameterization, causing
1159	* some PHV to be dangerous for it that otherwise wouldn't be.
1160	*/
1161	bool
1162	have_dangerous_phv(PlannerInfo *root,
1163	Relids outer_relids, Relids inner_params)
1164	{
1165	ListCell *lc;
1166
1167	foreach(lc, root->placeholder_list)
1168	{
1169	PlaceHolderInfo phinfo = (PlaceHolderInfo ) lfirst(lc);
1170
1171	if (!bms_is_subset(phinfo->ph_eval_at, inner_params))
1172	continue; / ignore, could not be a nestloop param /
1173	if (!bms_overlap(phinfo->ph_eval_at, outer_relids))
1174	continue; / ignore, not relevant to this join /
1175	if (bms_is_subset(phinfo->ph_eval_at, outer_relids))
1176	continue; / safe, it can be eval'd within outerrel /
1177	/ Otherwise, it's potentially unsafe, so reject the join /
1178	return true;
1179	}
1180
1181	/ OK to perform the join /
1182	return false;
1183	}
1184
1185
1186	/*
1187	* is_dummy_rel --- has relation been proven empty?
1188	*/
1189	bool
1190	is_dummy_rel(RelOptInfo *rel)
1191	{
1192	Path *path;
1193
1194	/*
1195	* A rel that is known dummy will have just one path that is a childless
1196	* Append. (Even if somehow it has more paths, a childless Append will
1197	* have cost zero and hence should be at the front of the pathlist.)
1198	*/
1199	if (rel->pathlist == NIL)
1200	return false;
1201	path = (Path *) linitial(rel->pathlist);
1202
1203	/*
1204	* Initially, a dummy path will just be a childless Append. But in later
1205	* planning stages we might stick a ProjectSetPath and/or ProjectionPath
1206	* on top, since Append can't project. Rather than make assumptions about
1207	* which combinations can occur, just descend through whatever we find.
1208	*/
1209	for (;;)
1210	{
1211	if (IsA(path, ProjectionPath))
1212	path = ((ProjectionPath *) path)->subpath;
1213	else if (IsA(path, ProjectSetPath))
1214	path = ((ProjectSetPath *) path)->subpath;
1215	else
1216	break;
1217	}
1218	if (IS_DUMMY_APPEND(path))
1219	return true;
1220	return false;
1221	}
1222
1223	/*
1224	* Mark a relation as proven empty.
1225	*
1226	* During GEQO planning, this can get invoked more than once on the same
1227	* baserel struct, so it's worth checking to see if the rel is already marked
1228	* dummy.
1229	*
1230	* Also, when called during GEQO join planning, we are in a short-lived
1231	* memory context. We must make sure that the dummy path attached to a
1232	* baserel survives the GEQO cycle, else the baserel is trashed for future
1233	* GEQO cycles. On the other hand, when we are marking a joinrel during GEQO,
1234	* we don't want the dummy path to clutter the main planning context. Upshot
1235	* is that the best solution is to explicitly make the dummy path in the same
1236	* context the given RelOptInfo is in.
1237	*/
1238	void
1239	mark_dummy_rel(RelOptInfo *rel)
1240	{
1241	MemoryContext oldcontext;
1242
1243	/ Already marked? /
1244	if (is_dummy_rel(rel))
1245	return;
1246
1247	/ No, so choose correct context to make the dummy path in /
1248	oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(rel));
1249
1250	/ Set dummy size estimate /
1251	rel->rows = `0`;
1252
1253	/ Evict any previously chosen paths /
1254	rel->pathlist = NIL;
1255	rel->partial_pathlist = NIL;
1256
1257	/ Set up the dummy path /
1258	add_path(rel, (Path *) create_append_path(NULL, rel, NIL, NIL,
1259	NIL, rel->lateral_relids,
1260	`0`, false, NIL, -`1`));
1261
1262	/ Set or update cheapest_total_path and related fields /
1263	set_cheapest(rel);
1264
1265	MemoryContextSwitchTo(oldcontext);
1266	}
1267
1268
1269	/*
1270	* restriction_is_constant_false --- is a restrictlist just FALSE?
1271	*
1272	* In cases where a qual is provably constant FALSE, eval_const_expressions
1273	* will generally have thrown away anything that's ANDed with it. In outer
1274	* join situations this will leave us computing cartesian products only to
1275	* decide there's no match for an outer row, which is pretty stupid. So,
1276	* we need to detect the case.
1277	*
1278	* If only_pushed_down is true, then consider only quals that are pushed-down
1279	* from the point of view of the joinrel.
1280	*/
1281	static bool
1282	restriction_is_constant_false(List *restrictlist,
1283	RelOptInfo *joinrel,
1284	bool only_pushed_down)
1285	{
1286	ListCell *lc;
1287
1288	/*
1289	* Despite the above comment, the restriction list we see here might
1290	* possibly have other members besides the FALSE constant, since other
1291	* quals could get "pushed down" to the outer join level. So we check
1292	* each member of the list.
1293	*/
1294	foreach(lc, restrictlist)
1295	{
1296	RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
1297
1298	if (only_pushed_down && !RINFO_IS_PUSHED_DOWN(rinfo, joinrel->relids))
1299	continue;
1300
1301	if (rinfo->clause && IsA(rinfo->clause, Const))
1302	{
1303	Const con = (Const ) rinfo->clause;
1304
1305	/ constant NULL is as good as constant FALSE for our purposes /
1306	if (con->constisnull)
1307	return true;
1308	if (!DatumGetBool(con->constvalue))
1309	return true;
1310	}
1311	}
1312	return false;
1313	}
1314
1315	/*
1316	* Assess whether join between given two partitioned relations can be broken
1317	* down into joins between matching partitions; a technique called
1318	* "partitionwise join"
1319	*
1320	* Partitionwise join is possible when a. Joining relations have same
1321	* partitioning scheme b. There exists an equi-join between the partition keys
1322	* of the two relations.
1323	*
1324	* Partitionwise join is planned as follows (details: optimizer/README.)
1325	*
1326	* 1. Create the RelOptInfos for joins between matching partitions i.e
1327	* child-joins and add paths to them.
1328	*
1329	* 2. Construct Append or MergeAppend paths across the set of child joins.
1330	* This second phase is implemented by generate_partitionwise_join_paths().
1331	*
1332	* The RelOptInfo, SpecialJoinInfo and restrictlist for each child join are
1333	* obtained by translating the respective parent join structures.
1334	*/
1335	static void
1336	try_partitionwise_join(PlannerInfo root, RelOptInfo rel1, RelOptInfo *rel2,
1337	RelOptInfo joinrel, SpecialJoinInfo parent_sjinfo,
1338	List *parent_restrictlist)
1339	{
1340	bool rel1_is_simple = IS_SIMPLE_REL(rel1);
1341	bool rel2_is_simple = IS_SIMPLE_REL(rel2);
1342	int nparts;
1343	int cnt_parts;
1344
1345	/ Guard against stack overflow due to overly deep partition hierarchy. /
1346	check_stack_depth();
1347
1348	/ Nothing to do, if the join relation is not partitioned. /
1349	if (!IS_PARTITIONED_REL(joinrel))
1350	return;
1351
1352	/ The join relation should have consider_partitionwise_join set. /
1353	Assert(joinrel->consider_partitionwise_join);
1354
1355	/*
1356	* Since this join relation is partitioned, all the base relations
1357	* participating in this join must be partitioned and so are all the
1358	* intermediate join relations.
1359	*/
1360	Assert(IS_PARTITIONED_REL(rel1) && IS_PARTITIONED_REL(rel2));
1361	Assert(REL_HAS_ALL_PART_PROPS(rel1) && REL_HAS_ALL_PART_PROPS(rel2));
1362
1363	/ The joining relations should have consider_partitionwise_join set. /
1364	Assert(rel1->consider_partitionwise_join &&
1365	rel2->consider_partitionwise_join);
1366
1367	/*
1368	* The partition scheme of the join relation should match that of the
1369	* joining relations.
1370	*/
1371	Assert(joinrel->part_scheme == rel1->part_scheme &&
1372	joinrel->part_scheme == rel2->part_scheme);
1373
1374	/*
1375	* Since we allow partitionwise join only when the partition bounds of the
1376	* joining relations exactly match, the partition bounds of the join
1377	* should match those of the joining relations.
1378	*/
1379	Assert(partition_bounds_equal(joinrel->part_scheme->partnatts,
1380	joinrel->part_scheme->parttyplen,
1381	joinrel->part_scheme->parttypbyval,
1382	joinrel->boundinfo, rel1->boundinfo));
1383	Assert(partition_bounds_equal(joinrel->part_scheme->partnatts,
1384	joinrel->part_scheme->parttyplen,
1385	joinrel->part_scheme->parttypbyval,
1386	joinrel->boundinfo, rel2->boundinfo));
1387
1388	nparts = joinrel->nparts;
1389
1390	/*
1391	* Create child-join relations for this partitioned join, if those don't
1392	* exist. Add paths to child-joins for a pair of child relations
1393	* corresponding to the given pair of parent relations.
1394	*/
1395	for (cnt_parts = `0`; cnt_parts < nparts; cnt_parts++)
1396	{
1397	RelOptInfo *child_rel1 = rel1->part_rels[cnt_parts];
1398	RelOptInfo *child_rel2 = rel2->part_rels[cnt_parts];
1399	bool rel1_empty = (child_rel1 == NULL \|\|
1400	IS_DUMMY_REL(child_rel1));
1401	bool rel2_empty = (child_rel2 == NULL \|\|
1402	IS_DUMMY_REL(child_rel2));
1403	SpecialJoinInfo *child_sjinfo;
1404	List *child_restrictlist;
1405	RelOptInfo *child_joinrel;
1406	Relids child_joinrelids;
1407	AppendRelInfo **appinfos;
1408	int nappinfos;
1409
1410	/*
1411	* Check for cases where we can prove that this segment of the join
1412	* returns no rows, due to one or both inputs being empty (including
1413	* inputs that have been pruned away entirely). If so just ignore it.
1414	* These rules are equivalent to populate_joinrel_with_paths's rules
1415	* for dummy input relations.
1416	*/
1417	switch (parent_sjinfo->jointype)
1418	{
1419	case JOIN_INNER:
1420	case JOIN_SEMI:
1421	if (rel1_empty \|\| rel2_empty)
1422	continue; / ignore this join segment /
1423	break;
1424	case JOIN_LEFT:
1425	case JOIN_ANTI:
1426	if (rel1_empty)
1427	continue; / ignore this join segment /
1428	break;
1429	case JOIN_FULL:
1430	if (rel1_empty && rel2_empty)
1431	continue; / ignore this join segment /
1432	break;
1433	default:
1434	/ other values not expected here /
1435	elog(ERROR, "unrecognized join type: %d",
1436	(int) parent_sjinfo->jointype);
1437	break;
1438	}
1439
1440	/*
1441	* If a child has been pruned entirely then we can't generate paths
1442	* for it, so we have to reject partitionwise joining unless we were
1443	* able to eliminate this partition above.
1444	*/
1445	if (child_rel1 == NULL \|\| child_rel2 == NULL)
1446	{
1447	/*
1448	* Mark the joinrel as unpartitioned so that later functions treat
1449	* it correctly.
1450	*/
1451	joinrel->nparts = `0`;
1452	return;
1453	}
1454
1455	/*
1456	* If a leaf relation has consider_partitionwise_join=false, it means
1457	* that it's a dummy relation for which we skipped setting up tlist
1458	* expressions and adding EC members in set_append_rel_size(), so
1459	* again we have to fail here.
1460	*/
1461	if (rel1_is_simple && !child_rel1->consider_partitionwise_join)
1462	{
1463	Assert(child_rel1->reloptkind == RELOPT_OTHER_MEMBER_REL);
1464	Assert(IS_DUMMY_REL(child_rel1));
1465	joinrel->nparts = `0`;
1466	return;
1467	}
1468	if (rel2_is_simple && !child_rel2->consider_partitionwise_join)
1469	{
1470	Assert(child_rel2->reloptkind == RELOPT_OTHER_MEMBER_REL);
1471	Assert(IS_DUMMY_REL(child_rel2));
1472	joinrel->nparts = `0`;
1473	return;
1474	}
1475
1476	/ We should never try to join two overlapping sets of rels. /
1477	Assert(!bms_overlap(child_rel1->relids, child_rel2->relids));
1478	child_joinrelids = bms_union(child_rel1->relids, child_rel2->relids);
1479	appinfos = find_appinfos_by_relids(root, child_joinrelids, &nappinfos);
1480
1481	/*
1482	* Construct SpecialJoinInfo from parent join relations's
1483	* SpecialJoinInfo.
1484	*/
1485	child_sjinfo = build_child_join_sjinfo(root, parent_sjinfo,
1486	child_rel1->relids,
1487	child_rel2->relids);
1488
1489	/*
1490	* Construct restrictions applicable to the child join from those
1491	* applicable to the parent join.
1492	*/
1493	child_restrictlist =
1494	(List *) adjust_appendrel_attrs(root,
1495	(Node *) parent_restrictlist,
1496	nappinfos, appinfos);
1497	pfree(appinfos);
1498
1499	child_joinrel = joinrel->part_rels[cnt_parts];
1500	if (!child_joinrel)
1501	{
1502	child_joinrel = build_child_join_rel(root, child_rel1, child_rel2,
1503	joinrel, child_restrictlist,
1504	child_sjinfo,
1505	child_sjinfo->jointype);
1506	joinrel->part_rels[cnt_parts] = child_joinrel;
1507	}
1508
1509	Assert(bms_equal(child_joinrel->relids, child_joinrelids));
1510
1511	populate_joinrel_with_paths(root, child_rel1, child_rel2,
1512	child_joinrel, child_sjinfo,
1513	child_restrictlist);
1514	}
1515	}
1516
1517	/*
1518	* Construct the SpecialJoinInfo for a child-join by translating
1519	* SpecialJoinInfo for the join between parents. left_relids and right_relids
1520	* are the relids of left and right side of the join respectively.
1521	*/
1522	static SpecialJoinInfo *
1523	build_child_join_sjinfo(PlannerInfo root, SpecialJoinInfo parent_sjinfo,
1524	Relids left_relids, Relids right_relids)
1525	{
1526	SpecialJoinInfo *sjinfo = makeNode(SpecialJoinInfo);
1527	AppendRelInfo **left_appinfos;
1528	int left_nappinfos;
1529	AppendRelInfo **right_appinfos;
1530	int right_nappinfos;
1531
1532	memcpy(sjinfo, parent_sjinfo, sizeof(SpecialJoinInfo));
1533	left_appinfos = find_appinfos_by_relids(root, left_relids,
1534	&left_nappinfos);
1535	right_appinfos = find_appinfos_by_relids(root, right_relids,
1536	&right_nappinfos);
1537
1538	sjinfo->min_lefthand = adjust_child_relids(sjinfo->min_lefthand,
1539	left_nappinfos, left_appinfos);
1540	sjinfo->min_righthand = adjust_child_relids(sjinfo->min_righthand,
1541	right_nappinfos,
1542	right_appinfos);
1543	sjinfo->syn_lefthand = adjust_child_relids(sjinfo->syn_lefthand,
1544	left_nappinfos, left_appinfos);
1545	sjinfo->syn_righthand = adjust_child_relids(sjinfo->syn_righthand,
1546	right_nappinfos,
1547	right_appinfos);
1548	sjinfo->semi_rhs_exprs = (List *) adjust_appendrel_attrs(root,
1549	(Node *) sjinfo->semi_rhs_exprs,
1550	right_nappinfos,
1551	right_appinfos);
1552
1553	pfree(left_appinfos);
1554	pfree(right_appinfos);
1555
1556	return sjinfo;
1557	}
1558
1559	/*
1560	* Returns true if there exists an equi-join condition for each pair of
1561	* partition keys from given relations being joined.
1562	*/
1563	bool
1564	have_partkey_equi_join(RelOptInfo *joinrel,
1565	RelOptInfo rel1, RelOptInfo rel2,
1566	JoinType jointype, List *restrictlist)
1567	{
1568	PartitionScheme part_scheme = rel1->part_scheme;
1569	ListCell *lc;
1570	int cnt_pks;
1571	bool pk_has_clause[PARTITION_MAX_KEYS];
1572	bool strict_op;
1573
1574	/*
1575	* This function should be called when the joining relations have same
1576	* partitioning scheme.
1577	*/
1578	Assert(rel1->part_scheme == rel2->part_scheme);
1579	Assert(part_scheme);
1580
1581	memset(pk_has_clause, `0`, sizeof(pk_has_clause));
1582	foreach(lc, restrictlist)
1583	{
1584	RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
1585	OpExpr *opexpr;
1586	Expr *expr1;
1587	Expr *expr2;
1588	int ipk1;
1589	int ipk2;
1590
1591	/ If processing an outer join, only use its own join clauses. /
1592	if (IS_OUTER_JOIN(jointype) &&
1593	RINFO_IS_PUSHED_DOWN(rinfo, joinrel->relids))
1594	continue;
1595
1596	/ Skip clauses which can not be used for a join. /
1597	if (!rinfo->can_join)
1598	continue;
1599
1600	/ Skip clauses which are not equality conditions. /
1601	if (!rinfo->mergeopfamilies && !OidIsValid(rinfo->hashjoinoperator))
1602	continue;
1603
1604	opexpr = castNode(OpExpr, rinfo->clause);
1605
1606	/*
1607	* The equi-join between partition keys is strict if equi-join between
1608	* at least one partition key is using a strict operator. See
1609	* explanation about outer join reordering identity 3 in
1610	* optimizer/README
1611	*/
1612	strict_op = op_strict(opexpr->opno);
1613
1614	/ Match the operands to the relation. /
1615	if (bms_is_subset(rinfo->left_relids, rel1->relids) &&
1616	bms_is_subset(rinfo->right_relids, rel2->relids))
1617	{
1618	expr1 = linitial(opexpr->args);
1619	expr2 = lsecond(opexpr->args);
1620	}
1621	else if (bms_is_subset(rinfo->left_relids, rel2->relids) &&
1622	bms_is_subset(rinfo->right_relids, rel1->relids))
1623	{
1624	expr1 = lsecond(opexpr->args);
1625	expr2 = linitial(opexpr->args);
1626	}
1627	else
1628	continue;
1629
1630	/*
1631	* Only clauses referencing the partition keys are useful for
1632	* partitionwise join.
1633	*/
1634	ipk1 = match_expr_to_partition_keys(expr1, rel1, strict_op);
1635	if (ipk1 < `0`)
1636	continue;
1637	ipk2 = match_expr_to_partition_keys(expr2, rel2, strict_op);
1638	if (ipk2 < `0`)
1639	continue;
1640
1641	/*
1642	* If the clause refers to keys at different ordinal positions, it can
1643	* not be used for partitionwise join.
1644	*/
1645	if (ipk1 != ipk2)
1646	continue;
1647
1648	/*
1649	* The clause allows partitionwise join if only it uses the same
1650	* operator family as that specified by the partition key.
1651	*/
1652	if (rel1->part_scheme->strategy == PARTITION_STRATEGY_HASH)
1653	{
1654	if (!op_in_opfamily(rinfo->hashjoinoperator,
1655	part_scheme->partopfamily[ipk1]))
1656	continue;
1657	}
1658	else if (!list_member_oid(rinfo->mergeopfamilies,
1659	part_scheme->partopfamily[ipk1]))
1660	continue;
1661
1662	/ Mark the partition key as having an equi-join clause. /
1663	pk_has_clause[ipk1] = true;
1664	}
1665
1666	/ Check whether every partition key has an equi-join condition. /
1667	for (cnt_pks = `0`; cnt_pks < part_scheme->partnatts; cnt_pks++)
1668	{
1669	if (!pk_has_clause[cnt_pks])
1670	return false;
1671	}
1672
1673	return true;
1674	}
1675
1676	/*
1677	* Find the partition key from the given relation matching the given
1678	* expression. If found, return the index of the partition key, else return -1.
1679	*/
1680	static int
1681	match_expr_to_partition_keys(Expr expr, RelOptInfo rel, bool strict_op)
1682	{
1683	int cnt;
1684
1685	/ This function should be called only for partitioned relations. /
1686	Assert(rel->part_scheme);
1687
1688	/ Remove any relabel decorations. /
1689	while (IsA(expr, RelabelType))
1690	expr = (Expr *) (castNode(RelabelType, expr))->arg;
1691
1692	for (cnt = `0`; cnt < rel->part_scheme->partnatts; cnt++)
1693	{
1694	ListCell *lc;
1695
1696	Assert(rel->partexprs);
1697	foreach(lc, rel->partexprs[cnt])
1698	{
1699	if (equal(lfirst(lc), expr))
1700	return cnt;
1701	}
1702
1703	if (!strict_op)
1704	continue;
1705
1706	/*
1707	* If it's a strict equi-join a NULL partition key on one side will
1708	* not join a NULL partition key on the other side. So, rows with NULL
1709	* partition key from a partition on one side can not join with those
1710	* from a non-matching partition on the other side. So, search the
1711	* nullable partition keys as well.
1712	*/
1713	Assert(rel->nullable_partexprs);
1714	foreach(lc, rel->nullable_partexprs[cnt])
1715	{
1716	if (equal(lfirst(lc), expr))
1717	return cnt;
1718	}
1719	}
1720
1721	return -`1`;
1722	}
1723

Browse the source code of PostgreSQL/src/backend/optimizer/path/joinrels.c