joinpath.c source code [PostgreSQL/src/backend/optimizer/path/joinpath.c]

1	/-------------------------------------------------------------------------*
2	*
3	* joinpath.c
4	* Routines to find all possible paths for processing a set of joins
5	*
6	* Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7	* Portions Copyright (c) 1994, Regents of the University of California
8	*
9	*
10	* IDENTIFICATION
11	* src/backend/optimizer/path/joinpath.c
12	*
13	*-------------------------------------------------------------------------
14	*/
15	#include "postgres.h"
16
17	#include <math.h>
18
19	#include "executor/executor.h"
20	#include "foreign/fdwapi.h"
21	#include "optimizer/cost.h"
22	#include "optimizer/pathnode.h"
23	#include "optimizer/paths.h"
24	#include "optimizer/planmain.h"
25
26	/ Hook for plugins to get control in add_paths_to_joinrel() /
27	set_join_pathlist_hook_type set_join_pathlist_hook = NULL;
28
29	/*
30	* Paths parameterized by the parent can be considered to be parameterized by
31	* any of its child.
32	*/
33	#define PATH_PARAM_BY_PARENT(path, rel) \
34	((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), \
35	(rel)->top_parent_relids))
36	#define PATH_PARAM_BY_REL_SELF(path, rel) \
37	((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids))
38
39	#define PATH_PARAM_BY_REL(path, rel) \
40	(PATH_PARAM_BY_REL_SELF(path, rel) \|\| PATH_PARAM_BY_PARENT(path, rel))
41
42	static void try_partial_mergejoin_path(PlannerInfo *root,
43	RelOptInfo *joinrel,
44	Path *outer_path,
45	Path *inner_path,
46	List *pathkeys,
47	List *mergeclauses,
48	List *outersortkeys,
49	List *innersortkeys,
50	JoinType jointype,
51	JoinPathExtraData *extra);
52	static void sort_inner_and_outer(PlannerInfo root, RelOptInfo joinrel,
53	RelOptInfo outerrel, RelOptInfo innerrel,
54	JoinType jointype, JoinPathExtraData *extra);
55	static void match_unsorted_outer(PlannerInfo root, RelOptInfo joinrel,
56	RelOptInfo outerrel, RelOptInfo innerrel,
57	JoinType jointype, JoinPathExtraData *extra);
58	static void consider_parallel_nestloop(PlannerInfo *root,
59	RelOptInfo *joinrel,
60	RelOptInfo *outerrel,
61	RelOptInfo *innerrel,
62	JoinType jointype,
63	JoinPathExtraData *extra);
64	static void consider_parallel_mergejoin(PlannerInfo *root,
65	RelOptInfo *joinrel,
66	RelOptInfo *outerrel,
67	RelOptInfo *innerrel,
68	JoinType jointype,
69	JoinPathExtraData *extra,
70	Path *inner_cheapest_total);
71	static void hash_inner_and_outer(PlannerInfo root, RelOptInfo joinrel,
72	RelOptInfo outerrel, RelOptInfo innerrel,
73	JoinType jointype, JoinPathExtraData *extra);
74	static List select_mergejoin_clauses(PlannerInfo root,
75	RelOptInfo *joinrel,
76	RelOptInfo *outerrel,
77	RelOptInfo *innerrel,
78	List *restrictlist,
79	JoinType jointype,
80	bool *mergejoin_allowed);
81	static void generate_mergejoin_paths(PlannerInfo *root,
82	RelOptInfo *joinrel,
83	RelOptInfo *innerrel,
84	Path *outerpath,
85	JoinType jointype,
86	JoinPathExtraData *extra,
87	bool useallclauses,
88	Path *inner_cheapest_total,
89	List *merge_pathkeys,
90	bool is_partial);
91
92
93	/*
94	* add_paths_to_joinrel
95	* Given a join relation and two component rels from which it can be made,
96	* consider all possible paths that use the two component rels as outer
97	* and inner rel respectively. Add these paths to the join rel's pathlist
98	* if they survive comparison with other paths (and remove any existing
99	* paths that are dominated by these paths).
100	*
101	* Modifies the pathlist field of the joinrel node to contain the best
102	* paths found so far.
103	*
104	* jointype is not necessarily the same as sjinfo->jointype; it might be
105	* "flipped around" if we are considering joining the rels in the opposite
106	* direction from what's indicated in sjinfo.
107	*
108	* Also, this routine and others in this module accept the special JoinTypes
109	* JOIN_UNIQUE_OUTER and JOIN_UNIQUE_INNER to indicate that we should
110	* unique-ify the outer or inner relation and then apply a regular inner
111	* join. These values are not allowed to propagate outside this module,
112	* however. Path cost estimation code may need to recognize that it's
113	* dealing with such a case --- the combination of nominal jointype INNER
114	* with sjinfo->jointype == JOIN_SEMI indicates that.
115	*/
116	void
117	add_paths_to_joinrel(PlannerInfo *root,
118	RelOptInfo *joinrel,
119	RelOptInfo *outerrel,
120	RelOptInfo *innerrel,
121	JoinType jointype,
122	SpecialJoinInfo *sjinfo,
123	List *restrictlist)
124	{
125	JoinPathExtraData extra;
126	bool mergejoin_allowed = true;
127	ListCell *lc;
128	Relids joinrelids;
129
130	/*
131	* PlannerInfo doesn't contain the SpecialJoinInfos created for joins
132	* between child relations, even if there is a SpecialJoinInfo node for
133	* the join between the topmost parents. So, while calculating Relids set
134	* representing the restriction, consider relids of topmost parent of
135	* partitions.
136	*/
137	if (joinrel->reloptkind == RELOPT_OTHER_JOINREL)
138	joinrelids = joinrel->top_parent_relids;
139	else
140	joinrelids = joinrel->relids;
141
142	extra.restrictlist = restrictlist;
143	extra.mergeclause_list = NIL;
144	extra.sjinfo = sjinfo;
145	extra.param_source_rels = NULL;
146
147	/*
148	* See if the inner relation is provably unique for this outer rel.
149	*
150	* We have some special cases: for JOIN_SEMI and JOIN_ANTI, it doesn't
151	* matter since the executor can make the equivalent optimization anyway;
152	* we need not expend planner cycles on proofs. For JOIN_UNIQUE_INNER, we
153	* must be considering a semijoin whose inner side is not provably unique
154	* (else reduce_unique_semijoins would've simplified it), so there's no
155	* point in calling innerrel_is_unique. However, if the LHS covers all of
156	* the semijoin's min_lefthand, then it's appropriate to set inner_unique
157	* because the path produced by create_unique_path will be unique relative
158	* to the LHS. (If we have an LHS that's only part of the min_lefthand,
159	* that is not true.) For JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid
160	* letting that value escape this module.
161	*/
162	switch (jointype)
163	{
164	case JOIN_SEMI:
165	case JOIN_ANTI:
166	extra.inner_unique = false; / well, unproven /
167	break;
168	case JOIN_UNIQUE_INNER:
169	extra.inner_unique = bms_is_subset(sjinfo->min_lefthand,
170	outerrel->relids);
171	break;
172	case JOIN_UNIQUE_OUTER:
173	extra.inner_unique = innerrel_is_unique(root,
174	joinrel->relids,
175	outerrel->relids,
176	innerrel,
177	JOIN_INNER,
178	restrictlist,
179	false);
180	break;
181	default:
182	extra.inner_unique = innerrel_is_unique(root,
183	joinrel->relids,
184	outerrel->relids,
185	innerrel,
186	jointype,
187	restrictlist,
188	false);
189	break;
190	}
191
192	/*
193	* Find potential mergejoin clauses. We can skip this if we are not
194	* interested in doing a mergejoin. However, mergejoin may be our only
195	* way of implementing a full outer join, so override enable_mergejoin if
196	* it's a full join.
197	*/
198	if (enable_mergejoin \|\| jointype == JOIN_FULL)
199	extra.mergeclause_list = select_mergejoin_clauses(root,
200	joinrel,
201	outerrel,
202	innerrel,
203	restrictlist,
204	jointype,
205	&mergejoin_allowed);
206
207	/*
208	* If it's SEMI, ANTI, or inner_unique join, compute correction factors
209	* for cost estimation. These will be the same for all paths.
210	*/
211	if (jointype == JOIN_SEMI \|\| jointype == JOIN_ANTI \|\| extra.inner_unique)
212	compute_semi_anti_join_factors(root, joinrel, outerrel, innerrel,
213	jointype, sjinfo, restrictlist,
214	&extra.semifactors);
215
216	/*
217	* Decide whether it's sensible to generate parameterized paths for this
218	* joinrel, and if so, which relations such paths should require. There
219	* is usually no need to create a parameterized result path unless there
220	* is a join order restriction that prevents joining one of our input rels
221	* directly to the parameter source rel instead of joining to the other
222	* input rel. (But see allow_star_schema_join().) This restriction
223	* reduces the number of parameterized paths we have to deal with at
224	* higher join levels, without compromising the quality of the resulting
225	* plan. We express the restriction as a Relids set that must overlap the
226	* parameterization of any proposed join path.
227	*/
228	foreach(lc, root->join_info_list)
229	{
230	SpecialJoinInfo sjinfo2 = (SpecialJoinInfo ) lfirst(lc);
231
232	/*
233	* SJ is relevant to this join if we have some part of its RHS
234	* (possibly not all of it), and haven't yet joined to its LHS. (This
235	* test is pretty simplistic, but should be sufficient considering the
236	* join has already been proven legal.) If the SJ is relevant, it
237	* presents constraints for joining to anything not in its RHS.
238	*/
239	if (bms_overlap(joinrelids, sjinfo2->min_righthand) &&
240	!bms_overlap(joinrelids, sjinfo2->min_lefthand))
241	extra.param_source_rels = bms_join(extra.param_source_rels,
242	bms_difference(root->all_baserels,
243	sjinfo2->min_righthand));
244
245	/ full joins constrain both sides symmetrically /
246	if (sjinfo2->jointype == JOIN_FULL &&
247	bms_overlap(joinrelids, sjinfo2->min_lefthand) &&
248	!bms_overlap(joinrelids, sjinfo2->min_righthand))
249	extra.param_source_rels = bms_join(extra.param_source_rels,
250	bms_difference(root->all_baserels,
251	sjinfo2->min_lefthand));
252	}
253
254	/*
255	* However, when a LATERAL subquery is involved, there will simply not be
256	* any paths for the joinrel that aren't parameterized by whatever the
257	* subquery is parameterized by, unless its parameterization is resolved
258	* within the joinrel. So we might as well allow additional dependencies
259	* on whatever residual lateral dependencies the joinrel will have.
260	*/
261	extra.param_source_rels = bms_add_members(extra.param_source_rels,
262	joinrel->lateral_relids);
263
264	/*
265	* 1. Consider mergejoin paths where both relations must be explicitly
266	* sorted. Skip this if we can't mergejoin.
267	*/
268	if (mergejoin_allowed)
269	sort_inner_and_outer(root, joinrel, outerrel, innerrel,
270	jointype, &extra);
271
272	/*
273	* 2. Consider paths where the outer relation need not be explicitly
274	* sorted. This includes both nestloops and mergejoins where the outer
275	* path is already ordered. Again, skip this if we can't mergejoin.
276	* (That's okay because we know that nestloop can't handle right/full
277	* joins at all, so it wouldn't work in the prohibited cases either.)
278	*/
279	if (mergejoin_allowed)
280	match_unsorted_outer(root, joinrel, outerrel, innerrel,
281	jointype, &extra);
282
283	#ifdef NOT_USED
284
285	/*
286	* 3. Consider paths where the inner relation need not be explicitly
287	* sorted. This includes mergejoins only (nestloops were already built in
288	* match_unsorted_outer).
289	*
290	* Diked out as redundant 2/13/2000 -- tgl. There isn't any really
291	* significant difference between the inner and outer side of a mergejoin,
292	* so match_unsorted_inner creates no paths that aren't equivalent to
293	* those made by match_unsorted_outer when add_paths_to_joinrel() is
294	* invoked with the two rels given in the other order.
295	*/
296	if (mergejoin_allowed)
297	match_unsorted_inner(root, joinrel, outerrel, innerrel,
298	jointype, &extra);
299	#endif
300
301	/*
302	* 4. Consider paths where both outer and inner relations must be hashed
303	* before being joined. As above, disregard enable_hashjoin for full
304	* joins, because there may be no other alternative.
305	*/
306	if (enable_hashjoin \|\| jointype == JOIN_FULL)
307	hash_inner_and_outer(root, joinrel, outerrel, innerrel,
308	jointype, &extra);
309
310	/*
311	* 5. If inner and outer relations are foreign tables (or joins) belonging
312	* to the same server and assigned to the same user to check access
313	* permissions as, give the FDW a chance to push down joins.
314	*/
315	if (joinrel->fdwroutine &&
316	joinrel->fdwroutine->GetForeignJoinPaths)
317	joinrel->fdwroutine->GetForeignJoinPaths(root, joinrel,
318	outerrel, innerrel,
319	jointype, &extra);
320
321	/*
322	* 6. Finally, give extensions a chance to manipulate the path list.
323	*/
324	if (set_join_pathlist_hook)
325	set_join_pathlist_hook(root, joinrel, outerrel, innerrel,
326	jointype, &extra);
327	}
328
329	/*
330	* We override the param_source_rels heuristic to accept nestloop paths in
331	* which the outer rel satisfies some but not all of the inner path's
332	* parameterization. This is necessary to get good plans for star-schema
333	* scenarios, in which a parameterized path for a large table may require
334	* parameters from multiple small tables that will not get joined directly to
335	* each other. We can handle that by stacking nestloops that have the small
336	* tables on the outside; but this breaks the rule the param_source_rels
337	* heuristic is based on, namely that parameters should not be passed down
338	* across joins unless there's a join-order-constraint-based reason to do so.
339	* So we ignore the param_source_rels restriction when this case applies.
340	*
341	* allow_star_schema_join() returns true if the param_source_rels restriction
342	* should be overridden, ie, it's okay to perform this join.
343	*/
344	static inline bool
345	allow_star_schema_join(PlannerInfo *root,
346	Relids outerrelids,
347	Relids inner_paramrels)
348	{
349	/*
350	* It's a star-schema case if the outer rel provides some but not all of
351	* the inner rel's parameterization.
352	*/
353	return (bms_overlap(inner_paramrels, outerrelids) &&
354	bms_nonempty_difference(inner_paramrels, outerrelids));
355	}
356
357	/*
358	* try_nestloop_path
359	* Consider a nestloop join path; if it appears useful, push it into
360	* the joinrel's pathlist via add_path().
361	*/
362	static void
363	try_nestloop_path(PlannerInfo *root,
364	RelOptInfo *joinrel,
365	Path *outer_path,
366	Path *inner_path,
367	List *pathkeys,
368	JoinType jointype,
369	JoinPathExtraData *extra)
370	{
371	Relids required_outer;
372	JoinCostWorkspace workspace;
373	RelOptInfo *innerrel = inner_path->parent;
374	RelOptInfo *outerrel = outer_path->parent;
375	Relids innerrelids;
376	Relids outerrelids;
377	Relids inner_paramrels = PATH_REQ_OUTER(inner_path);
378	Relids outer_paramrels = PATH_REQ_OUTER(outer_path);
379
380	/*
381	* Paths are parameterized by top-level parents, so run parameterization
382	* tests on the parent relids.
383	*/
384	if (innerrel->top_parent_relids)
385	innerrelids = innerrel->top_parent_relids;
386	else
387	innerrelids = innerrel->relids;
388
389	if (outerrel->top_parent_relids)
390	outerrelids = outerrel->top_parent_relids;
391	else
392	outerrelids = outerrel->relids;
393
394	/*
395	* Check to see if proposed path is still parameterized, and reject if the
396	* parameterization wouldn't be sensible --- unless allow_star_schema_join
397	* says to allow it anyway. Also, we must reject if have_dangerous_phv
398	* doesn't like the look of it, which could only happen if the nestloop is
399	* still parameterized.
400	*/
401	required_outer = calc_nestloop_required_outer(outerrelids, outer_paramrels,
402	innerrelids, inner_paramrels);
403	if (required_outer &&
404	((!bms_overlap(required_outer, extra->param_source_rels) &&
405	!allow_star_schema_join(root, outerrelids, inner_paramrels)) \|\|
406	have_dangerous_phv(root, outerrelids, inner_paramrels)))
407	{
408	/ Waste no memory when we reject a path here /
409	bms_free(required_outer);
410	return;
411	}
412
413	/*
414	* Do a precheck to quickly eliminate obviously-inferior paths. We
415	* calculate a cheap lower bound on the path's cost and then use
416	* add_path_precheck() to see if the path is clearly going to be dominated
417	* by some existing path for the joinrel. If not, do the full pushup with
418	* creating a fully valid path structure and submitting it to add_path().
419	* The latter two steps are expensive enough to make this two-phase
420	* methodology worthwhile.
421	*/
422	initial_cost_nestloop(root, &workspace, jointype,
423	outer_path, inner_path, extra);
424
425	if (add_path_precheck(joinrel,
426	workspace.startup_cost, workspace.total_cost,
427	pathkeys, required_outer))
428	{
429	/*
430	* If the inner path is parameterized, it is parameterized by the
431	* topmost parent of the outer rel, not the outer rel itself. Fix
432	* that.
433	*/
434	if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent))
435	{
436	inner_path = reparameterize_path_by_child(root, inner_path,
437	outer_path->parent);
438
439	/*
440	* If we could not translate the path, we can't create nest loop
441	* path.
442	*/
443	if (!inner_path)
444	{
445	bms_free(required_outer);
446	return;
447	}
448	}
449
450	add_path(joinrel, (Path *)
451	create_nestloop_path(root,
452	joinrel,
453	jointype,
454	&workspace,
455	extra,
456	outer_path,
457	inner_path,
458	extra->restrictlist,
459	pathkeys,
460	required_outer));
461	}
462	else
463	{
464	/ Waste no memory when we reject a path here /
465	bms_free(required_outer);
466	}
467	}
468
469	/*
470	* try_partial_nestloop_path
471	* Consider a partial nestloop join path; if it appears useful, push it into
472	* the joinrel's partial_pathlist via add_partial_path().
473	*/
474	static void
475	try_partial_nestloop_path(PlannerInfo *root,
476	RelOptInfo *joinrel,
477	Path *outer_path,
478	Path *inner_path,
479	List *pathkeys,
480	JoinType jointype,
481	JoinPathExtraData *extra)
482	{
483	JoinCostWorkspace workspace;
484
485	/*
486	* If the inner path is parameterized, the parameterization must be fully
487	* satisfied by the proposed outer path. Parameterized partial paths are
488	* not supported. The caller should already have verified that no
489	* extra_lateral_rels are required here.
490	*/
491	Assert(bms_is_empty(joinrel->lateral_relids));
492	if (inner_path->param_info != NULL)
493	{
494	Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
495	RelOptInfo *outerrel = outer_path->parent;
496	Relids outerrelids;
497
498	/*
499	* The inner and outer paths are parameterized, if at all, by the top
500	* level parents, not the child relations, so we must use those relids
501	* for our parameterization tests.
502	*/
503	if (outerrel->top_parent_relids)
504	outerrelids = outerrel->top_parent_relids;
505	else
506	outerrelids = outerrel->relids;
507
508	if (!bms_is_subset(inner_paramrels, outerrelids))
509	return;
510	}
511
512	/*
513	* Before creating a path, get a quick lower bound on what it is likely to
514	* cost. Bail out right away if it looks terrible.
515	*/
516	initial_cost_nestloop(root, &workspace, jointype,
517	outer_path, inner_path, extra);
518	if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys))
519	return;
520
521	/*
522	* If the inner path is parameterized, it is parameterized by the topmost
523	* parent of the outer rel, not the outer rel itself. Fix that.
524	*/
525	if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent))
526	{
527	inner_path = reparameterize_path_by_child(root, inner_path,
528	outer_path->parent);
529
530	/*
531	* If we could not translate the path, we can't create nest loop path.
532	*/
533	if (!inner_path)
534	return;
535	}
536
537	/ Might be good enough to be worth trying, so let's try it. /
538	add_partial_path(joinrel, (Path *)
539	create_nestloop_path(root,
540	joinrel,
541	jointype,
542	&workspace,
543	extra,
544	outer_path,
545	inner_path,
546	extra->restrictlist,
547	pathkeys,
548	NULL));
549	}
550
551	/*
552	* try_mergejoin_path
553	* Consider a merge join path; if it appears useful, push it into
554	* the joinrel's pathlist via add_path().
555	*/
556	static void
557	try_mergejoin_path(PlannerInfo *root,
558	RelOptInfo *joinrel,
559	Path *outer_path,
560	Path *inner_path,
561	List *pathkeys,
562	List *mergeclauses,
563	List *outersortkeys,
564	List *innersortkeys,
565	JoinType jointype,
566	JoinPathExtraData *extra,
567	bool is_partial)
568	{
569	Relids required_outer;
570	JoinCostWorkspace workspace;
571
572	if (is_partial)
573	{
574	try_partial_mergejoin_path(root,
575	joinrel,
576	outer_path,
577	inner_path,
578	pathkeys,
579	mergeclauses,
580	outersortkeys,
581	innersortkeys,
582	jointype,
583	extra);
584	return;
585	}
586
587	/*
588	* Check to see if proposed path is still parameterized, and reject if the
589	* parameterization wouldn't be sensible.
590	*/
591	required_outer = calc_non_nestloop_required_outer(outer_path,
592	inner_path);
593	if (required_outer &&
594	!bms_overlap(required_outer, extra->param_source_rels))
595	{
596	/ Waste no memory when we reject a path here /
597	bms_free(required_outer);
598	return;
599	}
600
601	/*
602	* If the given paths are already well enough ordered, we can skip doing
603	* an explicit sort.
604	*/
605	if (outersortkeys &&
606	pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
607	outersortkeys = NIL;
608	if (innersortkeys &&
609	pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
610	innersortkeys = NIL;
611
612	/*
613	* See comments in try_nestloop_path().
614	*/
615	initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
616	outer_path, inner_path,
617	outersortkeys, innersortkeys,
618	extra);
619
620	if (add_path_precheck(joinrel,
621	workspace.startup_cost, workspace.total_cost,
622	pathkeys, required_outer))
623	{
624	add_path(joinrel, (Path *)
625	create_mergejoin_path(root,
626	joinrel,
627	jointype,
628	&workspace,
629	extra,
630	outer_path,
631	inner_path,
632	extra->restrictlist,
633	pathkeys,
634	required_outer,
635	mergeclauses,
636	outersortkeys,
637	innersortkeys));
638	}
639	else
640	{
641	/ Waste no memory when we reject a path here /
642	bms_free(required_outer);
643	}
644	}
645
646	/*
647	* try_partial_mergejoin_path
648	* Consider a partial merge join path; if it appears useful, push it into
649	* the joinrel's pathlist via add_partial_path().
650	*/
651	static void
652	try_partial_mergejoin_path(PlannerInfo *root,
653	RelOptInfo *joinrel,
654	Path *outer_path,
655	Path *inner_path,
656	List *pathkeys,
657	List *mergeclauses,
658	List *outersortkeys,
659	List *innersortkeys,
660	JoinType jointype,
661	JoinPathExtraData *extra)
662	{
663	JoinCostWorkspace workspace;
664
665	/*
666	* See comments in try_partial_hashjoin_path().
667	*/
668	Assert(bms_is_empty(joinrel->lateral_relids));
669	if (inner_path->param_info != NULL)
670	{
671	Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
672
673	if (!bms_is_empty(inner_paramrels))
674	return;
675	}
676
677	/*
678	* If the given paths are already well enough ordered, we can skip doing
679	* an explicit sort.
680	*/
681	if (outersortkeys &&
682	pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
683	outersortkeys = NIL;
684	if (innersortkeys &&
685	pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
686	innersortkeys = NIL;
687
688	/*
689	* See comments in try_partial_nestloop_path().
690	*/
691	initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
692	outer_path, inner_path,
693	outersortkeys, innersortkeys,
694	extra);
695
696	if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys))
697	return;
698
699	/ Might be good enough to be worth trying, so let's try it. /
700	add_partial_path(joinrel, (Path *)
701	create_mergejoin_path(root,
702	joinrel,
703	jointype,
704	&workspace,
705	extra,
706	outer_path,
707	inner_path,
708	extra->restrictlist,
709	pathkeys,
710	NULL,
711	mergeclauses,
712	outersortkeys,
713	innersortkeys));
714	}
715
716	/*
717	* try_hashjoin_path
718	* Consider a hash join path; if it appears useful, push it into
719	* the joinrel's pathlist via add_path().
720	*/
721	static void
722	try_hashjoin_path(PlannerInfo *root,
723	RelOptInfo *joinrel,
724	Path *outer_path,
725	Path *inner_path,
726	List *hashclauses,
727	JoinType jointype,
728	JoinPathExtraData *extra)
729	{
730	Relids required_outer;
731	JoinCostWorkspace workspace;
732
733	/*
734	* Check to see if proposed path is still parameterized, and reject if the
735	* parameterization wouldn't be sensible.
736	*/
737	required_outer = calc_non_nestloop_required_outer(outer_path,
738	inner_path);
739	if (required_outer &&
740	!bms_overlap(required_outer, extra->param_source_rels))
741	{
742	/ Waste no memory when we reject a path here /
743	bms_free(required_outer);
744	return;
745	}
746
747	/*
748	* See comments in try_nestloop_path(). Also note that hashjoin paths
749	* never have any output pathkeys, per comments in create_hashjoin_path.
750	*/
751	initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
752	outer_path, inner_path, extra, false);
753
754	if (add_path_precheck(joinrel,
755	workspace.startup_cost, workspace.total_cost,
756	NIL, required_outer))
757	{
758	add_path(joinrel, (Path *)
759	create_hashjoin_path(root,
760	joinrel,
761	jointype,
762	&workspace,
763	extra,
764	outer_path,
765	inner_path,
766	false, / parallel_hash /
767	extra->restrictlist,
768	required_outer,
769	hashclauses));
770	}
771	else
772	{
773	/ Waste no memory when we reject a path here /
774	bms_free(required_outer);
775	}
776	}
777
778	/*
779	* try_partial_hashjoin_path
780	* Consider a partial hashjoin join path; if it appears useful, push it into
781	* the joinrel's partial_pathlist via add_partial_path().
782	* The outer side is partial. If parallel_hash is true, then the inner path
783	* must be partial and will be run in parallel to create one or more shared
784	* hash tables; otherwise the inner path must be complete and a copy of it
785	* is run in every process to create separate identical private hash tables.
786	*/
787	static void
788	try_partial_hashjoin_path(PlannerInfo *root,
789	RelOptInfo *joinrel,
790	Path *outer_path,
791	Path *inner_path,
792	List *hashclauses,
793	JoinType jointype,
794	JoinPathExtraData *extra,
795	bool parallel_hash)
796	{
797	JoinCostWorkspace workspace;
798
799	/*
800	* If the inner path is parameterized, the parameterization must be fully
801	* satisfied by the proposed outer path. Parameterized partial paths are
802	* not supported. The caller should already have verified that no
803	* extra_lateral_rels are required here.
804	*/
805	Assert(bms_is_empty(joinrel->lateral_relids));
806	if (inner_path->param_info != NULL)
807	{
808	Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
809
810	if (!bms_is_empty(inner_paramrels))
811	return;
812	}
813
814	/*
815	* Before creating a path, get a quick lower bound on what it is likely to
816	* cost. Bail out right away if it looks terrible.
817	*/
818	initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
819	outer_path, inner_path, extra, parallel_hash);
820	if (!add_partial_path_precheck(joinrel, workspace.total_cost, NIL))
821	return;
822
823	/ Might be good enough to be worth trying, so let's try it. /
824	add_partial_path(joinrel, (Path *)
825	create_hashjoin_path(root,
826	joinrel,
827	jointype,
828	&workspace,
829	extra,
830	outer_path,
831	inner_path,
832	parallel_hash,
833	extra->restrictlist,
834	NULL,
835	hashclauses));
836	}
837
838	/*
839	* clause_sides_match_join
840	* Determine whether a join clause is of the right form to use in this join.
841	*
842	* We already know that the clause is a binary opclause referencing only the
843	* rels in the current join. The point here is to check whether it has the
844	* form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
845	* rather than mixing outer and inner vars on either side. If it matches,
846	* we set the transient flag outer_is_left to identify which side is which.
847	*/
848	static inline bool
849	clause_sides_match_join(RestrictInfo rinfo, RelOptInfo outerrel,
850	RelOptInfo *innerrel)
851	{
852	if (bms_is_subset(rinfo->left_relids, outerrel->relids) &&
853	bms_is_subset(rinfo->right_relids, innerrel->relids))
854	{
855	/ lefthand side is outer /
856	rinfo->outer_is_left = true;
857	return true;
858	}
859	else if (bms_is_subset(rinfo->left_relids, innerrel->relids) &&
860	bms_is_subset(rinfo->right_relids, outerrel->relids))
861	{
862	/ righthand side is outer /
863	rinfo->outer_is_left = false;
864	return true;
865	}
866	return false; / no good for these input relations /
867	}
868
869	/*
870	* sort_inner_and_outer
871	* Create mergejoin join paths by explicitly sorting both the outer and
872	* inner join relations on each available merge ordering.
873	*
874	* 'joinrel' is the join relation
875	* 'outerrel' is the outer join relation
876	* 'innerrel' is the inner join relation
877	* 'jointype' is the type of join to do
878	* 'extra' contains additional input values
879	*/
880	static void
881	sort_inner_and_outer(PlannerInfo *root,
882	RelOptInfo *joinrel,
883	RelOptInfo *outerrel,
884	RelOptInfo *innerrel,
885	JoinType jointype,
886	JoinPathExtraData *extra)
887	{
888	JoinType save_jointype = jointype;
889	Path *outer_path;
890	Path *inner_path;
891	Path *cheapest_partial_outer = NULL;
892	Path *cheapest_safe_inner = NULL;
893	List *all_pathkeys;
894	ListCell *l;
895
896	/*
897	* We only consider the cheapest-total-cost input paths, since we are
898	* assuming here that a sort is required. We will consider
899	* cheapest-startup-cost input paths later, and only if they don't need a
900	* sort.
901	*
902	* This function intentionally does not consider parameterized input
903	* paths, except when the cheapest-total is parameterized. If we did so,
904	* we'd have a combinatorial explosion of mergejoin paths of dubious
905	* value. This interacts with decisions elsewhere that also discriminate
906	* against mergejoins with parameterized inputs; see comments in
907	* src/backend/optimizer/README.
908	*/
909	outer_path = outerrel->cheapest_total_path;
910	inner_path = innerrel->cheapest_total_path;
911
912	/*
913	* If either cheapest-total path is parameterized by the other rel, we
914	* can't use a mergejoin. (There's no use looking for alternative input
915	* paths, since these should already be the least-parameterized available
916	* paths.)
917	*/
918	if (PATH_PARAM_BY_REL(outer_path, innerrel) \|\|
919	PATH_PARAM_BY_REL(inner_path, outerrel))
920	return;
921
922	/*
923	* If unique-ification is requested, do it and then handle as a plain
924	* inner join.
925	*/
926	if (jointype == JOIN_UNIQUE_OUTER)
927	{
928	outer_path = (Path *) create_unique_path(root, outerrel,
929	outer_path, extra->sjinfo);
930	Assert(outer_path);
931	jointype = JOIN_INNER;
932	}
933	else if (jointype == JOIN_UNIQUE_INNER)
934	{
935	inner_path = (Path *) create_unique_path(root, innerrel,
936	inner_path, extra->sjinfo);
937	Assert(inner_path);
938	jointype = JOIN_INNER;
939	}
940
941	/*
942	* If the joinrel is parallel-safe, we may be able to consider a partial
943	* merge join. However, we can't handle JOIN_UNIQUE_OUTER, because the
944	* outer path will be partial, and therefore we won't be able to properly
945	* guarantee uniqueness. Similarly, we can't handle JOIN_FULL and
946	* JOIN_RIGHT, because they can produce false null extended rows. Also,
947	* the resulting path must not be parameterized.
948	*/
949	if (joinrel->consider_parallel &&
950	save_jointype != JOIN_UNIQUE_OUTER &&
951	save_jointype != JOIN_FULL &&
952	save_jointype != JOIN_RIGHT &&
953	outerrel->partial_pathlist != NIL &&
954	bms_is_empty(joinrel->lateral_relids))
955	{
956	cheapest_partial_outer = (Path *) linitial(outerrel->partial_pathlist);
957
958	if (inner_path->parallel_safe)
959	cheapest_safe_inner = inner_path;
960	else if (save_jointype != JOIN_UNIQUE_INNER)
961	cheapest_safe_inner =
962	get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
963	}
964
965	/*
966	* Each possible ordering of the available mergejoin clauses will generate
967	* a differently-sorted result path at essentially the same cost. We have
968	* no basis for choosing one over another at this level of joining, but
969	* some sort orders may be more useful than others for higher-level
970	* mergejoins, so it's worth considering multiple orderings.
971	*
972	* Actually, it's not quite true that every mergeclause ordering will
973	* generate a different path order, because some of the clauses may be
974	* partially redundant (refer to the same EquivalenceClasses). Therefore,
975	* what we do is convert the mergeclause list to a list of canonical
976	* pathkeys, and then consider different orderings of the pathkeys.
977	*
978	* Generating a path for every permutation of the pathkeys doesn't seem
979	* like a winning strategy; the cost in planning time is too high. For
980	* now, we generate one path for each pathkey, listing that pathkey first
981	* and the rest in random order. This should allow at least a one-clause
982	* mergejoin without re-sorting against any other possible mergejoin
983	* partner path. But if we've not guessed the right ordering of secondary
984	* keys, we may end up evaluating clauses as qpquals when they could have
985	* been done as mergeclauses. (In practice, it's rare that there's more
986	* than two or three mergeclauses, so expending a huge amount of thought
987	* on that is probably not worth it.)
988	*
989	* The pathkey order returned by select_outer_pathkeys_for_merge() has
990	* some heuristics behind it (see that function), so be sure to try it
991	* exactly as-is as well as making variants.
992	*/
993	all_pathkeys = select_outer_pathkeys_for_merge(root,
994	extra->mergeclause_list,
995	joinrel);
996
997	foreach(l, all_pathkeys)
998	{
999	List front_pathkey = (List ) lfirst(l);
1000	List *cur_mergeclauses;
1001	List *outerkeys;
1002	List *innerkeys;
1003	List *merge_pathkeys;
1004
1005	/ Make a pathkey list with this guy first /
1006	if (l != list_head(all_pathkeys))
1007	outerkeys = lcons(front_pathkey,
1008	list_delete_ptr(list_copy(all_pathkeys),
1009	front_pathkey));
1010	else
1011	outerkeys = all_pathkeys; / no work at first one... /
1012
1013	/ Sort the mergeclauses into the corresponding ordering /
1014	cur_mergeclauses =
1015	find_mergeclauses_for_outer_pathkeys(root,
1016	outerkeys,
1017	extra->mergeclause_list);
1018
1019	/ Should have used them all... /
1020	Assert(list_length(cur_mergeclauses) == list_length(extra->mergeclause_list));
1021
1022	/ Build sort pathkeys for the inner side /
1023	innerkeys = make_inner_pathkeys_for_merge(root,
1024	cur_mergeclauses,
1025	outerkeys);
1026
1027	/ Build pathkeys representing output sort order /
1028	merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1029	outerkeys);
1030
1031	/*
1032	* And now we can make the path.
1033	*
1034	* Note: it's possible that the cheapest paths will already be sorted
1035	* properly. try_mergejoin_path will detect that case and suppress an
1036	* explicit sort step, so we needn't do so here.
1037	*/
1038	try_mergejoin_path(root,
1039	joinrel,
1040	outer_path,
1041	inner_path,
1042	merge_pathkeys,
1043	cur_mergeclauses,
1044	outerkeys,
1045	innerkeys,
1046	jointype,
1047	extra,
1048	false);
1049
1050	/*
1051	* If we have partial outer and parallel safe inner path then try
1052	* partial mergejoin path.
1053	*/
1054	if (cheapest_partial_outer && cheapest_safe_inner)
1055	try_partial_mergejoin_path(root,
1056	joinrel,
1057	cheapest_partial_outer,
1058	cheapest_safe_inner,
1059	merge_pathkeys,
1060	cur_mergeclauses,
1061	outerkeys,
1062	innerkeys,
1063	jointype,
1064	extra);
1065	}
1066	}
1067
1068	/*
1069	* generate_mergejoin_paths
1070	* Creates possible mergejoin paths for input outerpath.
1071	*
1072	* We generate mergejoins if mergejoin clauses are available. We have
1073	* two ways to generate the inner path for a mergejoin: sort the cheapest
1074	* inner path, or use an inner path that is already suitably ordered for the
1075	* merge. If we have several mergeclauses, it could be that there is no inner
1076	* path (or only a very expensive one) for the full list of mergeclauses, but
1077	* better paths exist if we truncate the mergeclause list (thereby discarding
1078	* some sort key requirements). So, we consider truncations of the
1079	* mergeclause list as well as the full list. (Ideally we'd consider all
1080	* subsets of the mergeclause list, but that seems way too expensive.)
1081	*/
1082	static void
1083	generate_mergejoin_paths(PlannerInfo *root,
1084	RelOptInfo *joinrel,
1085	RelOptInfo *innerrel,
1086	Path *outerpath,
1087	JoinType jointype,
1088	JoinPathExtraData *extra,
1089	bool useallclauses,
1090	Path *inner_cheapest_total,
1091	List *merge_pathkeys,
1092	bool is_partial)
1093	{
1094	List *mergeclauses;
1095	List *innersortkeys;
1096	List *trialsortkeys;
1097	Path *cheapest_startup_inner;
1098	Path *cheapest_total_inner;
1099	JoinType save_jointype = jointype;
1100	int num_sortkeys;
1101	int sortkeycnt;
1102
1103	if (jointype == JOIN_UNIQUE_OUTER \|\| jointype == JOIN_UNIQUE_INNER)
1104	jointype = JOIN_INNER;
1105
1106	/ Look for useful mergeclauses (if any) /
1107	mergeclauses =
1108	find_mergeclauses_for_outer_pathkeys(root,
1109	outerpath->pathkeys,
1110	extra->mergeclause_list);
1111
1112	/*
1113	* Done with this outer path if no chance for a mergejoin.
1114	*
1115	* Special corner case: for "x FULL JOIN y ON true", there will be no join
1116	* clauses at all. Ordinarily we'd generate a clauseless nestloop path,
1117	* but since mergejoin is our only join type that supports FULL JOIN
1118	* without any join clauses, it's necessary to generate a clauseless
1119	* mergejoin path instead.
1120	*/
1121	if (mergeclauses == NIL)
1122	{
1123	if (jointype == JOIN_FULL)
1124	/ okay to try for mergejoin / ;
1125	else
1126	return;
1127	}
1128	if (useallclauses &&
1129	list_length(mergeclauses) != list_length(extra->mergeclause_list))
1130	return;
1131
1132	/ Compute the required ordering of the inner path /
1133	innersortkeys = make_inner_pathkeys_for_merge(root,
1134	mergeclauses,
1135	outerpath->pathkeys);
1136
1137	/*
1138	* Generate a mergejoin on the basis of sorting the cheapest inner. Since
1139	* a sort will be needed, only cheapest total cost matters. (But
1140	* try_mergejoin_path will do the right thing if inner_cheapest_total is
1141	* already correctly sorted.)
1142	*/
1143	try_mergejoin_path(root,
1144	joinrel,
1145	outerpath,
1146	inner_cheapest_total,
1147	merge_pathkeys,
1148	mergeclauses,
1149	NIL,
1150	innersortkeys,
1151	jointype,
1152	extra,
1153	is_partial);
1154
1155	/ Can't do anything else if inner path needs to be unique'd /
1156	if (save_jointype == JOIN_UNIQUE_INNER)
1157	return;
1158
1159	/*
1160	* Look for presorted inner paths that satisfy the innersortkey list ---
1161	* or any truncation thereof, if we are allowed to build a mergejoin using
1162	* a subset of the merge clauses. Here, we consider both cheap startup
1163	* cost and cheap total cost.
1164	*
1165	* Currently we do not consider parameterized inner paths here. This
1166	* interacts with decisions elsewhere that also discriminate against
1167	* mergejoins with parameterized inputs; see comments in
1168	* src/backend/optimizer/README.
1169	*
1170	* As we shorten the sortkey list, we should consider only paths that are
1171	* strictly cheaper than (in particular, not the same as) any path found
1172	* in an earlier iteration. Otherwise we'd be intentionally using fewer
1173	* merge keys than a given path allows (treating the rest as plain
1174	* joinquals), which is unlikely to be a good idea. Also, eliminating
1175	* paths here on the basis of compare_path_costs is a lot cheaper than
1176	* building the mergejoin path only to throw it away.
1177	*
1178	* If inner_cheapest_total is well enough sorted to have not required a
1179	* sort in the path made above, we shouldn't make a duplicate path with
1180	* it, either. We handle that case with the same logic that handles the
1181	* previous consideration, by initializing the variables that track
1182	* cheapest-so-far properly. Note that we do NOT reject
1183	* inner_cheapest_total if we find it matches some shorter set of
1184	* pathkeys. That case corresponds to using fewer mergekeys to avoid
1185	* sorting inner_cheapest_total, whereas we did sort it above, so the
1186	* plans being considered are different.
1187	*/
1188	if (pathkeys_contained_in(innersortkeys,
1189	inner_cheapest_total->pathkeys))
1190	{
1191	/ inner_cheapest_total didn't require a sort /
1192	cheapest_startup_inner = inner_cheapest_total;
1193	cheapest_total_inner = inner_cheapest_total;
1194	}
1195	else
1196	{
1197	/ it did require a sort, at least for the full set of keys /
1198	cheapest_startup_inner = NULL;
1199	cheapest_total_inner = NULL;
1200	}
1201	num_sortkeys = list_length(innersortkeys);
1202	if (num_sortkeys > `1` && !useallclauses)
1203	trialsortkeys = list_copy(innersortkeys); / need modifiable copy /
1204	else
1205	trialsortkeys = innersortkeys; / won't really truncate /
1206
1207	for (sortkeycnt = num_sortkeys; sortkeycnt > `0`; sortkeycnt--)
1208	{
1209	Path *innerpath;
1210	List *newclauses = NIL;
1211
1212	/*
1213	* Look for an inner path ordered well enough for the first
1214	* 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified
1215	* destructively, which is why we made a copy...
1216	*/
1217	trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
1218	innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
1219	trialsortkeys,
1220	NULL,
1221	TOTAL_COST,
1222	is_partial);
1223	if (innerpath != NULL &&
1224	(cheapest_total_inner == NULL \|\|
1225	compare_path_costs(innerpath, cheapest_total_inner,
1226	TOTAL_COST) < `0`))
1227	{
1228	/ Found a cheap (or even-cheaper) sorted path /
1229	/ Select the right mergeclauses, if we didn't already /
1230	if (sortkeycnt < num_sortkeys)
1231	{
1232	newclauses =
1233	trim_mergeclauses_for_inner_pathkeys(root,
1234	mergeclauses,
1235	trialsortkeys);
1236	Assert(newclauses != NIL);
1237	}
1238	else
1239	newclauses = mergeclauses;
1240	try_mergejoin_path(root,
1241	joinrel,
1242	outerpath,
1243	innerpath,
1244	merge_pathkeys,
1245	newclauses,
1246	NIL,
1247	NIL,
1248	jointype,
1249	extra,
1250	is_partial);
1251	cheapest_total_inner = innerpath;
1252	}
1253	/ Same on the basis of cheapest startup cost ... /
1254	innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
1255	trialsortkeys,
1256	NULL,
1257	STARTUP_COST,
1258	is_partial);
1259	if (innerpath != NULL &&
1260	(cheapest_startup_inner == NULL \|\|
1261	compare_path_costs(innerpath, cheapest_startup_inner,
1262	STARTUP_COST) < `0`))
1263	{
1264	/ Found a cheap (or even-cheaper) sorted path /
1265	if (innerpath != cheapest_total_inner)
1266	{
1267	/*
1268	* Avoid rebuilding clause list if we already made one; saves
1269	* memory in big join trees...
1270	*/
1271	if (newclauses == NIL)
1272	{
1273	if (sortkeycnt < num_sortkeys)
1274	{
1275	newclauses =
1276	trim_mergeclauses_for_inner_pathkeys(root,
1277	mergeclauses,
1278	trialsortkeys);
1279	Assert(newclauses != NIL);
1280	}
1281	else
1282	newclauses = mergeclauses;
1283	}
1284	try_mergejoin_path(root,
1285	joinrel,
1286	outerpath,
1287	innerpath,
1288	merge_pathkeys,
1289	newclauses,
1290	NIL,
1291	NIL,
1292	jointype,
1293	extra,
1294	is_partial);
1295	}
1296	cheapest_startup_inner = innerpath;
1297	}
1298
1299	/*
1300	* Don't consider truncated sortkeys if we need all clauses.
1301	*/
1302	if (useallclauses)
1303	break;
1304	}
1305	}
1306
1307	/*
1308	* match_unsorted_outer
1309	* Creates possible join paths for processing a single join relation
1310	* 'joinrel' by employing either iterative substitution or
1311	* mergejoining on each of its possible outer paths (considering
1312	* only outer paths that are already ordered well enough for merging).
1313	*
1314	* We always generate a nestloop path for each available outer path.
1315	* In fact we may generate as many as five: one on the cheapest-total-cost
1316	* inner path, one on the same with materialization, one on the
1317	* cheapest-startup-cost inner path (if different), one on the
1318	* cheapest-total inner-indexscan path (if any), and one on the
1319	* cheapest-startup inner-indexscan path (if different).
1320	*
1321	* We also consider mergejoins if mergejoin clauses are available. See
1322	* detailed comments in generate_mergejoin_paths.
1323	*
1324	* 'joinrel' is the join relation
1325	* 'outerrel' is the outer join relation
1326	* 'innerrel' is the inner join relation
1327	* 'jointype' is the type of join to do
1328	* 'extra' contains additional input values
1329	*/
1330	static void
1331	match_unsorted_outer(PlannerInfo *root,
1332	RelOptInfo *joinrel,
1333	RelOptInfo *outerrel,
1334	RelOptInfo *innerrel,
1335	JoinType jointype,
1336	JoinPathExtraData *extra)
1337	{
1338	JoinType save_jointype = jointype;
1339	bool nestjoinOK;
1340	bool useallclauses;
1341	Path *inner_cheapest_total = innerrel->cheapest_total_path;
1342	Path *matpath = NULL;
1343	ListCell *lc1;
1344
1345	/*
1346	* Nestloop only supports inner, left, semi, and anti joins. Also, if we
1347	* are doing a right or full mergejoin, we must use all the mergeclauses
1348	* as join clauses, else we will not have a valid plan. (Although these
1349	* two flags are currently inverses, keep them separate for clarity and
1350	* possible future changes.)
1351	*/
1352	switch (jointype)
1353	{
1354	case JOIN_INNER:
1355	case JOIN_LEFT:
1356	case JOIN_SEMI:
1357	case JOIN_ANTI:
1358	nestjoinOK = true;
1359	useallclauses = false;
1360	break;
1361	case JOIN_RIGHT:
1362	case JOIN_FULL:
1363	nestjoinOK = false;
1364	useallclauses = true;
1365	break;
1366	case JOIN_UNIQUE_OUTER:
1367	case JOIN_UNIQUE_INNER:
1368	jointype = JOIN_INNER;
1369	nestjoinOK = true;
1370	useallclauses = false;
1371	break;
1372	default:
1373	elog(ERROR, "unrecognized join type: %d",
1374	(int) jointype);
1375	nestjoinOK = false; / keep compiler quiet /
1376	useallclauses = false;
1377	break;
1378	}
1379
1380	/*
1381	* If inner_cheapest_total is parameterized by the outer rel, ignore it;
1382	* we will consider it below as a member of cheapest_parameterized_paths,
1383	* but the other possibilities considered in this routine aren't usable.
1384	*/
1385	if (PATH_PARAM_BY_REL(inner_cheapest_total, outerrel))
1386	inner_cheapest_total = NULL;
1387
1388	/*
1389	* If we need to unique-ify the inner path, we will consider only the
1390	* cheapest-total inner.
1391	*/
1392	if (save_jointype == JOIN_UNIQUE_INNER)
1393	{
1394	/ No way to do this with an inner path parameterized by outer rel /
1395	if (inner_cheapest_total == NULL)
1396	return;
1397	inner_cheapest_total = (Path *)
1398	create_unique_path(root, innerrel, inner_cheapest_total, extra->sjinfo);
1399	Assert(inner_cheapest_total);
1400	}
1401	else if (nestjoinOK)
1402	{
1403	/*
1404	* Consider materializing the cheapest inner path, unless
1405	* enable_material is off or the path in question materializes its
1406	* output anyway.
1407	*/
1408	if (enable_material && inner_cheapest_total != NULL &&
1409	!ExecMaterializesOutput(inner_cheapest_total->pathtype))
1410	matpath = (Path *)
1411	create_material_path(innerrel, inner_cheapest_total);
1412	}
1413
1414	foreach(lc1, outerrel->pathlist)
1415	{
1416	Path outerpath = (Path ) lfirst(lc1);
1417	List *merge_pathkeys;
1418
1419	/*
1420	* We cannot use an outer path that is parameterized by the inner rel.
1421	*/
1422	if (PATH_PARAM_BY_REL(outerpath, innerrel))
1423	continue;
1424
1425	/*
1426	* If we need to unique-ify the outer path, it's pointless to consider
1427	* any but the cheapest outer. (XXX we don't consider parameterized
1428	* outers, nor inners, for unique-ified cases. Should we?)
1429	*/
1430	if (save_jointype == JOIN_UNIQUE_OUTER)
1431	{
1432	if (outerpath != outerrel->cheapest_total_path)
1433	continue;
1434	outerpath = (Path *) create_unique_path(root, outerrel,
1435	outerpath, extra->sjinfo);
1436	Assert(outerpath);
1437	}
1438
1439	/*
1440	* The result will have this sort order (even if it is implemented as
1441	* a nestloop, and even if some of the mergeclauses are implemented by
1442	* qpquals rather than as true mergeclauses):
1443	*/
1444	merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1445	outerpath->pathkeys);
1446
1447	if (save_jointype == JOIN_UNIQUE_INNER)
1448	{
1449	/*
1450	* Consider nestloop join, but only with the unique-ified cheapest
1451	* inner path
1452	*/
1453	try_nestloop_path(root,
1454	joinrel,
1455	outerpath,
1456	inner_cheapest_total,
1457	merge_pathkeys,
1458	jointype,
1459	extra);
1460	}
1461	else if (nestjoinOK)
1462	{
1463	/*
1464	* Consider nestloop joins using this outer path and various
1465	* available paths for the inner relation. We consider the
1466	* cheapest-total paths for each available parameterization of the
1467	* inner relation, including the unparameterized case.
1468	*/
1469	ListCell *lc2;
1470
1471	foreach(lc2, innerrel->cheapest_parameterized_paths)
1472	{
1473	Path innerpath = (Path ) lfirst(lc2);
1474
1475	try_nestloop_path(root,
1476	joinrel,
1477	outerpath,
1478	innerpath,
1479	merge_pathkeys,
1480	jointype,
1481	extra);
1482	}
1483
1484	/ Also consider materialized form of the cheapest inner path /
1485	if (matpath != NULL)
1486	try_nestloop_path(root,
1487	joinrel,
1488	outerpath,
1489	matpath,
1490	merge_pathkeys,
1491	jointype,
1492	extra);
1493	}
1494
1495	/ Can't do anything else if outer path needs to be unique'd /
1496	if (save_jointype == JOIN_UNIQUE_OUTER)
1497	continue;
1498
1499	/ Can't do anything else if inner rel is parameterized by outer /
1500	if (inner_cheapest_total == NULL)
1501	continue;
1502
1503	/ Generate merge join paths /
1504	generate_mergejoin_paths(root, joinrel, innerrel, outerpath,
1505	save_jointype, extra, useallclauses,
1506	inner_cheapest_total, merge_pathkeys,
1507	false);
1508	}
1509
1510	/*
1511	* Consider partial nestloop and mergejoin plan if outerrel has any
1512	* partial path and the joinrel is parallel-safe. However, we can't
1513	* handle JOIN_UNIQUE_OUTER, because the outer path will be partial, and
1514	* therefore we won't be able to properly guarantee uniqueness. Nor can
1515	* we handle extra_lateral_rels, since partial paths must not be
1516	* parameterized. Similarly, we can't handle JOIN_FULL and JOIN_RIGHT,
1517	* because they can produce false null extended rows.
1518	*/
1519	if (joinrel->consider_parallel &&
1520	save_jointype != JOIN_UNIQUE_OUTER &&
1521	save_jointype != JOIN_FULL &&
1522	save_jointype != JOIN_RIGHT &&
1523	outerrel->partial_pathlist != NIL &&
1524	bms_is_empty(joinrel->lateral_relids))
1525	{
1526	if (nestjoinOK)
1527	consider_parallel_nestloop(root, joinrel, outerrel, innerrel,
1528	save_jointype, extra);
1529
1530	/*
1531	* If inner_cheapest_total is NULL or non parallel-safe then find the
1532	* cheapest total parallel safe path. If doing JOIN_UNIQUE_INNER, we
1533	* can't use any alternative inner path.
1534	*/
1535	if (inner_cheapest_total == NULL \|\|
1536	!inner_cheapest_total->parallel_safe)
1537	{
1538	if (save_jointype == JOIN_UNIQUE_INNER)
1539	return;
1540
1541	inner_cheapest_total = get_cheapest_parallel_safe_total_inner(
1542	innerrel->pathlist);
1543	}
1544
1545	if (inner_cheapest_total)
1546	consider_parallel_mergejoin(root, joinrel, outerrel, innerrel,
1547	save_jointype, extra,
1548	inner_cheapest_total);
1549	}
1550	}
1551
1552	/*
1553	* consider_parallel_mergejoin
1554	* Try to build partial paths for a joinrel by joining a partial path
1555	* for the outer relation to a complete path for the inner relation.
1556	*
1557	* 'joinrel' is the join relation
1558	* 'outerrel' is the outer join relation
1559	* 'innerrel' is the inner join relation
1560	* 'jointype' is the type of join to do
1561	* 'extra' contains additional input values
1562	* 'inner_cheapest_total' cheapest total path for innerrel
1563	*/
1564	static void
1565	consider_parallel_mergejoin(PlannerInfo *root,
1566	RelOptInfo *joinrel,
1567	RelOptInfo *outerrel,
1568	RelOptInfo *innerrel,
1569	JoinType jointype,
1570	JoinPathExtraData *extra,
1571	Path *inner_cheapest_total)
1572	{
1573	ListCell *lc1;
1574
1575	/ generate merge join path for each partial outer path /
1576	foreach(lc1, outerrel->partial_pathlist)
1577	{
1578	Path outerpath = (Path ) lfirst(lc1);
1579	List *merge_pathkeys;
1580
1581	/*
1582	* Figure out what useful ordering any paths we create will have.
1583	*/
1584	merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1585	outerpath->pathkeys);
1586
1587	generate_mergejoin_paths(root, joinrel, innerrel, outerpath, jointype,
1588	extra, false, inner_cheapest_total,
1589	merge_pathkeys, true);
1590	}
1591	}
1592
1593	/*
1594	* consider_parallel_nestloop
1595	* Try to build partial paths for a joinrel by joining a partial path for the
1596	* outer relation to a complete path for the inner relation.
1597	*
1598	* 'joinrel' is the join relation
1599	* 'outerrel' is the outer join relation
1600	* 'innerrel' is the inner join relation
1601	* 'jointype' is the type of join to do
1602	* 'extra' contains additional input values
1603	*/
1604	static void
1605	consider_parallel_nestloop(PlannerInfo *root,
1606	RelOptInfo *joinrel,
1607	RelOptInfo *outerrel,
1608	RelOptInfo *innerrel,
1609	JoinType jointype,
1610	JoinPathExtraData *extra)
1611	{
1612	JoinType save_jointype = jointype;
1613	ListCell *lc1;
1614
1615	if (jointype == JOIN_UNIQUE_INNER)
1616	jointype = JOIN_INNER;
1617
1618	foreach(lc1, outerrel->partial_pathlist)
1619	{
1620	Path outerpath = (Path ) lfirst(lc1);
1621	List *pathkeys;
1622	ListCell *lc2;
1623
1624	/ Figure out what useful ordering any paths we create will have. /
1625	pathkeys = build_join_pathkeys(root, joinrel, jointype,
1626	outerpath->pathkeys);
1627
1628	/*
1629	* Try the cheapest parameterized paths; only those which will produce
1630	* an unparameterized path when joined to this outerrel will survive
1631	* try_partial_nestloop_path. The cheapest unparameterized path is
1632	* also in this list.
1633	*/
1634	foreach(lc2, innerrel->cheapest_parameterized_paths)
1635	{
1636	Path innerpath = (Path ) lfirst(lc2);
1637
1638	/ Can't join to an inner path that is not parallel-safe /
1639	if (!innerpath->parallel_safe)
1640	continue;
1641
1642	/*
1643	* If we're doing JOIN_UNIQUE_INNER, we can only use the inner's
1644	* cheapest_total_path, and we have to unique-ify it. (We might
1645	* be able to relax this to allow other safe, unparameterized
1646	* inner paths, but right now create_unique_path is not on board
1647	* with that.)
1648	*/
1649	if (save_jointype == JOIN_UNIQUE_INNER)
1650	{
1651	if (innerpath != innerrel->cheapest_total_path)
1652	continue;
1653	innerpath = (Path *) create_unique_path(root, innerrel,
1654	innerpath,
1655	extra->sjinfo);
1656	Assert(innerpath);
1657	}
1658
1659	try_partial_nestloop_path(root, joinrel, outerpath, innerpath,
1660	pathkeys, jointype, extra);
1661	}
1662	}
1663	}
1664
1665	/*
1666	* hash_inner_and_outer
1667	* Create hashjoin join paths by explicitly hashing both the outer and
1668	* inner keys of each available hash clause.
1669	*
1670	* 'joinrel' is the join relation
1671	* 'outerrel' is the outer join relation
1672	* 'innerrel' is the inner join relation
1673	* 'jointype' is the type of join to do
1674	* 'extra' contains additional input values
1675	*/
1676	static void
1677	hash_inner_and_outer(PlannerInfo *root,
1678	RelOptInfo *joinrel,
1679	RelOptInfo *outerrel,
1680	RelOptInfo *innerrel,
1681	JoinType jointype,
1682	JoinPathExtraData *extra)
1683	{
1684	JoinType save_jointype = jointype;
1685	bool isouterjoin = IS_OUTER_JOIN(jointype);
1686	List *hashclauses;
1687	ListCell *l;
1688
1689	/*
1690	* We need to build only one hashclauses list for any given pair of outer
1691	* and inner relations; all of the hashable clauses will be used as keys.
1692	*
1693	* Scan the join's restrictinfo list to find hashjoinable clauses that are
1694	* usable with this pair of sub-relations.
1695	*/
1696	hashclauses = NIL;
1697	foreach(l, extra->restrictlist)
1698	{
1699	RestrictInfo restrictinfo = (RestrictInfo ) lfirst(l);
1700
1701	/*
1702	* If processing an outer join, only use its own join clauses for
1703	* hashing. For inner joins we need not be so picky.
1704	*/
1705	if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
1706	continue;
1707
1708	if (!restrictinfo->can_join \|\|
1709	restrictinfo->hashjoinoperator == InvalidOid)
1710	continue; / not hashjoinable /
1711
1712	/*
1713	* Check if clause has the form "outer op inner" or "inner op outer".
1714	*/
1715	if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
1716	continue; / no good for these input relations /
1717
1718	hashclauses = lappend(hashclauses, restrictinfo);
1719	}
1720
1721	/ If we found any usable hashclauses, make paths /
1722	if (hashclauses)
1723	{
1724	/*
1725	* We consider both the cheapest-total-cost and cheapest-startup-cost
1726	* outer paths. There's no need to consider any but the
1727	* cheapest-total-cost inner path, however.
1728	*/
1729	Path *cheapest_startup_outer = outerrel->cheapest_startup_path;
1730	Path *cheapest_total_outer = outerrel->cheapest_total_path;
1731	Path *cheapest_total_inner = innerrel->cheapest_total_path;
1732
1733	/*
1734	* If either cheapest-total path is parameterized by the other rel, we
1735	* can't use a hashjoin. (There's no use looking for alternative
1736	* input paths, since these should already be the least-parameterized
1737	* available paths.)
1738	*/
1739	if (PATH_PARAM_BY_REL(cheapest_total_outer, innerrel) \|\|
1740	PATH_PARAM_BY_REL(cheapest_total_inner, outerrel))
1741	return;
1742
1743	/ Unique-ify if need be; we ignore parameterized possibilities /
1744	if (jointype == JOIN_UNIQUE_OUTER)
1745	{
1746	cheapest_total_outer = (Path *)
1747	create_unique_path(root, outerrel,
1748	cheapest_total_outer, extra->sjinfo);
1749	Assert(cheapest_total_outer);
1750	jointype = JOIN_INNER;
1751	try_hashjoin_path(root,
1752	joinrel,
1753	cheapest_total_outer,
1754	cheapest_total_inner,
1755	hashclauses,
1756	jointype,
1757	extra);
1758	/ no possibility of cheap startup here /
1759	}
1760	else if (jointype == JOIN_UNIQUE_INNER)
1761	{
1762	cheapest_total_inner = (Path *)
1763	create_unique_path(root, innerrel,
1764	cheapest_total_inner, extra->sjinfo);
1765	Assert(cheapest_total_inner);
1766	jointype = JOIN_INNER;
1767	try_hashjoin_path(root,
1768	joinrel,
1769	cheapest_total_outer,
1770	cheapest_total_inner,
1771	hashclauses,
1772	jointype,
1773	extra);
1774	if (cheapest_startup_outer != NULL &&
1775	cheapest_startup_outer != cheapest_total_outer)
1776	try_hashjoin_path(root,
1777	joinrel,
1778	cheapest_startup_outer,
1779	cheapest_total_inner,
1780	hashclauses,
1781	jointype,
1782	extra);
1783	}
1784	else
1785	{
1786	/*
1787	* For other jointypes, we consider the cheapest startup outer
1788	* together with the cheapest total inner, and then consider
1789	* pairings of cheapest-total paths including parameterized ones.
1790	* There is no use in generating parameterized paths on the basis
1791	* of possibly cheap startup cost, so this is sufficient.
1792	*/
1793	ListCell *lc1;
1794	ListCell *lc2;
1795
1796	if (cheapest_startup_outer != NULL)
1797	try_hashjoin_path(root,
1798	joinrel,
1799	cheapest_startup_outer,
1800	cheapest_total_inner,
1801	hashclauses,
1802	jointype,
1803	extra);
1804
1805	foreach(lc1, outerrel->cheapest_parameterized_paths)
1806	{
1807	Path outerpath = (Path ) lfirst(lc1);
1808
1809	/*
1810	* We cannot use an outer path that is parameterized by the
1811	* inner rel.
1812	*/
1813	if (PATH_PARAM_BY_REL(outerpath, innerrel))
1814	continue;
1815
1816	foreach(lc2, innerrel->cheapest_parameterized_paths)
1817	{
1818	Path innerpath = (Path ) lfirst(lc2);
1819
1820	/*
1821	* We cannot use an inner path that is parameterized by
1822	* the outer rel, either.
1823	*/
1824	if (PATH_PARAM_BY_REL(innerpath, outerrel))
1825	continue;
1826
1827	if (outerpath == cheapest_startup_outer &&
1828	innerpath == cheapest_total_inner)
1829	continue; / already tried it /
1830
1831	try_hashjoin_path(root,
1832	joinrel,
1833	outerpath,
1834	innerpath,
1835	hashclauses,
1836	jointype,
1837	extra);
1838	}
1839	}
1840	}
1841
1842	/*
1843	* If the joinrel is parallel-safe, we may be able to consider a
1844	* partial hash join. However, we can't handle JOIN_UNIQUE_OUTER,
1845	* because the outer path will be partial, and therefore we won't be
1846	* able to properly guarantee uniqueness. Similarly, we can't handle
1847	* JOIN_FULL and JOIN_RIGHT, because they can produce false null
1848	* extended rows. Also, the resulting path must not be parameterized.
1849	* We would be able to support JOIN_FULL and JOIN_RIGHT for Parallel
1850	* Hash, since in that case we're back to a single hash table with a
1851	* single set of match bits for each batch, but that will require
1852	* figuring out a deadlock-free way to wait for the probe to finish.
1853	*/
1854	if (joinrel->consider_parallel &&
1855	save_jointype != JOIN_UNIQUE_OUTER &&
1856	save_jointype != JOIN_FULL &&
1857	save_jointype != JOIN_RIGHT &&
1858	outerrel->partial_pathlist != NIL &&
1859	bms_is_empty(joinrel->lateral_relids))
1860	{
1861	Path *cheapest_partial_outer;
1862	Path *cheapest_partial_inner = NULL;
1863	Path *cheapest_safe_inner = NULL;
1864
1865	cheapest_partial_outer =
1866	(Path *) linitial(outerrel->partial_pathlist);
1867
1868	/*
1869	* Can we use a partial inner plan too, so that we can build a
1870	* shared hash table in parallel? We can't handle
1871	* JOIN_UNIQUE_INNER because we can't guarantee uniqueness.
1872	*/
1873	if (innerrel->partial_pathlist != NIL &&
1874	save_jointype != JOIN_UNIQUE_INNER &&
1875	enable_parallel_hash)
1876	{
1877	cheapest_partial_inner =
1878	(Path *) linitial(innerrel->partial_pathlist);
1879	try_partial_hashjoin_path(root, joinrel,
1880	cheapest_partial_outer,
1881	cheapest_partial_inner,
1882	hashclauses, jointype, extra,
1883	true / parallel_hash / );
1884	}
1885
1886	/*
1887	* Normally, given that the joinrel is parallel-safe, the cheapest
1888	* total inner path will also be parallel-safe, but if not, we'll
1889	* have to search for the cheapest safe, unparameterized inner
1890	* path. If doing JOIN_UNIQUE_INNER, we can't use any alternative
1891	* inner path.
1892	*/
1893	if (cheapest_total_inner->parallel_safe)
1894	cheapest_safe_inner = cheapest_total_inner;
1895	else if (save_jointype != JOIN_UNIQUE_INNER)
1896	cheapest_safe_inner =
1897	get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
1898
1899	if (cheapest_safe_inner != NULL)
1900	try_partial_hashjoin_path(root, joinrel,
1901	cheapest_partial_outer,
1902	cheapest_safe_inner,
1903	hashclauses, jointype, extra,
1904	false / parallel_hash / );
1905	}
1906	}
1907	}
1908
1909	/*
1910	* select_mergejoin_clauses
1911	* Select mergejoin clauses that are usable for a particular join.
1912	* Returns a list of RestrictInfo nodes for those clauses.
1913	*
1914	* *mergejoin_allowed is normally set to true, but it is set to false if
1915	* this is a right/full join and there are nonmergejoinable join clauses.
1916	* The executor's mergejoin machinery cannot handle such cases, so we have
1917	* to avoid generating a mergejoin plan. (Note that this flag does NOT
1918	* consider whether there are actually any mergejoinable clauses. This is
1919	* correct because in some cases we need to build a clauseless mergejoin.
1920	* Simply returning NIL is therefore not enough to distinguish safe from
1921	* unsafe cases.)
1922	*
1923	* We also mark each selected RestrictInfo to show which side is currently
1924	* being considered as outer. These are transient markings that are only
1925	* good for the duration of the current add_paths_to_joinrel() call!
1926	*
1927	* We examine each restrictinfo clause known for the join to see
1928	* if it is mergejoinable and involves vars from the two sub-relations
1929	* currently of interest.
1930	*/
1931	static List *
1932	select_mergejoin_clauses(PlannerInfo *root,
1933	RelOptInfo *joinrel,
1934	RelOptInfo *outerrel,
1935	RelOptInfo *innerrel,
1936	List *restrictlist,
1937	JoinType jointype,
1938	bool *mergejoin_allowed)
1939	{
1940	List *result_list = NIL;
1941	bool isouterjoin = IS_OUTER_JOIN(jointype);
1942	bool have_nonmergeable_joinclause = false;
1943	ListCell *l;
1944
1945	foreach(l, restrictlist)
1946	{
1947	RestrictInfo restrictinfo = (RestrictInfo ) lfirst(l);
1948
1949	/*
1950	* If processing an outer join, only use its own join clauses in the
1951	* merge. For inner joins we can use pushed-down clauses too. (Note:
1952	* we don't set have_nonmergeable_joinclause here because pushed-down
1953	* clauses will become otherquals not joinquals.)
1954	*/
1955	if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
1956	continue;
1957
1958	/ Check that clause is a mergeable operator clause /
1959	if (!restrictinfo->can_join \|\|
1960	restrictinfo->mergeopfamilies == NIL)
1961	{
1962	/*
1963	* The executor can handle extra joinquals that are constants, but
1964	* not anything else, when doing right/full merge join. (The
1965	* reason to support constants is so we can do FULL JOIN ON
1966	* FALSE.)
1967	*/
1968	if (!restrictinfo->clause \|\| !IsA(restrictinfo->clause, Const))
1969	have_nonmergeable_joinclause = true;
1970	continue; / not mergejoinable /
1971	}
1972
1973	/*
1974	* Check if clause has the form "outer op inner" or "inner op outer".
1975	*/
1976	if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
1977	{
1978	have_nonmergeable_joinclause = true;
1979	continue; / no good for these input relations /
1980	}
1981
1982	/*
1983	* Insist that each side have a non-redundant eclass. This
1984	* restriction is needed because various bits of the planner expect
1985	* that each clause in a merge be associable with some pathkey in a
1986	* canonical pathkey list, but redundant eclasses can't appear in
1987	* canonical sort orderings. (XXX it might be worth relaxing this,
1988	* but not enough time to address it for 8.3.)
1989	*
1990	* Note: it would be bad if this condition failed for an otherwise
1991	* mergejoinable FULL JOIN clause, since that would result in
1992	* undesirable planner failure. I believe that is not possible
1993	* however; a variable involved in a full join could only appear in
1994	* below_outer_join eclasses, which aren't considered redundant.
1995	*
1996	* This case can happen for left/right join clauses: the outer-side
1997	* variable could be equated to a constant. Because we will propagate
1998	* that constant across the join clause, the loss of ability to do a
1999	* mergejoin is not really all that big a deal, and so it's not clear
2000	* that improving this is important.
2001	*/
2002	update_mergeclause_eclasses(root, restrictinfo);
2003
2004	if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) \|\|
2005	EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
2006	{
2007	have_nonmergeable_joinclause = true;
2008	continue; / can't handle redundant eclasses /
2009	}
2010
2011	result_list = lappend(result_list, restrictinfo);
2012	}
2013
2014	/*
2015	* Report whether mergejoin is allowed (see comment at top of function).
2016	*/
2017	switch (jointype)
2018	{
2019	case JOIN_RIGHT:
2020	case JOIN_FULL:
2021	*mergejoin_allowed = !have_nonmergeable_joinclause;
2022	break;
2023	default:
2024	*mergejoin_allowed = true;
2025	break;
2026	}
2027
2028	return result_list;
2029	}
2030

Browse the source code of PostgreSQL/src/backend/optimizer/path/joinpath.c