1/*-------------------------------------------------------------------------
2 *
3 * joinpath.c
4 * Routines to find all possible paths for processing a set of joins
5 *
6 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/optimizer/path/joinpath.c
12 *
13 *-------------------------------------------------------------------------
14 */
15#include "postgres.h"
16
17#include <math.h>
18
19#include "executor/executor.h"
20#include "foreign/fdwapi.h"
21#include "optimizer/cost.h"
22#include "optimizer/pathnode.h"
23#include "optimizer/paths.h"
24#include "optimizer/planmain.h"
25
26/* Hook for plugins to get control in add_paths_to_joinrel() */
27set_join_pathlist_hook_type set_join_pathlist_hook = NULL;
28
29/*
30 * Paths parameterized by the parent can be considered to be parameterized by
31 * any of its child.
32 */
33#define PATH_PARAM_BY_PARENT(path, rel) \
34 ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), \
35 (rel)->top_parent_relids))
36#define PATH_PARAM_BY_REL_SELF(path, rel) \
37 ((path)->param_info && bms_overlap(PATH_REQ_OUTER(path), (rel)->relids))
38
39#define PATH_PARAM_BY_REL(path, rel) \
40 (PATH_PARAM_BY_REL_SELF(path, rel) || PATH_PARAM_BY_PARENT(path, rel))
41
42static void try_partial_mergejoin_path(PlannerInfo *root,
43 RelOptInfo *joinrel,
44 Path *outer_path,
45 Path *inner_path,
46 List *pathkeys,
47 List *mergeclauses,
48 List *outersortkeys,
49 List *innersortkeys,
50 JoinType jointype,
51 JoinPathExtraData *extra);
52static void sort_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
53 RelOptInfo *outerrel, RelOptInfo *innerrel,
54 JoinType jointype, JoinPathExtraData *extra);
55static void match_unsorted_outer(PlannerInfo *root, RelOptInfo *joinrel,
56 RelOptInfo *outerrel, RelOptInfo *innerrel,
57 JoinType jointype, JoinPathExtraData *extra);
58static void consider_parallel_nestloop(PlannerInfo *root,
59 RelOptInfo *joinrel,
60 RelOptInfo *outerrel,
61 RelOptInfo *innerrel,
62 JoinType jointype,
63 JoinPathExtraData *extra);
64static void consider_parallel_mergejoin(PlannerInfo *root,
65 RelOptInfo *joinrel,
66 RelOptInfo *outerrel,
67 RelOptInfo *innerrel,
68 JoinType jointype,
69 JoinPathExtraData *extra,
70 Path *inner_cheapest_total);
71static void hash_inner_and_outer(PlannerInfo *root, RelOptInfo *joinrel,
72 RelOptInfo *outerrel, RelOptInfo *innerrel,
73 JoinType jointype, JoinPathExtraData *extra);
74static List *select_mergejoin_clauses(PlannerInfo *root,
75 RelOptInfo *joinrel,
76 RelOptInfo *outerrel,
77 RelOptInfo *innerrel,
78 List *restrictlist,
79 JoinType jointype,
80 bool *mergejoin_allowed);
81static void generate_mergejoin_paths(PlannerInfo *root,
82 RelOptInfo *joinrel,
83 RelOptInfo *innerrel,
84 Path *outerpath,
85 JoinType jointype,
86 JoinPathExtraData *extra,
87 bool useallclauses,
88 Path *inner_cheapest_total,
89 List *merge_pathkeys,
90 bool is_partial);
91
92
93/*
94 * add_paths_to_joinrel
95 * Given a join relation and two component rels from which it can be made,
96 * consider all possible paths that use the two component rels as outer
97 * and inner rel respectively. Add these paths to the join rel's pathlist
98 * if they survive comparison with other paths (and remove any existing
99 * paths that are dominated by these paths).
100 *
101 * Modifies the pathlist field of the joinrel node to contain the best
102 * paths found so far.
103 *
104 * jointype is not necessarily the same as sjinfo->jointype; it might be
105 * "flipped around" if we are considering joining the rels in the opposite
106 * direction from what's indicated in sjinfo.
107 *
108 * Also, this routine and others in this module accept the special JoinTypes
109 * JOIN_UNIQUE_OUTER and JOIN_UNIQUE_INNER to indicate that we should
110 * unique-ify the outer or inner relation and then apply a regular inner
111 * join. These values are not allowed to propagate outside this module,
112 * however. Path cost estimation code may need to recognize that it's
113 * dealing with such a case --- the combination of nominal jointype INNER
114 * with sjinfo->jointype == JOIN_SEMI indicates that.
115 */
116void
117add_paths_to_joinrel(PlannerInfo *root,
118 RelOptInfo *joinrel,
119 RelOptInfo *outerrel,
120 RelOptInfo *innerrel,
121 JoinType jointype,
122 SpecialJoinInfo *sjinfo,
123 List *restrictlist)
124{
125 JoinPathExtraData extra;
126 bool mergejoin_allowed = true;
127 ListCell *lc;
128 Relids joinrelids;
129
130 /*
131 * PlannerInfo doesn't contain the SpecialJoinInfos created for joins
132 * between child relations, even if there is a SpecialJoinInfo node for
133 * the join between the topmost parents. So, while calculating Relids set
134 * representing the restriction, consider relids of topmost parent of
135 * partitions.
136 */
137 if (joinrel->reloptkind == RELOPT_OTHER_JOINREL)
138 joinrelids = joinrel->top_parent_relids;
139 else
140 joinrelids = joinrel->relids;
141
142 extra.restrictlist = restrictlist;
143 extra.mergeclause_list = NIL;
144 extra.sjinfo = sjinfo;
145 extra.param_source_rels = NULL;
146
147 /*
148 * See if the inner relation is provably unique for this outer rel.
149 *
150 * We have some special cases: for JOIN_SEMI and JOIN_ANTI, it doesn't
151 * matter since the executor can make the equivalent optimization anyway;
152 * we need not expend planner cycles on proofs. For JOIN_UNIQUE_INNER, we
153 * must be considering a semijoin whose inner side is not provably unique
154 * (else reduce_unique_semijoins would've simplified it), so there's no
155 * point in calling innerrel_is_unique. However, if the LHS covers all of
156 * the semijoin's min_lefthand, then it's appropriate to set inner_unique
157 * because the path produced by create_unique_path will be unique relative
158 * to the LHS. (If we have an LHS that's only part of the min_lefthand,
159 * that is *not* true.) For JOIN_UNIQUE_OUTER, pass JOIN_INNER to avoid
160 * letting that value escape this module.
161 */
162 switch (jointype)
163 {
164 case JOIN_SEMI:
165 case JOIN_ANTI:
166 extra.inner_unique = false; /* well, unproven */
167 break;
168 case JOIN_UNIQUE_INNER:
169 extra.inner_unique = bms_is_subset(sjinfo->min_lefthand,
170 outerrel->relids);
171 break;
172 case JOIN_UNIQUE_OUTER:
173 extra.inner_unique = innerrel_is_unique(root,
174 joinrel->relids,
175 outerrel->relids,
176 innerrel,
177 JOIN_INNER,
178 restrictlist,
179 false);
180 break;
181 default:
182 extra.inner_unique = innerrel_is_unique(root,
183 joinrel->relids,
184 outerrel->relids,
185 innerrel,
186 jointype,
187 restrictlist,
188 false);
189 break;
190 }
191
192 /*
193 * Find potential mergejoin clauses. We can skip this if we are not
194 * interested in doing a mergejoin. However, mergejoin may be our only
195 * way of implementing a full outer join, so override enable_mergejoin if
196 * it's a full join.
197 */
198 if (enable_mergejoin || jointype == JOIN_FULL)
199 extra.mergeclause_list = select_mergejoin_clauses(root,
200 joinrel,
201 outerrel,
202 innerrel,
203 restrictlist,
204 jointype,
205 &mergejoin_allowed);
206
207 /*
208 * If it's SEMI, ANTI, or inner_unique join, compute correction factors
209 * for cost estimation. These will be the same for all paths.
210 */
211 if (jointype == JOIN_SEMI || jointype == JOIN_ANTI || extra.inner_unique)
212 compute_semi_anti_join_factors(root, joinrel, outerrel, innerrel,
213 jointype, sjinfo, restrictlist,
214 &extra.semifactors);
215
216 /*
217 * Decide whether it's sensible to generate parameterized paths for this
218 * joinrel, and if so, which relations such paths should require. There
219 * is usually no need to create a parameterized result path unless there
220 * is a join order restriction that prevents joining one of our input rels
221 * directly to the parameter source rel instead of joining to the other
222 * input rel. (But see allow_star_schema_join().) This restriction
223 * reduces the number of parameterized paths we have to deal with at
224 * higher join levels, without compromising the quality of the resulting
225 * plan. We express the restriction as a Relids set that must overlap the
226 * parameterization of any proposed join path.
227 */
228 foreach(lc, root->join_info_list)
229 {
230 SpecialJoinInfo *sjinfo2 = (SpecialJoinInfo *) lfirst(lc);
231
232 /*
233 * SJ is relevant to this join if we have some part of its RHS
234 * (possibly not all of it), and haven't yet joined to its LHS. (This
235 * test is pretty simplistic, but should be sufficient considering the
236 * join has already been proven legal.) If the SJ is relevant, it
237 * presents constraints for joining to anything not in its RHS.
238 */
239 if (bms_overlap(joinrelids, sjinfo2->min_righthand) &&
240 !bms_overlap(joinrelids, sjinfo2->min_lefthand))
241 extra.param_source_rels = bms_join(extra.param_source_rels,
242 bms_difference(root->all_baserels,
243 sjinfo2->min_righthand));
244
245 /* full joins constrain both sides symmetrically */
246 if (sjinfo2->jointype == JOIN_FULL &&
247 bms_overlap(joinrelids, sjinfo2->min_lefthand) &&
248 !bms_overlap(joinrelids, sjinfo2->min_righthand))
249 extra.param_source_rels = bms_join(extra.param_source_rels,
250 bms_difference(root->all_baserels,
251 sjinfo2->min_lefthand));
252 }
253
254 /*
255 * However, when a LATERAL subquery is involved, there will simply not be
256 * any paths for the joinrel that aren't parameterized by whatever the
257 * subquery is parameterized by, unless its parameterization is resolved
258 * within the joinrel. So we might as well allow additional dependencies
259 * on whatever residual lateral dependencies the joinrel will have.
260 */
261 extra.param_source_rels = bms_add_members(extra.param_source_rels,
262 joinrel->lateral_relids);
263
264 /*
265 * 1. Consider mergejoin paths where both relations must be explicitly
266 * sorted. Skip this if we can't mergejoin.
267 */
268 if (mergejoin_allowed)
269 sort_inner_and_outer(root, joinrel, outerrel, innerrel,
270 jointype, &extra);
271
272 /*
273 * 2. Consider paths where the outer relation need not be explicitly
274 * sorted. This includes both nestloops and mergejoins where the outer
275 * path is already ordered. Again, skip this if we can't mergejoin.
276 * (That's okay because we know that nestloop can't handle right/full
277 * joins at all, so it wouldn't work in the prohibited cases either.)
278 */
279 if (mergejoin_allowed)
280 match_unsorted_outer(root, joinrel, outerrel, innerrel,
281 jointype, &extra);
282
283#ifdef NOT_USED
284
285 /*
286 * 3. Consider paths where the inner relation need not be explicitly
287 * sorted. This includes mergejoins only (nestloops were already built in
288 * match_unsorted_outer).
289 *
290 * Diked out as redundant 2/13/2000 -- tgl. There isn't any really
291 * significant difference between the inner and outer side of a mergejoin,
292 * so match_unsorted_inner creates no paths that aren't equivalent to
293 * those made by match_unsorted_outer when add_paths_to_joinrel() is
294 * invoked with the two rels given in the other order.
295 */
296 if (mergejoin_allowed)
297 match_unsorted_inner(root, joinrel, outerrel, innerrel,
298 jointype, &extra);
299#endif
300
301 /*
302 * 4. Consider paths where both outer and inner relations must be hashed
303 * before being joined. As above, disregard enable_hashjoin for full
304 * joins, because there may be no other alternative.
305 */
306 if (enable_hashjoin || jointype == JOIN_FULL)
307 hash_inner_and_outer(root, joinrel, outerrel, innerrel,
308 jointype, &extra);
309
310 /*
311 * 5. If inner and outer relations are foreign tables (or joins) belonging
312 * to the same server and assigned to the same user to check access
313 * permissions as, give the FDW a chance to push down joins.
314 */
315 if (joinrel->fdwroutine &&
316 joinrel->fdwroutine->GetForeignJoinPaths)
317 joinrel->fdwroutine->GetForeignJoinPaths(root, joinrel,
318 outerrel, innerrel,
319 jointype, &extra);
320
321 /*
322 * 6. Finally, give extensions a chance to manipulate the path list.
323 */
324 if (set_join_pathlist_hook)
325 set_join_pathlist_hook(root, joinrel, outerrel, innerrel,
326 jointype, &extra);
327}
328
329/*
330 * We override the param_source_rels heuristic to accept nestloop paths in
331 * which the outer rel satisfies some but not all of the inner path's
332 * parameterization. This is necessary to get good plans for star-schema
333 * scenarios, in which a parameterized path for a large table may require
334 * parameters from multiple small tables that will not get joined directly to
335 * each other. We can handle that by stacking nestloops that have the small
336 * tables on the outside; but this breaks the rule the param_source_rels
337 * heuristic is based on, namely that parameters should not be passed down
338 * across joins unless there's a join-order-constraint-based reason to do so.
339 * So we ignore the param_source_rels restriction when this case applies.
340 *
341 * allow_star_schema_join() returns true if the param_source_rels restriction
342 * should be overridden, ie, it's okay to perform this join.
343 */
344static inline bool
345allow_star_schema_join(PlannerInfo *root,
346 Relids outerrelids,
347 Relids inner_paramrels)
348{
349 /*
350 * It's a star-schema case if the outer rel provides some but not all of
351 * the inner rel's parameterization.
352 */
353 return (bms_overlap(inner_paramrels, outerrelids) &&
354 bms_nonempty_difference(inner_paramrels, outerrelids));
355}
356
357/*
358 * try_nestloop_path
359 * Consider a nestloop join path; if it appears useful, push it into
360 * the joinrel's pathlist via add_path().
361 */
362static void
363try_nestloop_path(PlannerInfo *root,
364 RelOptInfo *joinrel,
365 Path *outer_path,
366 Path *inner_path,
367 List *pathkeys,
368 JoinType jointype,
369 JoinPathExtraData *extra)
370{
371 Relids required_outer;
372 JoinCostWorkspace workspace;
373 RelOptInfo *innerrel = inner_path->parent;
374 RelOptInfo *outerrel = outer_path->parent;
375 Relids innerrelids;
376 Relids outerrelids;
377 Relids inner_paramrels = PATH_REQ_OUTER(inner_path);
378 Relids outer_paramrels = PATH_REQ_OUTER(outer_path);
379
380 /*
381 * Paths are parameterized by top-level parents, so run parameterization
382 * tests on the parent relids.
383 */
384 if (innerrel->top_parent_relids)
385 innerrelids = innerrel->top_parent_relids;
386 else
387 innerrelids = innerrel->relids;
388
389 if (outerrel->top_parent_relids)
390 outerrelids = outerrel->top_parent_relids;
391 else
392 outerrelids = outerrel->relids;
393
394 /*
395 * Check to see if proposed path is still parameterized, and reject if the
396 * parameterization wouldn't be sensible --- unless allow_star_schema_join
397 * says to allow it anyway. Also, we must reject if have_dangerous_phv
398 * doesn't like the look of it, which could only happen if the nestloop is
399 * still parameterized.
400 */
401 required_outer = calc_nestloop_required_outer(outerrelids, outer_paramrels,
402 innerrelids, inner_paramrels);
403 if (required_outer &&
404 ((!bms_overlap(required_outer, extra->param_source_rels) &&
405 !allow_star_schema_join(root, outerrelids, inner_paramrels)) ||
406 have_dangerous_phv(root, outerrelids, inner_paramrels)))
407 {
408 /* Waste no memory when we reject a path here */
409 bms_free(required_outer);
410 return;
411 }
412
413 /*
414 * Do a precheck to quickly eliminate obviously-inferior paths. We
415 * calculate a cheap lower bound on the path's cost and then use
416 * add_path_precheck() to see if the path is clearly going to be dominated
417 * by some existing path for the joinrel. If not, do the full pushup with
418 * creating a fully valid path structure and submitting it to add_path().
419 * The latter two steps are expensive enough to make this two-phase
420 * methodology worthwhile.
421 */
422 initial_cost_nestloop(root, &workspace, jointype,
423 outer_path, inner_path, extra);
424
425 if (add_path_precheck(joinrel,
426 workspace.startup_cost, workspace.total_cost,
427 pathkeys, required_outer))
428 {
429 /*
430 * If the inner path is parameterized, it is parameterized by the
431 * topmost parent of the outer rel, not the outer rel itself. Fix
432 * that.
433 */
434 if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent))
435 {
436 inner_path = reparameterize_path_by_child(root, inner_path,
437 outer_path->parent);
438
439 /*
440 * If we could not translate the path, we can't create nest loop
441 * path.
442 */
443 if (!inner_path)
444 {
445 bms_free(required_outer);
446 return;
447 }
448 }
449
450 add_path(joinrel, (Path *)
451 create_nestloop_path(root,
452 joinrel,
453 jointype,
454 &workspace,
455 extra,
456 outer_path,
457 inner_path,
458 extra->restrictlist,
459 pathkeys,
460 required_outer));
461 }
462 else
463 {
464 /* Waste no memory when we reject a path here */
465 bms_free(required_outer);
466 }
467}
468
469/*
470 * try_partial_nestloop_path
471 * Consider a partial nestloop join path; if it appears useful, push it into
472 * the joinrel's partial_pathlist via add_partial_path().
473 */
474static void
475try_partial_nestloop_path(PlannerInfo *root,
476 RelOptInfo *joinrel,
477 Path *outer_path,
478 Path *inner_path,
479 List *pathkeys,
480 JoinType jointype,
481 JoinPathExtraData *extra)
482{
483 JoinCostWorkspace workspace;
484
485 /*
486 * If the inner path is parameterized, the parameterization must be fully
487 * satisfied by the proposed outer path. Parameterized partial paths are
488 * not supported. The caller should already have verified that no
489 * extra_lateral_rels are required here.
490 */
491 Assert(bms_is_empty(joinrel->lateral_relids));
492 if (inner_path->param_info != NULL)
493 {
494 Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
495 RelOptInfo *outerrel = outer_path->parent;
496 Relids outerrelids;
497
498 /*
499 * The inner and outer paths are parameterized, if at all, by the top
500 * level parents, not the child relations, so we must use those relids
501 * for our parameterization tests.
502 */
503 if (outerrel->top_parent_relids)
504 outerrelids = outerrel->top_parent_relids;
505 else
506 outerrelids = outerrel->relids;
507
508 if (!bms_is_subset(inner_paramrels, outerrelids))
509 return;
510 }
511
512 /*
513 * Before creating a path, get a quick lower bound on what it is likely to
514 * cost. Bail out right away if it looks terrible.
515 */
516 initial_cost_nestloop(root, &workspace, jointype,
517 outer_path, inner_path, extra);
518 if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys))
519 return;
520
521 /*
522 * If the inner path is parameterized, it is parameterized by the topmost
523 * parent of the outer rel, not the outer rel itself. Fix that.
524 */
525 if (PATH_PARAM_BY_PARENT(inner_path, outer_path->parent))
526 {
527 inner_path = reparameterize_path_by_child(root, inner_path,
528 outer_path->parent);
529
530 /*
531 * If we could not translate the path, we can't create nest loop path.
532 */
533 if (!inner_path)
534 return;
535 }
536
537 /* Might be good enough to be worth trying, so let's try it. */
538 add_partial_path(joinrel, (Path *)
539 create_nestloop_path(root,
540 joinrel,
541 jointype,
542 &workspace,
543 extra,
544 outer_path,
545 inner_path,
546 extra->restrictlist,
547 pathkeys,
548 NULL));
549}
550
551/*
552 * try_mergejoin_path
553 * Consider a merge join path; if it appears useful, push it into
554 * the joinrel's pathlist via add_path().
555 */
556static void
557try_mergejoin_path(PlannerInfo *root,
558 RelOptInfo *joinrel,
559 Path *outer_path,
560 Path *inner_path,
561 List *pathkeys,
562 List *mergeclauses,
563 List *outersortkeys,
564 List *innersortkeys,
565 JoinType jointype,
566 JoinPathExtraData *extra,
567 bool is_partial)
568{
569 Relids required_outer;
570 JoinCostWorkspace workspace;
571
572 if (is_partial)
573 {
574 try_partial_mergejoin_path(root,
575 joinrel,
576 outer_path,
577 inner_path,
578 pathkeys,
579 mergeclauses,
580 outersortkeys,
581 innersortkeys,
582 jointype,
583 extra);
584 return;
585 }
586
587 /*
588 * Check to see if proposed path is still parameterized, and reject if the
589 * parameterization wouldn't be sensible.
590 */
591 required_outer = calc_non_nestloop_required_outer(outer_path,
592 inner_path);
593 if (required_outer &&
594 !bms_overlap(required_outer, extra->param_source_rels))
595 {
596 /* Waste no memory when we reject a path here */
597 bms_free(required_outer);
598 return;
599 }
600
601 /*
602 * If the given paths are already well enough ordered, we can skip doing
603 * an explicit sort.
604 */
605 if (outersortkeys &&
606 pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
607 outersortkeys = NIL;
608 if (innersortkeys &&
609 pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
610 innersortkeys = NIL;
611
612 /*
613 * See comments in try_nestloop_path().
614 */
615 initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
616 outer_path, inner_path,
617 outersortkeys, innersortkeys,
618 extra);
619
620 if (add_path_precheck(joinrel,
621 workspace.startup_cost, workspace.total_cost,
622 pathkeys, required_outer))
623 {
624 add_path(joinrel, (Path *)
625 create_mergejoin_path(root,
626 joinrel,
627 jointype,
628 &workspace,
629 extra,
630 outer_path,
631 inner_path,
632 extra->restrictlist,
633 pathkeys,
634 required_outer,
635 mergeclauses,
636 outersortkeys,
637 innersortkeys));
638 }
639 else
640 {
641 /* Waste no memory when we reject a path here */
642 bms_free(required_outer);
643 }
644}
645
646/*
647 * try_partial_mergejoin_path
648 * Consider a partial merge join path; if it appears useful, push it into
649 * the joinrel's pathlist via add_partial_path().
650 */
651static void
652try_partial_mergejoin_path(PlannerInfo *root,
653 RelOptInfo *joinrel,
654 Path *outer_path,
655 Path *inner_path,
656 List *pathkeys,
657 List *mergeclauses,
658 List *outersortkeys,
659 List *innersortkeys,
660 JoinType jointype,
661 JoinPathExtraData *extra)
662{
663 JoinCostWorkspace workspace;
664
665 /*
666 * See comments in try_partial_hashjoin_path().
667 */
668 Assert(bms_is_empty(joinrel->lateral_relids));
669 if (inner_path->param_info != NULL)
670 {
671 Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
672
673 if (!bms_is_empty(inner_paramrels))
674 return;
675 }
676
677 /*
678 * If the given paths are already well enough ordered, we can skip doing
679 * an explicit sort.
680 */
681 if (outersortkeys &&
682 pathkeys_contained_in(outersortkeys, outer_path->pathkeys))
683 outersortkeys = NIL;
684 if (innersortkeys &&
685 pathkeys_contained_in(innersortkeys, inner_path->pathkeys))
686 innersortkeys = NIL;
687
688 /*
689 * See comments in try_partial_nestloop_path().
690 */
691 initial_cost_mergejoin(root, &workspace, jointype, mergeclauses,
692 outer_path, inner_path,
693 outersortkeys, innersortkeys,
694 extra);
695
696 if (!add_partial_path_precheck(joinrel, workspace.total_cost, pathkeys))
697 return;
698
699 /* Might be good enough to be worth trying, so let's try it. */
700 add_partial_path(joinrel, (Path *)
701 create_mergejoin_path(root,
702 joinrel,
703 jointype,
704 &workspace,
705 extra,
706 outer_path,
707 inner_path,
708 extra->restrictlist,
709 pathkeys,
710 NULL,
711 mergeclauses,
712 outersortkeys,
713 innersortkeys));
714}
715
716/*
717 * try_hashjoin_path
718 * Consider a hash join path; if it appears useful, push it into
719 * the joinrel's pathlist via add_path().
720 */
721static void
722try_hashjoin_path(PlannerInfo *root,
723 RelOptInfo *joinrel,
724 Path *outer_path,
725 Path *inner_path,
726 List *hashclauses,
727 JoinType jointype,
728 JoinPathExtraData *extra)
729{
730 Relids required_outer;
731 JoinCostWorkspace workspace;
732
733 /*
734 * Check to see if proposed path is still parameterized, and reject if the
735 * parameterization wouldn't be sensible.
736 */
737 required_outer = calc_non_nestloop_required_outer(outer_path,
738 inner_path);
739 if (required_outer &&
740 !bms_overlap(required_outer, extra->param_source_rels))
741 {
742 /* Waste no memory when we reject a path here */
743 bms_free(required_outer);
744 return;
745 }
746
747 /*
748 * See comments in try_nestloop_path(). Also note that hashjoin paths
749 * never have any output pathkeys, per comments in create_hashjoin_path.
750 */
751 initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
752 outer_path, inner_path, extra, false);
753
754 if (add_path_precheck(joinrel,
755 workspace.startup_cost, workspace.total_cost,
756 NIL, required_outer))
757 {
758 add_path(joinrel, (Path *)
759 create_hashjoin_path(root,
760 joinrel,
761 jointype,
762 &workspace,
763 extra,
764 outer_path,
765 inner_path,
766 false, /* parallel_hash */
767 extra->restrictlist,
768 required_outer,
769 hashclauses));
770 }
771 else
772 {
773 /* Waste no memory when we reject a path here */
774 bms_free(required_outer);
775 }
776}
777
778/*
779 * try_partial_hashjoin_path
780 * Consider a partial hashjoin join path; if it appears useful, push it into
781 * the joinrel's partial_pathlist via add_partial_path().
782 * The outer side is partial. If parallel_hash is true, then the inner path
783 * must be partial and will be run in parallel to create one or more shared
784 * hash tables; otherwise the inner path must be complete and a copy of it
785 * is run in every process to create separate identical private hash tables.
786 */
787static void
788try_partial_hashjoin_path(PlannerInfo *root,
789 RelOptInfo *joinrel,
790 Path *outer_path,
791 Path *inner_path,
792 List *hashclauses,
793 JoinType jointype,
794 JoinPathExtraData *extra,
795 bool parallel_hash)
796{
797 JoinCostWorkspace workspace;
798
799 /*
800 * If the inner path is parameterized, the parameterization must be fully
801 * satisfied by the proposed outer path. Parameterized partial paths are
802 * not supported. The caller should already have verified that no
803 * extra_lateral_rels are required here.
804 */
805 Assert(bms_is_empty(joinrel->lateral_relids));
806 if (inner_path->param_info != NULL)
807 {
808 Relids inner_paramrels = inner_path->param_info->ppi_req_outer;
809
810 if (!bms_is_empty(inner_paramrels))
811 return;
812 }
813
814 /*
815 * Before creating a path, get a quick lower bound on what it is likely to
816 * cost. Bail out right away if it looks terrible.
817 */
818 initial_cost_hashjoin(root, &workspace, jointype, hashclauses,
819 outer_path, inner_path, extra, parallel_hash);
820 if (!add_partial_path_precheck(joinrel, workspace.total_cost, NIL))
821 return;
822
823 /* Might be good enough to be worth trying, so let's try it. */
824 add_partial_path(joinrel, (Path *)
825 create_hashjoin_path(root,
826 joinrel,
827 jointype,
828 &workspace,
829 extra,
830 outer_path,
831 inner_path,
832 parallel_hash,
833 extra->restrictlist,
834 NULL,
835 hashclauses));
836}
837
838/*
839 * clause_sides_match_join
840 * Determine whether a join clause is of the right form to use in this join.
841 *
842 * We already know that the clause is a binary opclause referencing only the
843 * rels in the current join. The point here is to check whether it has the
844 * form "outerrel_expr op innerrel_expr" or "innerrel_expr op outerrel_expr",
845 * rather than mixing outer and inner vars on either side. If it matches,
846 * we set the transient flag outer_is_left to identify which side is which.
847 */
848static inline bool
849clause_sides_match_join(RestrictInfo *rinfo, RelOptInfo *outerrel,
850 RelOptInfo *innerrel)
851{
852 if (bms_is_subset(rinfo->left_relids, outerrel->relids) &&
853 bms_is_subset(rinfo->right_relids, innerrel->relids))
854 {
855 /* lefthand side is outer */
856 rinfo->outer_is_left = true;
857 return true;
858 }
859 else if (bms_is_subset(rinfo->left_relids, innerrel->relids) &&
860 bms_is_subset(rinfo->right_relids, outerrel->relids))
861 {
862 /* righthand side is outer */
863 rinfo->outer_is_left = false;
864 return true;
865 }
866 return false; /* no good for these input relations */
867}
868
869/*
870 * sort_inner_and_outer
871 * Create mergejoin join paths by explicitly sorting both the outer and
872 * inner join relations on each available merge ordering.
873 *
874 * 'joinrel' is the join relation
875 * 'outerrel' is the outer join relation
876 * 'innerrel' is the inner join relation
877 * 'jointype' is the type of join to do
878 * 'extra' contains additional input values
879 */
880static void
881sort_inner_and_outer(PlannerInfo *root,
882 RelOptInfo *joinrel,
883 RelOptInfo *outerrel,
884 RelOptInfo *innerrel,
885 JoinType jointype,
886 JoinPathExtraData *extra)
887{
888 JoinType save_jointype = jointype;
889 Path *outer_path;
890 Path *inner_path;
891 Path *cheapest_partial_outer = NULL;
892 Path *cheapest_safe_inner = NULL;
893 List *all_pathkeys;
894 ListCell *l;
895
896 /*
897 * We only consider the cheapest-total-cost input paths, since we are
898 * assuming here that a sort is required. We will consider
899 * cheapest-startup-cost input paths later, and only if they don't need a
900 * sort.
901 *
902 * This function intentionally does not consider parameterized input
903 * paths, except when the cheapest-total is parameterized. If we did so,
904 * we'd have a combinatorial explosion of mergejoin paths of dubious
905 * value. This interacts with decisions elsewhere that also discriminate
906 * against mergejoins with parameterized inputs; see comments in
907 * src/backend/optimizer/README.
908 */
909 outer_path = outerrel->cheapest_total_path;
910 inner_path = innerrel->cheapest_total_path;
911
912 /*
913 * If either cheapest-total path is parameterized by the other rel, we
914 * can't use a mergejoin. (There's no use looking for alternative input
915 * paths, since these should already be the least-parameterized available
916 * paths.)
917 */
918 if (PATH_PARAM_BY_REL(outer_path, innerrel) ||
919 PATH_PARAM_BY_REL(inner_path, outerrel))
920 return;
921
922 /*
923 * If unique-ification is requested, do it and then handle as a plain
924 * inner join.
925 */
926 if (jointype == JOIN_UNIQUE_OUTER)
927 {
928 outer_path = (Path *) create_unique_path(root, outerrel,
929 outer_path, extra->sjinfo);
930 Assert(outer_path);
931 jointype = JOIN_INNER;
932 }
933 else if (jointype == JOIN_UNIQUE_INNER)
934 {
935 inner_path = (Path *) create_unique_path(root, innerrel,
936 inner_path, extra->sjinfo);
937 Assert(inner_path);
938 jointype = JOIN_INNER;
939 }
940
941 /*
942 * If the joinrel is parallel-safe, we may be able to consider a partial
943 * merge join. However, we can't handle JOIN_UNIQUE_OUTER, because the
944 * outer path will be partial, and therefore we won't be able to properly
945 * guarantee uniqueness. Similarly, we can't handle JOIN_FULL and
946 * JOIN_RIGHT, because they can produce false null extended rows. Also,
947 * the resulting path must not be parameterized.
948 */
949 if (joinrel->consider_parallel &&
950 save_jointype != JOIN_UNIQUE_OUTER &&
951 save_jointype != JOIN_FULL &&
952 save_jointype != JOIN_RIGHT &&
953 outerrel->partial_pathlist != NIL &&
954 bms_is_empty(joinrel->lateral_relids))
955 {
956 cheapest_partial_outer = (Path *) linitial(outerrel->partial_pathlist);
957
958 if (inner_path->parallel_safe)
959 cheapest_safe_inner = inner_path;
960 else if (save_jointype != JOIN_UNIQUE_INNER)
961 cheapest_safe_inner =
962 get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
963 }
964
965 /*
966 * Each possible ordering of the available mergejoin clauses will generate
967 * a differently-sorted result path at essentially the same cost. We have
968 * no basis for choosing one over another at this level of joining, but
969 * some sort orders may be more useful than others for higher-level
970 * mergejoins, so it's worth considering multiple orderings.
971 *
972 * Actually, it's not quite true that every mergeclause ordering will
973 * generate a different path order, because some of the clauses may be
974 * partially redundant (refer to the same EquivalenceClasses). Therefore,
975 * what we do is convert the mergeclause list to a list of canonical
976 * pathkeys, and then consider different orderings of the pathkeys.
977 *
978 * Generating a path for *every* permutation of the pathkeys doesn't seem
979 * like a winning strategy; the cost in planning time is too high. For
980 * now, we generate one path for each pathkey, listing that pathkey first
981 * and the rest in random order. This should allow at least a one-clause
982 * mergejoin without re-sorting against any other possible mergejoin
983 * partner path. But if we've not guessed the right ordering of secondary
984 * keys, we may end up evaluating clauses as qpquals when they could have
985 * been done as mergeclauses. (In practice, it's rare that there's more
986 * than two or three mergeclauses, so expending a huge amount of thought
987 * on that is probably not worth it.)
988 *
989 * The pathkey order returned by select_outer_pathkeys_for_merge() has
990 * some heuristics behind it (see that function), so be sure to try it
991 * exactly as-is as well as making variants.
992 */
993 all_pathkeys = select_outer_pathkeys_for_merge(root,
994 extra->mergeclause_list,
995 joinrel);
996
997 foreach(l, all_pathkeys)
998 {
999 List *front_pathkey = (List *) lfirst(l);
1000 List *cur_mergeclauses;
1001 List *outerkeys;
1002 List *innerkeys;
1003 List *merge_pathkeys;
1004
1005 /* Make a pathkey list with this guy first */
1006 if (l != list_head(all_pathkeys))
1007 outerkeys = lcons(front_pathkey,
1008 list_delete_ptr(list_copy(all_pathkeys),
1009 front_pathkey));
1010 else
1011 outerkeys = all_pathkeys; /* no work at first one... */
1012
1013 /* Sort the mergeclauses into the corresponding ordering */
1014 cur_mergeclauses =
1015 find_mergeclauses_for_outer_pathkeys(root,
1016 outerkeys,
1017 extra->mergeclause_list);
1018
1019 /* Should have used them all... */
1020 Assert(list_length(cur_mergeclauses) == list_length(extra->mergeclause_list));
1021
1022 /* Build sort pathkeys for the inner side */
1023 innerkeys = make_inner_pathkeys_for_merge(root,
1024 cur_mergeclauses,
1025 outerkeys);
1026
1027 /* Build pathkeys representing output sort order */
1028 merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1029 outerkeys);
1030
1031 /*
1032 * And now we can make the path.
1033 *
1034 * Note: it's possible that the cheapest paths will already be sorted
1035 * properly. try_mergejoin_path will detect that case and suppress an
1036 * explicit sort step, so we needn't do so here.
1037 */
1038 try_mergejoin_path(root,
1039 joinrel,
1040 outer_path,
1041 inner_path,
1042 merge_pathkeys,
1043 cur_mergeclauses,
1044 outerkeys,
1045 innerkeys,
1046 jointype,
1047 extra,
1048 false);
1049
1050 /*
1051 * If we have partial outer and parallel safe inner path then try
1052 * partial mergejoin path.
1053 */
1054 if (cheapest_partial_outer && cheapest_safe_inner)
1055 try_partial_mergejoin_path(root,
1056 joinrel,
1057 cheapest_partial_outer,
1058 cheapest_safe_inner,
1059 merge_pathkeys,
1060 cur_mergeclauses,
1061 outerkeys,
1062 innerkeys,
1063 jointype,
1064 extra);
1065 }
1066}
1067
1068/*
1069 * generate_mergejoin_paths
1070 * Creates possible mergejoin paths for input outerpath.
1071 *
1072 * We generate mergejoins if mergejoin clauses are available. We have
1073 * two ways to generate the inner path for a mergejoin: sort the cheapest
1074 * inner path, or use an inner path that is already suitably ordered for the
1075 * merge. If we have several mergeclauses, it could be that there is no inner
1076 * path (or only a very expensive one) for the full list of mergeclauses, but
1077 * better paths exist if we truncate the mergeclause list (thereby discarding
1078 * some sort key requirements). So, we consider truncations of the
1079 * mergeclause list as well as the full list. (Ideally we'd consider all
1080 * subsets of the mergeclause list, but that seems way too expensive.)
1081 */
1082static void
1083generate_mergejoin_paths(PlannerInfo *root,
1084 RelOptInfo *joinrel,
1085 RelOptInfo *innerrel,
1086 Path *outerpath,
1087 JoinType jointype,
1088 JoinPathExtraData *extra,
1089 bool useallclauses,
1090 Path *inner_cheapest_total,
1091 List *merge_pathkeys,
1092 bool is_partial)
1093{
1094 List *mergeclauses;
1095 List *innersortkeys;
1096 List *trialsortkeys;
1097 Path *cheapest_startup_inner;
1098 Path *cheapest_total_inner;
1099 JoinType save_jointype = jointype;
1100 int num_sortkeys;
1101 int sortkeycnt;
1102
1103 if (jointype == JOIN_UNIQUE_OUTER || jointype == JOIN_UNIQUE_INNER)
1104 jointype = JOIN_INNER;
1105
1106 /* Look for useful mergeclauses (if any) */
1107 mergeclauses =
1108 find_mergeclauses_for_outer_pathkeys(root,
1109 outerpath->pathkeys,
1110 extra->mergeclause_list);
1111
1112 /*
1113 * Done with this outer path if no chance for a mergejoin.
1114 *
1115 * Special corner case: for "x FULL JOIN y ON true", there will be no join
1116 * clauses at all. Ordinarily we'd generate a clauseless nestloop path,
1117 * but since mergejoin is our only join type that supports FULL JOIN
1118 * without any join clauses, it's necessary to generate a clauseless
1119 * mergejoin path instead.
1120 */
1121 if (mergeclauses == NIL)
1122 {
1123 if (jointype == JOIN_FULL)
1124 /* okay to try for mergejoin */ ;
1125 else
1126 return;
1127 }
1128 if (useallclauses &&
1129 list_length(mergeclauses) != list_length(extra->mergeclause_list))
1130 return;
1131
1132 /* Compute the required ordering of the inner path */
1133 innersortkeys = make_inner_pathkeys_for_merge(root,
1134 mergeclauses,
1135 outerpath->pathkeys);
1136
1137 /*
1138 * Generate a mergejoin on the basis of sorting the cheapest inner. Since
1139 * a sort will be needed, only cheapest total cost matters. (But
1140 * try_mergejoin_path will do the right thing if inner_cheapest_total is
1141 * already correctly sorted.)
1142 */
1143 try_mergejoin_path(root,
1144 joinrel,
1145 outerpath,
1146 inner_cheapest_total,
1147 merge_pathkeys,
1148 mergeclauses,
1149 NIL,
1150 innersortkeys,
1151 jointype,
1152 extra,
1153 is_partial);
1154
1155 /* Can't do anything else if inner path needs to be unique'd */
1156 if (save_jointype == JOIN_UNIQUE_INNER)
1157 return;
1158
1159 /*
1160 * Look for presorted inner paths that satisfy the innersortkey list ---
1161 * or any truncation thereof, if we are allowed to build a mergejoin using
1162 * a subset of the merge clauses. Here, we consider both cheap startup
1163 * cost and cheap total cost.
1164 *
1165 * Currently we do not consider parameterized inner paths here. This
1166 * interacts with decisions elsewhere that also discriminate against
1167 * mergejoins with parameterized inputs; see comments in
1168 * src/backend/optimizer/README.
1169 *
1170 * As we shorten the sortkey list, we should consider only paths that are
1171 * strictly cheaper than (in particular, not the same as) any path found
1172 * in an earlier iteration. Otherwise we'd be intentionally using fewer
1173 * merge keys than a given path allows (treating the rest as plain
1174 * joinquals), which is unlikely to be a good idea. Also, eliminating
1175 * paths here on the basis of compare_path_costs is a lot cheaper than
1176 * building the mergejoin path only to throw it away.
1177 *
1178 * If inner_cheapest_total is well enough sorted to have not required a
1179 * sort in the path made above, we shouldn't make a duplicate path with
1180 * it, either. We handle that case with the same logic that handles the
1181 * previous consideration, by initializing the variables that track
1182 * cheapest-so-far properly. Note that we do NOT reject
1183 * inner_cheapest_total if we find it matches some shorter set of
1184 * pathkeys. That case corresponds to using fewer mergekeys to avoid
1185 * sorting inner_cheapest_total, whereas we did sort it above, so the
1186 * plans being considered are different.
1187 */
1188 if (pathkeys_contained_in(innersortkeys,
1189 inner_cheapest_total->pathkeys))
1190 {
1191 /* inner_cheapest_total didn't require a sort */
1192 cheapest_startup_inner = inner_cheapest_total;
1193 cheapest_total_inner = inner_cheapest_total;
1194 }
1195 else
1196 {
1197 /* it did require a sort, at least for the full set of keys */
1198 cheapest_startup_inner = NULL;
1199 cheapest_total_inner = NULL;
1200 }
1201 num_sortkeys = list_length(innersortkeys);
1202 if (num_sortkeys > 1 && !useallclauses)
1203 trialsortkeys = list_copy(innersortkeys); /* need modifiable copy */
1204 else
1205 trialsortkeys = innersortkeys; /* won't really truncate */
1206
1207 for (sortkeycnt = num_sortkeys; sortkeycnt > 0; sortkeycnt--)
1208 {
1209 Path *innerpath;
1210 List *newclauses = NIL;
1211
1212 /*
1213 * Look for an inner path ordered well enough for the first
1214 * 'sortkeycnt' innersortkeys. NB: trialsortkeys list is modified
1215 * destructively, which is why we made a copy...
1216 */
1217 trialsortkeys = list_truncate(trialsortkeys, sortkeycnt);
1218 innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
1219 trialsortkeys,
1220 NULL,
1221 TOTAL_COST,
1222 is_partial);
1223 if (innerpath != NULL &&
1224 (cheapest_total_inner == NULL ||
1225 compare_path_costs(innerpath, cheapest_total_inner,
1226 TOTAL_COST) < 0))
1227 {
1228 /* Found a cheap (or even-cheaper) sorted path */
1229 /* Select the right mergeclauses, if we didn't already */
1230 if (sortkeycnt < num_sortkeys)
1231 {
1232 newclauses =
1233 trim_mergeclauses_for_inner_pathkeys(root,
1234 mergeclauses,
1235 trialsortkeys);
1236 Assert(newclauses != NIL);
1237 }
1238 else
1239 newclauses = mergeclauses;
1240 try_mergejoin_path(root,
1241 joinrel,
1242 outerpath,
1243 innerpath,
1244 merge_pathkeys,
1245 newclauses,
1246 NIL,
1247 NIL,
1248 jointype,
1249 extra,
1250 is_partial);
1251 cheapest_total_inner = innerpath;
1252 }
1253 /* Same on the basis of cheapest startup cost ... */
1254 innerpath = get_cheapest_path_for_pathkeys(innerrel->pathlist,
1255 trialsortkeys,
1256 NULL,
1257 STARTUP_COST,
1258 is_partial);
1259 if (innerpath != NULL &&
1260 (cheapest_startup_inner == NULL ||
1261 compare_path_costs(innerpath, cheapest_startup_inner,
1262 STARTUP_COST) < 0))
1263 {
1264 /* Found a cheap (or even-cheaper) sorted path */
1265 if (innerpath != cheapest_total_inner)
1266 {
1267 /*
1268 * Avoid rebuilding clause list if we already made one; saves
1269 * memory in big join trees...
1270 */
1271 if (newclauses == NIL)
1272 {
1273 if (sortkeycnt < num_sortkeys)
1274 {
1275 newclauses =
1276 trim_mergeclauses_for_inner_pathkeys(root,
1277 mergeclauses,
1278 trialsortkeys);
1279 Assert(newclauses != NIL);
1280 }
1281 else
1282 newclauses = mergeclauses;
1283 }
1284 try_mergejoin_path(root,
1285 joinrel,
1286 outerpath,
1287 innerpath,
1288 merge_pathkeys,
1289 newclauses,
1290 NIL,
1291 NIL,
1292 jointype,
1293 extra,
1294 is_partial);
1295 }
1296 cheapest_startup_inner = innerpath;
1297 }
1298
1299 /*
1300 * Don't consider truncated sortkeys if we need all clauses.
1301 */
1302 if (useallclauses)
1303 break;
1304 }
1305}
1306
1307/*
1308 * match_unsorted_outer
1309 * Creates possible join paths for processing a single join relation
1310 * 'joinrel' by employing either iterative substitution or
1311 * mergejoining on each of its possible outer paths (considering
1312 * only outer paths that are already ordered well enough for merging).
1313 *
1314 * We always generate a nestloop path for each available outer path.
1315 * In fact we may generate as many as five: one on the cheapest-total-cost
1316 * inner path, one on the same with materialization, one on the
1317 * cheapest-startup-cost inner path (if different), one on the
1318 * cheapest-total inner-indexscan path (if any), and one on the
1319 * cheapest-startup inner-indexscan path (if different).
1320 *
1321 * We also consider mergejoins if mergejoin clauses are available. See
1322 * detailed comments in generate_mergejoin_paths.
1323 *
1324 * 'joinrel' is the join relation
1325 * 'outerrel' is the outer join relation
1326 * 'innerrel' is the inner join relation
1327 * 'jointype' is the type of join to do
1328 * 'extra' contains additional input values
1329 */
1330static void
1331match_unsorted_outer(PlannerInfo *root,
1332 RelOptInfo *joinrel,
1333 RelOptInfo *outerrel,
1334 RelOptInfo *innerrel,
1335 JoinType jointype,
1336 JoinPathExtraData *extra)
1337{
1338 JoinType save_jointype = jointype;
1339 bool nestjoinOK;
1340 bool useallclauses;
1341 Path *inner_cheapest_total = innerrel->cheapest_total_path;
1342 Path *matpath = NULL;
1343 ListCell *lc1;
1344
1345 /*
1346 * Nestloop only supports inner, left, semi, and anti joins. Also, if we
1347 * are doing a right or full mergejoin, we must use *all* the mergeclauses
1348 * as join clauses, else we will not have a valid plan. (Although these
1349 * two flags are currently inverses, keep them separate for clarity and
1350 * possible future changes.)
1351 */
1352 switch (jointype)
1353 {
1354 case JOIN_INNER:
1355 case JOIN_LEFT:
1356 case JOIN_SEMI:
1357 case JOIN_ANTI:
1358 nestjoinOK = true;
1359 useallclauses = false;
1360 break;
1361 case JOIN_RIGHT:
1362 case JOIN_FULL:
1363 nestjoinOK = false;
1364 useallclauses = true;
1365 break;
1366 case JOIN_UNIQUE_OUTER:
1367 case JOIN_UNIQUE_INNER:
1368 jointype = JOIN_INNER;
1369 nestjoinOK = true;
1370 useallclauses = false;
1371 break;
1372 default:
1373 elog(ERROR, "unrecognized join type: %d",
1374 (int) jointype);
1375 nestjoinOK = false; /* keep compiler quiet */
1376 useallclauses = false;
1377 break;
1378 }
1379
1380 /*
1381 * If inner_cheapest_total is parameterized by the outer rel, ignore it;
1382 * we will consider it below as a member of cheapest_parameterized_paths,
1383 * but the other possibilities considered in this routine aren't usable.
1384 */
1385 if (PATH_PARAM_BY_REL(inner_cheapest_total, outerrel))
1386 inner_cheapest_total = NULL;
1387
1388 /*
1389 * If we need to unique-ify the inner path, we will consider only the
1390 * cheapest-total inner.
1391 */
1392 if (save_jointype == JOIN_UNIQUE_INNER)
1393 {
1394 /* No way to do this with an inner path parameterized by outer rel */
1395 if (inner_cheapest_total == NULL)
1396 return;
1397 inner_cheapest_total = (Path *)
1398 create_unique_path(root, innerrel, inner_cheapest_total, extra->sjinfo);
1399 Assert(inner_cheapest_total);
1400 }
1401 else if (nestjoinOK)
1402 {
1403 /*
1404 * Consider materializing the cheapest inner path, unless
1405 * enable_material is off or the path in question materializes its
1406 * output anyway.
1407 */
1408 if (enable_material && inner_cheapest_total != NULL &&
1409 !ExecMaterializesOutput(inner_cheapest_total->pathtype))
1410 matpath = (Path *)
1411 create_material_path(innerrel, inner_cheapest_total);
1412 }
1413
1414 foreach(lc1, outerrel->pathlist)
1415 {
1416 Path *outerpath = (Path *) lfirst(lc1);
1417 List *merge_pathkeys;
1418
1419 /*
1420 * We cannot use an outer path that is parameterized by the inner rel.
1421 */
1422 if (PATH_PARAM_BY_REL(outerpath, innerrel))
1423 continue;
1424
1425 /*
1426 * If we need to unique-ify the outer path, it's pointless to consider
1427 * any but the cheapest outer. (XXX we don't consider parameterized
1428 * outers, nor inners, for unique-ified cases. Should we?)
1429 */
1430 if (save_jointype == JOIN_UNIQUE_OUTER)
1431 {
1432 if (outerpath != outerrel->cheapest_total_path)
1433 continue;
1434 outerpath = (Path *) create_unique_path(root, outerrel,
1435 outerpath, extra->sjinfo);
1436 Assert(outerpath);
1437 }
1438
1439 /*
1440 * The result will have this sort order (even if it is implemented as
1441 * a nestloop, and even if some of the mergeclauses are implemented by
1442 * qpquals rather than as true mergeclauses):
1443 */
1444 merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1445 outerpath->pathkeys);
1446
1447 if (save_jointype == JOIN_UNIQUE_INNER)
1448 {
1449 /*
1450 * Consider nestloop join, but only with the unique-ified cheapest
1451 * inner path
1452 */
1453 try_nestloop_path(root,
1454 joinrel,
1455 outerpath,
1456 inner_cheapest_total,
1457 merge_pathkeys,
1458 jointype,
1459 extra);
1460 }
1461 else if (nestjoinOK)
1462 {
1463 /*
1464 * Consider nestloop joins using this outer path and various
1465 * available paths for the inner relation. We consider the
1466 * cheapest-total paths for each available parameterization of the
1467 * inner relation, including the unparameterized case.
1468 */
1469 ListCell *lc2;
1470
1471 foreach(lc2, innerrel->cheapest_parameterized_paths)
1472 {
1473 Path *innerpath = (Path *) lfirst(lc2);
1474
1475 try_nestloop_path(root,
1476 joinrel,
1477 outerpath,
1478 innerpath,
1479 merge_pathkeys,
1480 jointype,
1481 extra);
1482 }
1483
1484 /* Also consider materialized form of the cheapest inner path */
1485 if (matpath != NULL)
1486 try_nestloop_path(root,
1487 joinrel,
1488 outerpath,
1489 matpath,
1490 merge_pathkeys,
1491 jointype,
1492 extra);
1493 }
1494
1495 /* Can't do anything else if outer path needs to be unique'd */
1496 if (save_jointype == JOIN_UNIQUE_OUTER)
1497 continue;
1498
1499 /* Can't do anything else if inner rel is parameterized by outer */
1500 if (inner_cheapest_total == NULL)
1501 continue;
1502
1503 /* Generate merge join paths */
1504 generate_mergejoin_paths(root, joinrel, innerrel, outerpath,
1505 save_jointype, extra, useallclauses,
1506 inner_cheapest_total, merge_pathkeys,
1507 false);
1508 }
1509
1510 /*
1511 * Consider partial nestloop and mergejoin plan if outerrel has any
1512 * partial path and the joinrel is parallel-safe. However, we can't
1513 * handle JOIN_UNIQUE_OUTER, because the outer path will be partial, and
1514 * therefore we won't be able to properly guarantee uniqueness. Nor can
1515 * we handle extra_lateral_rels, since partial paths must not be
1516 * parameterized. Similarly, we can't handle JOIN_FULL and JOIN_RIGHT,
1517 * because they can produce false null extended rows.
1518 */
1519 if (joinrel->consider_parallel &&
1520 save_jointype != JOIN_UNIQUE_OUTER &&
1521 save_jointype != JOIN_FULL &&
1522 save_jointype != JOIN_RIGHT &&
1523 outerrel->partial_pathlist != NIL &&
1524 bms_is_empty(joinrel->lateral_relids))
1525 {
1526 if (nestjoinOK)
1527 consider_parallel_nestloop(root, joinrel, outerrel, innerrel,
1528 save_jointype, extra);
1529
1530 /*
1531 * If inner_cheapest_total is NULL or non parallel-safe then find the
1532 * cheapest total parallel safe path. If doing JOIN_UNIQUE_INNER, we
1533 * can't use any alternative inner path.
1534 */
1535 if (inner_cheapest_total == NULL ||
1536 !inner_cheapest_total->parallel_safe)
1537 {
1538 if (save_jointype == JOIN_UNIQUE_INNER)
1539 return;
1540
1541 inner_cheapest_total = get_cheapest_parallel_safe_total_inner(
1542 innerrel->pathlist);
1543 }
1544
1545 if (inner_cheapest_total)
1546 consider_parallel_mergejoin(root, joinrel, outerrel, innerrel,
1547 save_jointype, extra,
1548 inner_cheapest_total);
1549 }
1550}
1551
1552/*
1553 * consider_parallel_mergejoin
1554 * Try to build partial paths for a joinrel by joining a partial path
1555 * for the outer relation to a complete path for the inner relation.
1556 *
1557 * 'joinrel' is the join relation
1558 * 'outerrel' is the outer join relation
1559 * 'innerrel' is the inner join relation
1560 * 'jointype' is the type of join to do
1561 * 'extra' contains additional input values
1562 * 'inner_cheapest_total' cheapest total path for innerrel
1563 */
1564static void
1565consider_parallel_mergejoin(PlannerInfo *root,
1566 RelOptInfo *joinrel,
1567 RelOptInfo *outerrel,
1568 RelOptInfo *innerrel,
1569 JoinType jointype,
1570 JoinPathExtraData *extra,
1571 Path *inner_cheapest_total)
1572{
1573 ListCell *lc1;
1574
1575 /* generate merge join path for each partial outer path */
1576 foreach(lc1, outerrel->partial_pathlist)
1577 {
1578 Path *outerpath = (Path *) lfirst(lc1);
1579 List *merge_pathkeys;
1580
1581 /*
1582 * Figure out what useful ordering any paths we create will have.
1583 */
1584 merge_pathkeys = build_join_pathkeys(root, joinrel, jointype,
1585 outerpath->pathkeys);
1586
1587 generate_mergejoin_paths(root, joinrel, innerrel, outerpath, jointype,
1588 extra, false, inner_cheapest_total,
1589 merge_pathkeys, true);
1590 }
1591}
1592
1593/*
1594 * consider_parallel_nestloop
1595 * Try to build partial paths for a joinrel by joining a partial path for the
1596 * outer relation to a complete path for the inner relation.
1597 *
1598 * 'joinrel' is the join relation
1599 * 'outerrel' is the outer join relation
1600 * 'innerrel' is the inner join relation
1601 * 'jointype' is the type of join to do
1602 * 'extra' contains additional input values
1603 */
1604static void
1605consider_parallel_nestloop(PlannerInfo *root,
1606 RelOptInfo *joinrel,
1607 RelOptInfo *outerrel,
1608 RelOptInfo *innerrel,
1609 JoinType jointype,
1610 JoinPathExtraData *extra)
1611{
1612 JoinType save_jointype = jointype;
1613 ListCell *lc1;
1614
1615 if (jointype == JOIN_UNIQUE_INNER)
1616 jointype = JOIN_INNER;
1617
1618 foreach(lc1, outerrel->partial_pathlist)
1619 {
1620 Path *outerpath = (Path *) lfirst(lc1);
1621 List *pathkeys;
1622 ListCell *lc2;
1623
1624 /* Figure out what useful ordering any paths we create will have. */
1625 pathkeys = build_join_pathkeys(root, joinrel, jointype,
1626 outerpath->pathkeys);
1627
1628 /*
1629 * Try the cheapest parameterized paths; only those which will produce
1630 * an unparameterized path when joined to this outerrel will survive
1631 * try_partial_nestloop_path. The cheapest unparameterized path is
1632 * also in this list.
1633 */
1634 foreach(lc2, innerrel->cheapest_parameterized_paths)
1635 {
1636 Path *innerpath = (Path *) lfirst(lc2);
1637
1638 /* Can't join to an inner path that is not parallel-safe */
1639 if (!innerpath->parallel_safe)
1640 continue;
1641
1642 /*
1643 * If we're doing JOIN_UNIQUE_INNER, we can only use the inner's
1644 * cheapest_total_path, and we have to unique-ify it. (We might
1645 * be able to relax this to allow other safe, unparameterized
1646 * inner paths, but right now create_unique_path is not on board
1647 * with that.)
1648 */
1649 if (save_jointype == JOIN_UNIQUE_INNER)
1650 {
1651 if (innerpath != innerrel->cheapest_total_path)
1652 continue;
1653 innerpath = (Path *) create_unique_path(root, innerrel,
1654 innerpath,
1655 extra->sjinfo);
1656 Assert(innerpath);
1657 }
1658
1659 try_partial_nestloop_path(root, joinrel, outerpath, innerpath,
1660 pathkeys, jointype, extra);
1661 }
1662 }
1663}
1664
1665/*
1666 * hash_inner_and_outer
1667 * Create hashjoin join paths by explicitly hashing both the outer and
1668 * inner keys of each available hash clause.
1669 *
1670 * 'joinrel' is the join relation
1671 * 'outerrel' is the outer join relation
1672 * 'innerrel' is the inner join relation
1673 * 'jointype' is the type of join to do
1674 * 'extra' contains additional input values
1675 */
1676static void
1677hash_inner_and_outer(PlannerInfo *root,
1678 RelOptInfo *joinrel,
1679 RelOptInfo *outerrel,
1680 RelOptInfo *innerrel,
1681 JoinType jointype,
1682 JoinPathExtraData *extra)
1683{
1684 JoinType save_jointype = jointype;
1685 bool isouterjoin = IS_OUTER_JOIN(jointype);
1686 List *hashclauses;
1687 ListCell *l;
1688
1689 /*
1690 * We need to build only one hashclauses list for any given pair of outer
1691 * and inner relations; all of the hashable clauses will be used as keys.
1692 *
1693 * Scan the join's restrictinfo list to find hashjoinable clauses that are
1694 * usable with this pair of sub-relations.
1695 */
1696 hashclauses = NIL;
1697 foreach(l, extra->restrictlist)
1698 {
1699 RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
1700
1701 /*
1702 * If processing an outer join, only use its own join clauses for
1703 * hashing. For inner joins we need not be so picky.
1704 */
1705 if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
1706 continue;
1707
1708 if (!restrictinfo->can_join ||
1709 restrictinfo->hashjoinoperator == InvalidOid)
1710 continue; /* not hashjoinable */
1711
1712 /*
1713 * Check if clause has the form "outer op inner" or "inner op outer".
1714 */
1715 if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
1716 continue; /* no good for these input relations */
1717
1718 hashclauses = lappend(hashclauses, restrictinfo);
1719 }
1720
1721 /* If we found any usable hashclauses, make paths */
1722 if (hashclauses)
1723 {
1724 /*
1725 * We consider both the cheapest-total-cost and cheapest-startup-cost
1726 * outer paths. There's no need to consider any but the
1727 * cheapest-total-cost inner path, however.
1728 */
1729 Path *cheapest_startup_outer = outerrel->cheapest_startup_path;
1730 Path *cheapest_total_outer = outerrel->cheapest_total_path;
1731 Path *cheapest_total_inner = innerrel->cheapest_total_path;
1732
1733 /*
1734 * If either cheapest-total path is parameterized by the other rel, we
1735 * can't use a hashjoin. (There's no use looking for alternative
1736 * input paths, since these should already be the least-parameterized
1737 * available paths.)
1738 */
1739 if (PATH_PARAM_BY_REL(cheapest_total_outer, innerrel) ||
1740 PATH_PARAM_BY_REL(cheapest_total_inner, outerrel))
1741 return;
1742
1743 /* Unique-ify if need be; we ignore parameterized possibilities */
1744 if (jointype == JOIN_UNIQUE_OUTER)
1745 {
1746 cheapest_total_outer = (Path *)
1747 create_unique_path(root, outerrel,
1748 cheapest_total_outer, extra->sjinfo);
1749 Assert(cheapest_total_outer);
1750 jointype = JOIN_INNER;
1751 try_hashjoin_path(root,
1752 joinrel,
1753 cheapest_total_outer,
1754 cheapest_total_inner,
1755 hashclauses,
1756 jointype,
1757 extra);
1758 /* no possibility of cheap startup here */
1759 }
1760 else if (jointype == JOIN_UNIQUE_INNER)
1761 {
1762 cheapest_total_inner = (Path *)
1763 create_unique_path(root, innerrel,
1764 cheapest_total_inner, extra->sjinfo);
1765 Assert(cheapest_total_inner);
1766 jointype = JOIN_INNER;
1767 try_hashjoin_path(root,
1768 joinrel,
1769 cheapest_total_outer,
1770 cheapest_total_inner,
1771 hashclauses,
1772 jointype,
1773 extra);
1774 if (cheapest_startup_outer != NULL &&
1775 cheapest_startup_outer != cheapest_total_outer)
1776 try_hashjoin_path(root,
1777 joinrel,
1778 cheapest_startup_outer,
1779 cheapest_total_inner,
1780 hashclauses,
1781 jointype,
1782 extra);
1783 }
1784 else
1785 {
1786 /*
1787 * For other jointypes, we consider the cheapest startup outer
1788 * together with the cheapest total inner, and then consider
1789 * pairings of cheapest-total paths including parameterized ones.
1790 * There is no use in generating parameterized paths on the basis
1791 * of possibly cheap startup cost, so this is sufficient.
1792 */
1793 ListCell *lc1;
1794 ListCell *lc2;
1795
1796 if (cheapest_startup_outer != NULL)
1797 try_hashjoin_path(root,
1798 joinrel,
1799 cheapest_startup_outer,
1800 cheapest_total_inner,
1801 hashclauses,
1802 jointype,
1803 extra);
1804
1805 foreach(lc1, outerrel->cheapest_parameterized_paths)
1806 {
1807 Path *outerpath = (Path *) lfirst(lc1);
1808
1809 /*
1810 * We cannot use an outer path that is parameterized by the
1811 * inner rel.
1812 */
1813 if (PATH_PARAM_BY_REL(outerpath, innerrel))
1814 continue;
1815
1816 foreach(lc2, innerrel->cheapest_parameterized_paths)
1817 {
1818 Path *innerpath = (Path *) lfirst(lc2);
1819
1820 /*
1821 * We cannot use an inner path that is parameterized by
1822 * the outer rel, either.
1823 */
1824 if (PATH_PARAM_BY_REL(innerpath, outerrel))
1825 continue;
1826
1827 if (outerpath == cheapest_startup_outer &&
1828 innerpath == cheapest_total_inner)
1829 continue; /* already tried it */
1830
1831 try_hashjoin_path(root,
1832 joinrel,
1833 outerpath,
1834 innerpath,
1835 hashclauses,
1836 jointype,
1837 extra);
1838 }
1839 }
1840 }
1841
1842 /*
1843 * If the joinrel is parallel-safe, we may be able to consider a
1844 * partial hash join. However, we can't handle JOIN_UNIQUE_OUTER,
1845 * because the outer path will be partial, and therefore we won't be
1846 * able to properly guarantee uniqueness. Similarly, we can't handle
1847 * JOIN_FULL and JOIN_RIGHT, because they can produce false null
1848 * extended rows. Also, the resulting path must not be parameterized.
1849 * We would be able to support JOIN_FULL and JOIN_RIGHT for Parallel
1850 * Hash, since in that case we're back to a single hash table with a
1851 * single set of match bits for each batch, but that will require
1852 * figuring out a deadlock-free way to wait for the probe to finish.
1853 */
1854 if (joinrel->consider_parallel &&
1855 save_jointype != JOIN_UNIQUE_OUTER &&
1856 save_jointype != JOIN_FULL &&
1857 save_jointype != JOIN_RIGHT &&
1858 outerrel->partial_pathlist != NIL &&
1859 bms_is_empty(joinrel->lateral_relids))
1860 {
1861 Path *cheapest_partial_outer;
1862 Path *cheapest_partial_inner = NULL;
1863 Path *cheapest_safe_inner = NULL;
1864
1865 cheapest_partial_outer =
1866 (Path *) linitial(outerrel->partial_pathlist);
1867
1868 /*
1869 * Can we use a partial inner plan too, so that we can build a
1870 * shared hash table in parallel? We can't handle
1871 * JOIN_UNIQUE_INNER because we can't guarantee uniqueness.
1872 */
1873 if (innerrel->partial_pathlist != NIL &&
1874 save_jointype != JOIN_UNIQUE_INNER &&
1875 enable_parallel_hash)
1876 {
1877 cheapest_partial_inner =
1878 (Path *) linitial(innerrel->partial_pathlist);
1879 try_partial_hashjoin_path(root, joinrel,
1880 cheapest_partial_outer,
1881 cheapest_partial_inner,
1882 hashclauses, jointype, extra,
1883 true /* parallel_hash */ );
1884 }
1885
1886 /*
1887 * Normally, given that the joinrel is parallel-safe, the cheapest
1888 * total inner path will also be parallel-safe, but if not, we'll
1889 * have to search for the cheapest safe, unparameterized inner
1890 * path. If doing JOIN_UNIQUE_INNER, we can't use any alternative
1891 * inner path.
1892 */
1893 if (cheapest_total_inner->parallel_safe)
1894 cheapest_safe_inner = cheapest_total_inner;
1895 else if (save_jointype != JOIN_UNIQUE_INNER)
1896 cheapest_safe_inner =
1897 get_cheapest_parallel_safe_total_inner(innerrel->pathlist);
1898
1899 if (cheapest_safe_inner != NULL)
1900 try_partial_hashjoin_path(root, joinrel,
1901 cheapest_partial_outer,
1902 cheapest_safe_inner,
1903 hashclauses, jointype, extra,
1904 false /* parallel_hash */ );
1905 }
1906 }
1907}
1908
1909/*
1910 * select_mergejoin_clauses
1911 * Select mergejoin clauses that are usable for a particular join.
1912 * Returns a list of RestrictInfo nodes for those clauses.
1913 *
1914 * *mergejoin_allowed is normally set to true, but it is set to false if
1915 * this is a right/full join and there are nonmergejoinable join clauses.
1916 * The executor's mergejoin machinery cannot handle such cases, so we have
1917 * to avoid generating a mergejoin plan. (Note that this flag does NOT
1918 * consider whether there are actually any mergejoinable clauses. This is
1919 * correct because in some cases we need to build a clauseless mergejoin.
1920 * Simply returning NIL is therefore not enough to distinguish safe from
1921 * unsafe cases.)
1922 *
1923 * We also mark each selected RestrictInfo to show which side is currently
1924 * being considered as outer. These are transient markings that are only
1925 * good for the duration of the current add_paths_to_joinrel() call!
1926 *
1927 * We examine each restrictinfo clause known for the join to see
1928 * if it is mergejoinable and involves vars from the two sub-relations
1929 * currently of interest.
1930 */
1931static List *
1932select_mergejoin_clauses(PlannerInfo *root,
1933 RelOptInfo *joinrel,
1934 RelOptInfo *outerrel,
1935 RelOptInfo *innerrel,
1936 List *restrictlist,
1937 JoinType jointype,
1938 bool *mergejoin_allowed)
1939{
1940 List *result_list = NIL;
1941 bool isouterjoin = IS_OUTER_JOIN(jointype);
1942 bool have_nonmergeable_joinclause = false;
1943 ListCell *l;
1944
1945 foreach(l, restrictlist)
1946 {
1947 RestrictInfo *restrictinfo = (RestrictInfo *) lfirst(l);
1948
1949 /*
1950 * If processing an outer join, only use its own join clauses in the
1951 * merge. For inner joins we can use pushed-down clauses too. (Note:
1952 * we don't set have_nonmergeable_joinclause here because pushed-down
1953 * clauses will become otherquals not joinquals.)
1954 */
1955 if (isouterjoin && RINFO_IS_PUSHED_DOWN(restrictinfo, joinrel->relids))
1956 continue;
1957
1958 /* Check that clause is a mergeable operator clause */
1959 if (!restrictinfo->can_join ||
1960 restrictinfo->mergeopfamilies == NIL)
1961 {
1962 /*
1963 * The executor can handle extra joinquals that are constants, but
1964 * not anything else, when doing right/full merge join. (The
1965 * reason to support constants is so we can do FULL JOIN ON
1966 * FALSE.)
1967 */
1968 if (!restrictinfo->clause || !IsA(restrictinfo->clause, Const))
1969 have_nonmergeable_joinclause = true;
1970 continue; /* not mergejoinable */
1971 }
1972
1973 /*
1974 * Check if clause has the form "outer op inner" or "inner op outer".
1975 */
1976 if (!clause_sides_match_join(restrictinfo, outerrel, innerrel))
1977 {
1978 have_nonmergeable_joinclause = true;
1979 continue; /* no good for these input relations */
1980 }
1981
1982 /*
1983 * Insist that each side have a non-redundant eclass. This
1984 * restriction is needed because various bits of the planner expect
1985 * that each clause in a merge be associable with some pathkey in a
1986 * canonical pathkey list, but redundant eclasses can't appear in
1987 * canonical sort orderings. (XXX it might be worth relaxing this,
1988 * but not enough time to address it for 8.3.)
1989 *
1990 * Note: it would be bad if this condition failed for an otherwise
1991 * mergejoinable FULL JOIN clause, since that would result in
1992 * undesirable planner failure. I believe that is not possible
1993 * however; a variable involved in a full join could only appear in
1994 * below_outer_join eclasses, which aren't considered redundant.
1995 *
1996 * This case *can* happen for left/right join clauses: the outer-side
1997 * variable could be equated to a constant. Because we will propagate
1998 * that constant across the join clause, the loss of ability to do a
1999 * mergejoin is not really all that big a deal, and so it's not clear
2000 * that improving this is important.
2001 */
2002 update_mergeclause_eclasses(root, restrictinfo);
2003
2004 if (EC_MUST_BE_REDUNDANT(restrictinfo->left_ec) ||
2005 EC_MUST_BE_REDUNDANT(restrictinfo->right_ec))
2006 {
2007 have_nonmergeable_joinclause = true;
2008 continue; /* can't handle redundant eclasses */
2009 }
2010
2011 result_list = lappend(result_list, restrictinfo);
2012 }
2013
2014 /*
2015 * Report whether mergejoin is allowed (see comment at top of function).
2016 */
2017 switch (jointype)
2018 {
2019 case JOIN_RIGHT:
2020 case JOIN_FULL:
2021 *mergejoin_allowed = !have_nonmergeable_joinclause;
2022 break;
2023 default:
2024 *mergejoin_allowed = true;
2025 break;
2026 }
2027
2028 return result_list;
2029}
2030