1/*-------------------------------------------------------------------------
2 *
3 * joinrels.c
4 * Routines to determine which relations should be joined
5 *
6 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/optimizer/path/joinrels.c
12 *
13 *-------------------------------------------------------------------------
14 */
15#include "postgres.h"
16
17#include "miscadmin.h"
18#include "optimizer/appendinfo.h"
19#include "optimizer/joininfo.h"
20#include "optimizer/pathnode.h"
21#include "optimizer/paths.h"
22#include "partitioning/partbounds.h"
23#include "utils/lsyscache.h"
24#include "utils/memutils.h"
25
26
27static void make_rels_by_clause_joins(PlannerInfo *root,
28 RelOptInfo *old_rel,
29 ListCell *other_rels);
30static void make_rels_by_clauseless_joins(PlannerInfo *root,
31 RelOptInfo *old_rel,
32 ListCell *other_rels);
33static bool has_join_restriction(PlannerInfo *root, RelOptInfo *rel);
34static bool has_legal_joinclause(PlannerInfo *root, RelOptInfo *rel);
35static bool restriction_is_constant_false(List *restrictlist,
36 RelOptInfo *joinrel,
37 bool only_pushed_down);
38static void populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1,
39 RelOptInfo *rel2, RelOptInfo *joinrel,
40 SpecialJoinInfo *sjinfo, List *restrictlist);
41static void try_partitionwise_join(PlannerInfo *root, RelOptInfo *rel1,
42 RelOptInfo *rel2, RelOptInfo *joinrel,
43 SpecialJoinInfo *parent_sjinfo,
44 List *parent_restrictlist);
45static SpecialJoinInfo *build_child_join_sjinfo(PlannerInfo *root,
46 SpecialJoinInfo *parent_sjinfo,
47 Relids left_relids, Relids right_relids);
48static int match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel,
49 bool strict_op);
50
51
52/*
53 * join_search_one_level
54 * Consider ways to produce join relations containing exactly 'level'
55 * jointree items. (This is one step of the dynamic-programming method
56 * embodied in standard_join_search.) Join rel nodes for each feasible
57 * combination of lower-level rels are created and returned in a list.
58 * Implementation paths are created for each such joinrel, too.
59 *
60 * level: level of rels we want to make this time
61 * root->join_rel_level[j], 1 <= j < level, is a list of rels containing j items
62 *
63 * The result is returned in root->join_rel_level[level].
64 */
65void
66join_search_one_level(PlannerInfo *root, int level)
67{
68 List **joinrels = root->join_rel_level;
69 ListCell *r;
70 int k;
71
72 Assert(joinrels[level] == NIL);
73
74 /* Set join_cur_level so that new joinrels are added to proper list */
75 root->join_cur_level = level;
76
77 /*
78 * First, consider left-sided and right-sided plans, in which rels of
79 * exactly level-1 member relations are joined against initial relations.
80 * We prefer to join using join clauses, but if we find a rel of level-1
81 * members that has no join clauses, we will generate Cartesian-product
82 * joins against all initial rels not already contained in it.
83 */
84 foreach(r, joinrels[level - 1])
85 {
86 RelOptInfo *old_rel = (RelOptInfo *) lfirst(r);
87
88 if (old_rel->joininfo != NIL || old_rel->has_eclass_joins ||
89 has_join_restriction(root, old_rel))
90 {
91 /*
92 * There are join clauses or join order restrictions relevant to
93 * this rel, so consider joins between this rel and (only) those
94 * initial rels it is linked to by a clause or restriction.
95 *
96 * At level 2 this condition is symmetric, so there is no need to
97 * look at initial rels before this one in the list; we already
98 * considered such joins when we were at the earlier rel. (The
99 * mirror-image joins are handled automatically by make_join_rel.)
100 * In later passes (level > 2), we join rels of the previous level
101 * to each initial rel they don't already include but have a join
102 * clause or restriction with.
103 */
104 ListCell *other_rels;
105
106 if (level == 2) /* consider remaining initial rels */
107 other_rels = lnext(r);
108 else /* consider all initial rels */
109 other_rels = list_head(joinrels[1]);
110
111 make_rels_by_clause_joins(root,
112 old_rel,
113 other_rels);
114 }
115 else
116 {
117 /*
118 * Oops, we have a relation that is not joined to any other
119 * relation, either directly or by join-order restrictions.
120 * Cartesian product time.
121 *
122 * We consider a cartesian product with each not-already-included
123 * initial rel, whether it has other join clauses or not. At
124 * level 2, if there are two or more clauseless initial rels, we
125 * will redundantly consider joining them in both directions; but
126 * such cases aren't common enough to justify adding complexity to
127 * avoid the duplicated effort.
128 */
129 make_rels_by_clauseless_joins(root,
130 old_rel,
131 list_head(joinrels[1]));
132 }
133 }
134
135 /*
136 * Now, consider "bushy plans" in which relations of k initial rels are
137 * joined to relations of level-k initial rels, for 2 <= k <= level-2.
138 *
139 * We only consider bushy-plan joins for pairs of rels where there is a
140 * suitable join clause (or join order restriction), in order to avoid
141 * unreasonable growth of planning time.
142 */
143 for (k = 2;; k++)
144 {
145 int other_level = level - k;
146
147 /*
148 * Since make_join_rel(x, y) handles both x,y and y,x cases, we only
149 * need to go as far as the halfway point.
150 */
151 if (k > other_level)
152 break;
153
154 foreach(r, joinrels[k])
155 {
156 RelOptInfo *old_rel = (RelOptInfo *) lfirst(r);
157 ListCell *other_rels;
158 ListCell *r2;
159
160 /*
161 * We can ignore relations without join clauses here, unless they
162 * participate in join-order restrictions --- then we might have
163 * to force a bushy join plan.
164 */
165 if (old_rel->joininfo == NIL && !old_rel->has_eclass_joins &&
166 !has_join_restriction(root, old_rel))
167 continue;
168
169 if (k == other_level)
170 other_rels = lnext(r); /* only consider remaining rels */
171 else
172 other_rels = list_head(joinrels[other_level]);
173
174 for_each_cell(r2, other_rels)
175 {
176 RelOptInfo *new_rel = (RelOptInfo *) lfirst(r2);
177
178 if (!bms_overlap(old_rel->relids, new_rel->relids))
179 {
180 /*
181 * OK, we can build a rel of the right level from this
182 * pair of rels. Do so if there is at least one relevant
183 * join clause or join order restriction.
184 */
185 if (have_relevant_joinclause(root, old_rel, new_rel) ||
186 have_join_order_restriction(root, old_rel, new_rel))
187 {
188 (void) make_join_rel(root, old_rel, new_rel);
189 }
190 }
191 }
192 }
193 }
194
195 /*----------
196 * Last-ditch effort: if we failed to find any usable joins so far, force
197 * a set of cartesian-product joins to be generated. This handles the
198 * special case where all the available rels have join clauses but we
199 * cannot use any of those clauses yet. This can only happen when we are
200 * considering a join sub-problem (a sub-joinlist) and all the rels in the
201 * sub-problem have only join clauses with rels outside the sub-problem.
202 * An example is
203 *
204 * SELECT ... FROM a INNER JOIN b ON TRUE, c, d, ...
205 * WHERE a.w = c.x and b.y = d.z;
206 *
207 * If the "a INNER JOIN b" sub-problem does not get flattened into the
208 * upper level, we must be willing to make a cartesian join of a and b;
209 * but the code above will not have done so, because it thought that both
210 * a and b have joinclauses. We consider only left-sided and right-sided
211 * cartesian joins in this case (no bushy).
212 *----------
213 */
214 if (joinrels[level] == NIL)
215 {
216 /*
217 * This loop is just like the first one, except we always call
218 * make_rels_by_clauseless_joins().
219 */
220 foreach(r, joinrels[level - 1])
221 {
222 RelOptInfo *old_rel = (RelOptInfo *) lfirst(r);
223
224 make_rels_by_clauseless_joins(root,
225 old_rel,
226 list_head(joinrels[1]));
227 }
228
229 /*----------
230 * When special joins are involved, there may be no legal way
231 * to make an N-way join for some values of N. For example consider
232 *
233 * SELECT ... FROM t1 WHERE
234 * x IN (SELECT ... FROM t2,t3 WHERE ...) AND
235 * y IN (SELECT ... FROM t4,t5 WHERE ...)
236 *
237 * We will flatten this query to a 5-way join problem, but there are
238 * no 4-way joins that join_is_legal() will consider legal. We have
239 * to accept failure at level 4 and go on to discover a workable
240 * bushy plan at level 5.
241 *
242 * However, if there are no special joins and no lateral references
243 * then join_is_legal() should never fail, and so the following sanity
244 * check is useful.
245 *----------
246 */
247 if (joinrels[level] == NIL &&
248 root->join_info_list == NIL &&
249 !root->hasLateralRTEs)
250 elog(ERROR, "failed to build any %d-way joins", level);
251 }
252}
253
254/*
255 * make_rels_by_clause_joins
256 * Build joins between the given relation 'old_rel' and other relations
257 * that participate in join clauses that 'old_rel' also participates in
258 * (or participate in join-order restrictions with it).
259 * The join rels are returned in root->join_rel_level[join_cur_level].
260 *
261 * Note: at levels above 2 we will generate the same joined relation in
262 * multiple ways --- for example (a join b) join c is the same RelOptInfo as
263 * (b join c) join a, though the second case will add a different set of Paths
264 * to it. This is the reason for using the join_rel_level mechanism, which
265 * automatically ensures that each new joinrel is only added to the list once.
266 *
267 * 'old_rel' is the relation entry for the relation to be joined
268 * 'other_rels': the first cell in a linked list containing the other
269 * rels to be considered for joining
270 *
271 * Currently, this is only used with initial rels in other_rels, but it
272 * will work for joining to joinrels too.
273 */
274static void
275make_rels_by_clause_joins(PlannerInfo *root,
276 RelOptInfo *old_rel,
277 ListCell *other_rels)
278{
279 ListCell *l;
280
281 for_each_cell(l, other_rels)
282 {
283 RelOptInfo *other_rel = (RelOptInfo *) lfirst(l);
284
285 if (!bms_overlap(old_rel->relids, other_rel->relids) &&
286 (have_relevant_joinclause(root, old_rel, other_rel) ||
287 have_join_order_restriction(root, old_rel, other_rel)))
288 {
289 (void) make_join_rel(root, old_rel, other_rel);
290 }
291 }
292}
293
294/*
295 * make_rels_by_clauseless_joins
296 * Given a relation 'old_rel' and a list of other relations
297 * 'other_rels', create a join relation between 'old_rel' and each
298 * member of 'other_rels' that isn't already included in 'old_rel'.
299 * The join rels are returned in root->join_rel_level[join_cur_level].
300 *
301 * 'old_rel' is the relation entry for the relation to be joined
302 * 'other_rels': the first cell of a linked list containing the
303 * other rels to be considered for joining
304 *
305 * Currently, this is only used with initial rels in other_rels, but it would
306 * work for joining to joinrels too.
307 */
308static void
309make_rels_by_clauseless_joins(PlannerInfo *root,
310 RelOptInfo *old_rel,
311 ListCell *other_rels)
312{
313 ListCell *l;
314
315 for_each_cell(l, other_rels)
316 {
317 RelOptInfo *other_rel = (RelOptInfo *) lfirst(l);
318
319 if (!bms_overlap(other_rel->relids, old_rel->relids))
320 {
321 (void) make_join_rel(root, old_rel, other_rel);
322 }
323 }
324}
325
326
327/*
328 * join_is_legal
329 * Determine whether a proposed join is legal given the query's
330 * join order constraints; and if it is, determine the join type.
331 *
332 * Caller must supply not only the two rels, but the union of their relids.
333 * (We could simplify the API by computing joinrelids locally, but this
334 * would be redundant work in the normal path through make_join_rel.)
335 *
336 * On success, *sjinfo_p is set to NULL if this is to be a plain inner join,
337 * else it's set to point to the associated SpecialJoinInfo node. Also,
338 * *reversed_p is set true if the given relations need to be swapped to
339 * match the SpecialJoinInfo node.
340 */
341static bool
342join_is_legal(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
343 Relids joinrelids,
344 SpecialJoinInfo **sjinfo_p, bool *reversed_p)
345{
346 SpecialJoinInfo *match_sjinfo;
347 bool reversed;
348 bool unique_ified;
349 bool must_be_leftjoin;
350 ListCell *l;
351
352 /*
353 * Ensure output params are set on failure return. This is just to
354 * suppress uninitialized-variable warnings from overly anal compilers.
355 */
356 *sjinfo_p = NULL;
357 *reversed_p = false;
358
359 /*
360 * If we have any special joins, the proposed join might be illegal; and
361 * in any case we have to determine its join type. Scan the join info
362 * list for matches and conflicts.
363 */
364 match_sjinfo = NULL;
365 reversed = false;
366 unique_ified = false;
367 must_be_leftjoin = false;
368
369 foreach(l, root->join_info_list)
370 {
371 SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
372
373 /*
374 * This special join is not relevant unless its RHS overlaps the
375 * proposed join. (Check this first as a fast path for dismissing
376 * most irrelevant SJs quickly.)
377 */
378 if (!bms_overlap(sjinfo->min_righthand, joinrelids))
379 continue;
380
381 /*
382 * Also, not relevant if proposed join is fully contained within RHS
383 * (ie, we're still building up the RHS).
384 */
385 if (bms_is_subset(joinrelids, sjinfo->min_righthand))
386 continue;
387
388 /*
389 * Also, not relevant if SJ is already done within either input.
390 */
391 if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
392 bms_is_subset(sjinfo->min_righthand, rel1->relids))
393 continue;
394 if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) &&
395 bms_is_subset(sjinfo->min_righthand, rel2->relids))
396 continue;
397
398 /*
399 * If it's a semijoin and we already joined the RHS to any other rels
400 * within either input, then we must have unique-ified the RHS at that
401 * point (see below). Therefore the semijoin is no longer relevant in
402 * this join path.
403 */
404 if (sjinfo->jointype == JOIN_SEMI)
405 {
406 if (bms_is_subset(sjinfo->syn_righthand, rel1->relids) &&
407 !bms_equal(sjinfo->syn_righthand, rel1->relids))
408 continue;
409 if (bms_is_subset(sjinfo->syn_righthand, rel2->relids) &&
410 !bms_equal(sjinfo->syn_righthand, rel2->relids))
411 continue;
412 }
413
414 /*
415 * If one input contains min_lefthand and the other contains
416 * min_righthand, then we can perform the SJ at this join.
417 *
418 * Reject if we get matches to more than one SJ; that implies we're
419 * considering something that's not really valid.
420 */
421 if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
422 bms_is_subset(sjinfo->min_righthand, rel2->relids))
423 {
424 if (match_sjinfo)
425 return false; /* invalid join path */
426 match_sjinfo = sjinfo;
427 reversed = false;
428 }
429 else if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) &&
430 bms_is_subset(sjinfo->min_righthand, rel1->relids))
431 {
432 if (match_sjinfo)
433 return false; /* invalid join path */
434 match_sjinfo = sjinfo;
435 reversed = true;
436 }
437 else if (sjinfo->jointype == JOIN_SEMI &&
438 bms_equal(sjinfo->syn_righthand, rel2->relids) &&
439 create_unique_path(root, rel2, rel2->cheapest_total_path,
440 sjinfo) != NULL)
441 {
442 /*----------
443 * For a semijoin, we can join the RHS to anything else by
444 * unique-ifying the RHS (if the RHS can be unique-ified).
445 * We will only get here if we have the full RHS but less
446 * than min_lefthand on the LHS.
447 *
448 * The reason to consider such a join path is exemplified by
449 * SELECT ... FROM a,b WHERE (a.x,b.y) IN (SELECT c1,c2 FROM c)
450 * If we insist on doing this as a semijoin we will first have
451 * to form the cartesian product of A*B. But if we unique-ify
452 * C then the semijoin becomes a plain innerjoin and we can join
453 * in any order, eg C to A and then to B. When C is much smaller
454 * than A and B this can be a huge win. So we allow C to be
455 * joined to just A or just B here, and then make_join_rel has
456 * to handle the case properly.
457 *
458 * Note that actually we'll allow unique-ified C to be joined to
459 * some other relation D here, too. That is legal, if usually not
460 * very sane, and this routine is only concerned with legality not
461 * with whether the join is good strategy.
462 *----------
463 */
464 if (match_sjinfo)
465 return false; /* invalid join path */
466 match_sjinfo = sjinfo;
467 reversed = false;
468 unique_ified = true;
469 }
470 else if (sjinfo->jointype == JOIN_SEMI &&
471 bms_equal(sjinfo->syn_righthand, rel1->relids) &&
472 create_unique_path(root, rel1, rel1->cheapest_total_path,
473 sjinfo) != NULL)
474 {
475 /* Reversed semijoin case */
476 if (match_sjinfo)
477 return false; /* invalid join path */
478 match_sjinfo = sjinfo;
479 reversed = true;
480 unique_ified = true;
481 }
482 else
483 {
484 /*
485 * Otherwise, the proposed join overlaps the RHS but isn't a valid
486 * implementation of this SJ. But don't panic quite yet: the RHS
487 * violation might have occurred previously, in one or both input
488 * relations, in which case we must have previously decided that
489 * it was OK to commute some other SJ with this one. If we need
490 * to perform this join to finish building up the RHS, rejecting
491 * it could lead to not finding any plan at all. (This can occur
492 * because of the heuristics elsewhere in this file that postpone
493 * clauseless joins: we might not consider doing a clauseless join
494 * within the RHS until after we've performed other, validly
495 * commutable SJs with one or both sides of the clauseless join.)
496 * This consideration boils down to the rule that if both inputs
497 * overlap the RHS, we can allow the join --- they are either
498 * fully within the RHS, or represent previously-allowed joins to
499 * rels outside it.
500 */
501 if (bms_overlap(rel1->relids, sjinfo->min_righthand) &&
502 bms_overlap(rel2->relids, sjinfo->min_righthand))
503 continue; /* assume valid previous violation of RHS */
504
505 /*
506 * The proposed join could still be legal, but only if we're
507 * allowed to associate it into the RHS of this SJ. That means
508 * this SJ must be a LEFT join (not SEMI or ANTI, and certainly
509 * not FULL) and the proposed join must not overlap the LHS.
510 */
511 if (sjinfo->jointype != JOIN_LEFT ||
512 bms_overlap(joinrelids, sjinfo->min_lefthand))
513 return false; /* invalid join path */
514
515 /*
516 * To be valid, the proposed join must be a LEFT join; otherwise
517 * it can't associate into this SJ's RHS. But we may not yet have
518 * found the SpecialJoinInfo matching the proposed join, so we
519 * can't test that yet. Remember the requirement for later.
520 */
521 must_be_leftjoin = true;
522 }
523 }
524
525 /*
526 * Fail if violated any SJ's RHS and didn't match to a LEFT SJ: the
527 * proposed join can't associate into an SJ's RHS.
528 *
529 * Also, fail if the proposed join's predicate isn't strict; we're
530 * essentially checking to see if we can apply outer-join identity 3, and
531 * that's a requirement. (This check may be redundant with checks in
532 * make_outerjoininfo, but I'm not quite sure, and it's cheap to test.)
533 */
534 if (must_be_leftjoin &&
535 (match_sjinfo == NULL ||
536 match_sjinfo->jointype != JOIN_LEFT ||
537 !match_sjinfo->lhs_strict))
538 return false; /* invalid join path */
539
540 /*
541 * We also have to check for constraints imposed by LATERAL references.
542 */
543 if (root->hasLateralRTEs)
544 {
545 bool lateral_fwd;
546 bool lateral_rev;
547 Relids join_lateral_rels;
548
549 /*
550 * The proposed rels could each contain lateral references to the
551 * other, in which case the join is impossible. If there are lateral
552 * references in just one direction, then the join has to be done with
553 * a nestloop with the lateral referencer on the inside. If the join
554 * matches an SJ that cannot be implemented by such a nestloop, the
555 * join is impossible.
556 *
557 * Also, if the lateral reference is only indirect, we should reject
558 * the join; whatever rel(s) the reference chain goes through must be
559 * joined to first.
560 *
561 * Another case that might keep us from building a valid plan is the
562 * implementation restriction described by have_dangerous_phv().
563 */
564 lateral_fwd = bms_overlap(rel1->relids, rel2->lateral_relids);
565 lateral_rev = bms_overlap(rel2->relids, rel1->lateral_relids);
566 if (lateral_fwd && lateral_rev)
567 return false; /* have lateral refs in both directions */
568 if (lateral_fwd)
569 {
570 /* has to be implemented as nestloop with rel1 on left */
571 if (match_sjinfo &&
572 (reversed ||
573 unique_ified ||
574 match_sjinfo->jointype == JOIN_FULL))
575 return false; /* not implementable as nestloop */
576 /* check there is a direct reference from rel2 to rel1 */
577 if (!bms_overlap(rel1->relids, rel2->direct_lateral_relids))
578 return false; /* only indirect refs, so reject */
579 /* check we won't have a dangerous PHV */
580 if (have_dangerous_phv(root, rel1->relids, rel2->lateral_relids))
581 return false; /* might be unable to handle required PHV */
582 }
583 else if (lateral_rev)
584 {
585 /* has to be implemented as nestloop with rel2 on left */
586 if (match_sjinfo &&
587 (!reversed ||
588 unique_ified ||
589 match_sjinfo->jointype == JOIN_FULL))
590 return false; /* not implementable as nestloop */
591 /* check there is a direct reference from rel1 to rel2 */
592 if (!bms_overlap(rel2->relids, rel1->direct_lateral_relids))
593 return false; /* only indirect refs, so reject */
594 /* check we won't have a dangerous PHV */
595 if (have_dangerous_phv(root, rel2->relids, rel1->lateral_relids))
596 return false; /* might be unable to handle required PHV */
597 }
598
599 /*
600 * LATERAL references could also cause problems later on if we accept
601 * this join: if the join's minimum parameterization includes any rels
602 * that would have to be on the inside of an outer join with this join
603 * rel, then it's never going to be possible to build the complete
604 * query using this join. We should reject this join not only because
605 * it'll save work, but because if we don't, the clauseless-join
606 * heuristics might think that legality of this join means that some
607 * other join rel need not be formed, and that could lead to failure
608 * to find any plan at all. We have to consider not only rels that
609 * are directly on the inner side of an OJ with the joinrel, but also
610 * ones that are indirectly so, so search to find all such rels.
611 */
612 join_lateral_rels = min_join_parameterization(root, joinrelids,
613 rel1, rel2);
614 if (join_lateral_rels)
615 {
616 Relids join_plus_rhs = bms_copy(joinrelids);
617 bool more;
618
619 do
620 {
621 more = false;
622 foreach(l, root->join_info_list)
623 {
624 SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
625
626 /* ignore full joins --- their ordering is predetermined */
627 if (sjinfo->jointype == JOIN_FULL)
628 continue;
629
630 if (bms_overlap(sjinfo->min_lefthand, join_plus_rhs) &&
631 !bms_is_subset(sjinfo->min_righthand, join_plus_rhs))
632 {
633 join_plus_rhs = bms_add_members(join_plus_rhs,
634 sjinfo->min_righthand);
635 more = true;
636 }
637 }
638 } while (more);
639 if (bms_overlap(join_plus_rhs, join_lateral_rels))
640 return false; /* will not be able to join to some RHS rel */
641 }
642 }
643
644 /* Otherwise, it's a valid join */
645 *sjinfo_p = match_sjinfo;
646 *reversed_p = reversed;
647 return true;
648}
649
650
651/*
652 * make_join_rel
653 * Find or create a join RelOptInfo that represents the join of
654 * the two given rels, and add to it path information for paths
655 * created with the two rels as outer and inner rel.
656 * (The join rel may already contain paths generated from other
657 * pairs of rels that add up to the same set of base rels.)
658 *
659 * NB: will return NULL if attempted join is not valid. This can happen
660 * when working with outer joins, or with IN or EXISTS clauses that have been
661 * turned into joins.
662 */
663RelOptInfo *
664make_join_rel(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2)
665{
666 Relids joinrelids;
667 SpecialJoinInfo *sjinfo;
668 bool reversed;
669 SpecialJoinInfo sjinfo_data;
670 RelOptInfo *joinrel;
671 List *restrictlist;
672
673 /* We should never try to join two overlapping sets of rels. */
674 Assert(!bms_overlap(rel1->relids, rel2->relids));
675
676 /* Construct Relids set that identifies the joinrel. */
677 joinrelids = bms_union(rel1->relids, rel2->relids);
678
679 /* Check validity and determine join type. */
680 if (!join_is_legal(root, rel1, rel2, joinrelids,
681 &sjinfo, &reversed))
682 {
683 /* invalid join path */
684 bms_free(joinrelids);
685 return NULL;
686 }
687
688 /* Swap rels if needed to match the join info. */
689 if (reversed)
690 {
691 RelOptInfo *trel = rel1;
692
693 rel1 = rel2;
694 rel2 = trel;
695 }
696
697 /*
698 * If it's a plain inner join, then we won't have found anything in
699 * join_info_list. Make up a SpecialJoinInfo so that selectivity
700 * estimation functions will know what's being joined.
701 */
702 if (sjinfo == NULL)
703 {
704 sjinfo = &sjinfo_data;
705 sjinfo->type = T_SpecialJoinInfo;
706 sjinfo->min_lefthand = rel1->relids;
707 sjinfo->min_righthand = rel2->relids;
708 sjinfo->syn_lefthand = rel1->relids;
709 sjinfo->syn_righthand = rel2->relids;
710 sjinfo->jointype = JOIN_INNER;
711 /* we don't bother trying to make the remaining fields valid */
712 sjinfo->lhs_strict = false;
713 sjinfo->delay_upper_joins = false;
714 sjinfo->semi_can_btree = false;
715 sjinfo->semi_can_hash = false;
716 sjinfo->semi_operators = NIL;
717 sjinfo->semi_rhs_exprs = NIL;
718 }
719
720 /*
721 * Find or build the join RelOptInfo, and compute the restrictlist that
722 * goes with this particular joining.
723 */
724 joinrel = build_join_rel(root, joinrelids, rel1, rel2, sjinfo,
725 &restrictlist);
726
727 /*
728 * If we've already proven this join is empty, we needn't consider any
729 * more paths for it.
730 */
731 if (is_dummy_rel(joinrel))
732 {
733 bms_free(joinrelids);
734 return joinrel;
735 }
736
737 /* Add paths to the join relation. */
738 populate_joinrel_with_paths(root, rel1, rel2, joinrel, sjinfo,
739 restrictlist);
740
741 bms_free(joinrelids);
742
743 return joinrel;
744}
745
746/*
747 * populate_joinrel_with_paths
748 * Add paths to the given joinrel for given pair of joining relations. The
749 * SpecialJoinInfo provides details about the join and the restrictlist
750 * contains the join clauses and the other clauses applicable for given pair
751 * of the joining relations.
752 */
753static void
754populate_joinrel_with_paths(PlannerInfo *root, RelOptInfo *rel1,
755 RelOptInfo *rel2, RelOptInfo *joinrel,
756 SpecialJoinInfo *sjinfo, List *restrictlist)
757{
758 /*
759 * Consider paths using each rel as both outer and inner. Depending on
760 * the join type, a provably empty outer or inner rel might mean the join
761 * is provably empty too; in which case throw away any previously computed
762 * paths and mark the join as dummy. (We do it this way since it's
763 * conceivable that dummy-ness of a multi-element join might only be
764 * noticeable for certain construction paths.)
765 *
766 * Also, a provably constant-false join restriction typically means that
767 * we can skip evaluating one or both sides of the join. We do this by
768 * marking the appropriate rel as dummy. For outer joins, a
769 * constant-false restriction that is pushed down still means the whole
770 * join is dummy, while a non-pushed-down one means that no inner rows
771 * will join so we can treat the inner rel as dummy.
772 *
773 * We need only consider the jointypes that appear in join_info_list, plus
774 * JOIN_INNER.
775 */
776 switch (sjinfo->jointype)
777 {
778 case JOIN_INNER:
779 if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
780 restriction_is_constant_false(restrictlist, joinrel, false))
781 {
782 mark_dummy_rel(joinrel);
783 break;
784 }
785 add_paths_to_joinrel(root, joinrel, rel1, rel2,
786 JOIN_INNER, sjinfo,
787 restrictlist);
788 add_paths_to_joinrel(root, joinrel, rel2, rel1,
789 JOIN_INNER, sjinfo,
790 restrictlist);
791 break;
792 case JOIN_LEFT:
793 if (is_dummy_rel(rel1) ||
794 restriction_is_constant_false(restrictlist, joinrel, true))
795 {
796 mark_dummy_rel(joinrel);
797 break;
798 }
799 if (restriction_is_constant_false(restrictlist, joinrel, false) &&
800 bms_is_subset(rel2->relids, sjinfo->syn_righthand))
801 mark_dummy_rel(rel2);
802 add_paths_to_joinrel(root, joinrel, rel1, rel2,
803 JOIN_LEFT, sjinfo,
804 restrictlist);
805 add_paths_to_joinrel(root, joinrel, rel2, rel1,
806 JOIN_RIGHT, sjinfo,
807 restrictlist);
808 break;
809 case JOIN_FULL:
810 if ((is_dummy_rel(rel1) && is_dummy_rel(rel2)) ||
811 restriction_is_constant_false(restrictlist, joinrel, true))
812 {
813 mark_dummy_rel(joinrel);
814 break;
815 }
816 add_paths_to_joinrel(root, joinrel, rel1, rel2,
817 JOIN_FULL, sjinfo,
818 restrictlist);
819 add_paths_to_joinrel(root, joinrel, rel2, rel1,
820 JOIN_FULL, sjinfo,
821 restrictlist);
822
823 /*
824 * If there are join quals that aren't mergeable or hashable, we
825 * may not be able to build any valid plan. Complain here so that
826 * we can give a somewhat-useful error message. (Since we have no
827 * flexibility of planning for a full join, there's no chance of
828 * succeeding later with another pair of input rels.)
829 */
830 if (joinrel->pathlist == NIL)
831 ereport(ERROR,
832 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
833 errmsg("FULL JOIN is only supported with merge-joinable or hash-joinable join conditions")));
834 break;
835 case JOIN_SEMI:
836
837 /*
838 * We might have a normal semijoin, or a case where we don't have
839 * enough rels to do the semijoin but can unique-ify the RHS and
840 * then do an innerjoin (see comments in join_is_legal). In the
841 * latter case we can't apply JOIN_SEMI joining.
842 */
843 if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
844 bms_is_subset(sjinfo->min_righthand, rel2->relids))
845 {
846 if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
847 restriction_is_constant_false(restrictlist, joinrel, false))
848 {
849 mark_dummy_rel(joinrel);
850 break;
851 }
852 add_paths_to_joinrel(root, joinrel, rel1, rel2,
853 JOIN_SEMI, sjinfo,
854 restrictlist);
855 }
856
857 /*
858 * If we know how to unique-ify the RHS and one input rel is
859 * exactly the RHS (not a superset) we can consider unique-ifying
860 * it and then doing a regular join. (The create_unique_path
861 * check here is probably redundant with what join_is_legal did,
862 * but if so the check is cheap because it's cached. So test
863 * anyway to be sure.)
864 */
865 if (bms_equal(sjinfo->syn_righthand, rel2->relids) &&
866 create_unique_path(root, rel2, rel2->cheapest_total_path,
867 sjinfo) != NULL)
868 {
869 if (is_dummy_rel(rel1) || is_dummy_rel(rel2) ||
870 restriction_is_constant_false(restrictlist, joinrel, false))
871 {
872 mark_dummy_rel(joinrel);
873 break;
874 }
875 add_paths_to_joinrel(root, joinrel, rel1, rel2,
876 JOIN_UNIQUE_INNER, sjinfo,
877 restrictlist);
878 add_paths_to_joinrel(root, joinrel, rel2, rel1,
879 JOIN_UNIQUE_OUTER, sjinfo,
880 restrictlist);
881 }
882 break;
883 case JOIN_ANTI:
884 if (is_dummy_rel(rel1) ||
885 restriction_is_constant_false(restrictlist, joinrel, true))
886 {
887 mark_dummy_rel(joinrel);
888 break;
889 }
890 if (restriction_is_constant_false(restrictlist, joinrel, false) &&
891 bms_is_subset(rel2->relids, sjinfo->syn_righthand))
892 mark_dummy_rel(rel2);
893 add_paths_to_joinrel(root, joinrel, rel1, rel2,
894 JOIN_ANTI, sjinfo,
895 restrictlist);
896 break;
897 default:
898 /* other values not expected here */
899 elog(ERROR, "unrecognized join type: %d", (int) sjinfo->jointype);
900 break;
901 }
902
903 /* Apply partitionwise join technique, if possible. */
904 try_partitionwise_join(root, rel1, rel2, joinrel, sjinfo, restrictlist);
905}
906
907
908/*
909 * have_join_order_restriction
910 * Detect whether the two relations should be joined to satisfy
911 * a join-order restriction arising from special or lateral joins.
912 *
913 * In practice this is always used with have_relevant_joinclause(), and so
914 * could be merged with that function, but it seems clearer to separate the
915 * two concerns. We need this test because there are degenerate cases where
916 * a clauseless join must be performed to satisfy join-order restrictions.
917 * Also, if one rel has a lateral reference to the other, or both are needed
918 * to compute some PHV, we should consider joining them even if the join would
919 * be clauseless.
920 *
921 * Note: this is only a problem if one side of a degenerate outer join
922 * contains multiple rels, or a clauseless join is required within an
923 * IN/EXISTS RHS; else we will find a join path via the "last ditch" case in
924 * join_search_one_level(). We could dispense with this test if we were
925 * willing to try bushy plans in the "last ditch" case, but that seems much
926 * less efficient.
927 */
928bool
929have_join_order_restriction(PlannerInfo *root,
930 RelOptInfo *rel1, RelOptInfo *rel2)
931{
932 bool result = false;
933 ListCell *l;
934
935 /*
936 * If either side has a direct lateral reference to the other, attempt the
937 * join regardless of outer-join considerations.
938 */
939 if (bms_overlap(rel1->relids, rel2->direct_lateral_relids) ||
940 bms_overlap(rel2->relids, rel1->direct_lateral_relids))
941 return true;
942
943 /*
944 * Likewise, if both rels are needed to compute some PlaceHolderVar,
945 * attempt the join regardless of outer-join considerations. (This is not
946 * very desirable, because a PHV with a large eval_at set will cause a lot
947 * of probably-useless joins to be considered, but failing to do this can
948 * cause us to fail to construct a plan at all.)
949 */
950 foreach(l, root->placeholder_list)
951 {
952 PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
953
954 if (bms_is_subset(rel1->relids, phinfo->ph_eval_at) &&
955 bms_is_subset(rel2->relids, phinfo->ph_eval_at))
956 return true;
957 }
958
959 /*
960 * It's possible that the rels correspond to the left and right sides of a
961 * degenerate outer join, that is, one with no joinclause mentioning the
962 * non-nullable side; in which case we should force the join to occur.
963 *
964 * Also, the two rels could represent a clauseless join that has to be
965 * completed to build up the LHS or RHS of an outer join.
966 */
967 foreach(l, root->join_info_list)
968 {
969 SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
970
971 /* ignore full joins --- other mechanisms handle them */
972 if (sjinfo->jointype == JOIN_FULL)
973 continue;
974
975 /* Can we perform the SJ with these rels? */
976 if (bms_is_subset(sjinfo->min_lefthand, rel1->relids) &&
977 bms_is_subset(sjinfo->min_righthand, rel2->relids))
978 {
979 result = true;
980 break;
981 }
982 if (bms_is_subset(sjinfo->min_lefthand, rel2->relids) &&
983 bms_is_subset(sjinfo->min_righthand, rel1->relids))
984 {
985 result = true;
986 break;
987 }
988
989 /*
990 * Might we need to join these rels to complete the RHS? We have to
991 * use "overlap" tests since either rel might include a lower SJ that
992 * has been proven to commute with this one.
993 */
994 if (bms_overlap(sjinfo->min_righthand, rel1->relids) &&
995 bms_overlap(sjinfo->min_righthand, rel2->relids))
996 {
997 result = true;
998 break;
999 }
1000
1001 /* Likewise for the LHS. */
1002 if (bms_overlap(sjinfo->min_lefthand, rel1->relids) &&
1003 bms_overlap(sjinfo->min_lefthand, rel2->relids))
1004 {
1005 result = true;
1006 break;
1007 }
1008 }
1009
1010 /*
1011 * We do not force the join to occur if either input rel can legally be
1012 * joined to anything else using joinclauses. This essentially means that
1013 * clauseless bushy joins are put off as long as possible. The reason is
1014 * that when there is a join order restriction high up in the join tree
1015 * (that is, with many rels inside the LHS or RHS), we would otherwise
1016 * expend lots of effort considering very stupid join combinations within
1017 * its LHS or RHS.
1018 */
1019 if (result)
1020 {
1021 if (has_legal_joinclause(root, rel1) ||
1022 has_legal_joinclause(root, rel2))
1023 result = false;
1024 }
1025
1026 return result;
1027}
1028
1029
1030/*
1031 * has_join_restriction
1032 * Detect whether the specified relation has join-order restrictions,
1033 * due to being inside an outer join or an IN (sub-SELECT),
1034 * or participating in any LATERAL references or multi-rel PHVs.
1035 *
1036 * Essentially, this tests whether have_join_order_restriction() could
1037 * succeed with this rel and some other one. It's OK if we sometimes
1038 * say "true" incorrectly. (Therefore, we don't bother with the relatively
1039 * expensive has_legal_joinclause test.)
1040 */
1041static bool
1042has_join_restriction(PlannerInfo *root, RelOptInfo *rel)
1043{
1044 ListCell *l;
1045
1046 if (rel->lateral_relids != NULL || rel->lateral_referencers != NULL)
1047 return true;
1048
1049 foreach(l, root->placeholder_list)
1050 {
1051 PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(l);
1052
1053 if (bms_is_subset(rel->relids, phinfo->ph_eval_at) &&
1054 !bms_equal(rel->relids, phinfo->ph_eval_at))
1055 return true;
1056 }
1057
1058 foreach(l, root->join_info_list)
1059 {
1060 SpecialJoinInfo *sjinfo = (SpecialJoinInfo *) lfirst(l);
1061
1062 /* ignore full joins --- other mechanisms preserve their ordering */
1063 if (sjinfo->jointype == JOIN_FULL)
1064 continue;
1065
1066 /* ignore if SJ is already contained in rel */
1067 if (bms_is_subset(sjinfo->min_lefthand, rel->relids) &&
1068 bms_is_subset(sjinfo->min_righthand, rel->relids))
1069 continue;
1070
1071 /* restricted if it overlaps LHS or RHS, but doesn't contain SJ */
1072 if (bms_overlap(sjinfo->min_lefthand, rel->relids) ||
1073 bms_overlap(sjinfo->min_righthand, rel->relids))
1074 return true;
1075 }
1076
1077 return false;
1078}
1079
1080
1081/*
1082 * has_legal_joinclause
1083 * Detect whether the specified relation can legally be joined
1084 * to any other rels using join clauses.
1085 *
1086 * We consider only joins to single other relations in the current
1087 * initial_rels list. This is sufficient to get a "true" result in most real
1088 * queries, and an occasional erroneous "false" will only cost a bit more
1089 * planning time. The reason for this limitation is that considering joins to
1090 * other joins would require proving that the other join rel can legally be
1091 * formed, which seems like too much trouble for something that's only a
1092 * heuristic to save planning time. (Note: we must look at initial_rels
1093 * and not all of the query, since when we are planning a sub-joinlist we
1094 * may be forced to make clauseless joins within initial_rels even though
1095 * there are join clauses linking to other parts of the query.)
1096 */
1097static bool
1098has_legal_joinclause(PlannerInfo *root, RelOptInfo *rel)
1099{
1100 ListCell *lc;
1101
1102 foreach(lc, root->initial_rels)
1103 {
1104 RelOptInfo *rel2 = (RelOptInfo *) lfirst(lc);
1105
1106 /* ignore rels that are already in "rel" */
1107 if (bms_overlap(rel->relids, rel2->relids))
1108 continue;
1109
1110 if (have_relevant_joinclause(root, rel, rel2))
1111 {
1112 Relids joinrelids;
1113 SpecialJoinInfo *sjinfo;
1114 bool reversed;
1115
1116 /* join_is_legal needs relids of the union */
1117 joinrelids = bms_union(rel->relids, rel2->relids);
1118
1119 if (join_is_legal(root, rel, rel2, joinrelids,
1120 &sjinfo, &reversed))
1121 {
1122 /* Yes, this will work */
1123 bms_free(joinrelids);
1124 return true;
1125 }
1126
1127 bms_free(joinrelids);
1128 }
1129 }
1130
1131 return false;
1132}
1133
1134
1135/*
1136 * There's a pitfall for creating parameterized nestloops: suppose the inner
1137 * rel (call it A) has a parameter that is a PlaceHolderVar, and that PHV's
1138 * minimum eval_at set includes the outer rel (B) and some third rel (C).
1139 * We might think we could create a B/A nestloop join that's parameterized by
1140 * C. But we would end up with a plan in which the PHV's expression has to be
1141 * evaluated as a nestloop parameter at the B/A join; and the executor is only
1142 * set up to handle simple Vars as NestLoopParams. Rather than add complexity
1143 * and overhead to the executor for such corner cases, it seems better to
1144 * forbid the join. (Note that we can still make use of A's parameterized
1145 * path with pre-joined B+C as the outer rel. have_join_order_restriction()
1146 * ensures that we will consider making such a join even if there are not
1147 * other reasons to do so.)
1148 *
1149 * So we check whether any PHVs used in the query could pose such a hazard.
1150 * We don't have any simple way of checking whether a risky PHV would actually
1151 * be used in the inner plan, and the case is so unusual that it doesn't seem
1152 * worth working very hard on it.
1153 *
1154 * This needs to be checked in two places. If the inner rel's minimum
1155 * parameterization would trigger the restriction, then join_is_legal() should
1156 * reject the join altogether, because there will be no workable paths for it.
1157 * But joinpath.c has to check again for every proposed nestloop path, because
1158 * the inner path might have more than the minimum parameterization, causing
1159 * some PHV to be dangerous for it that otherwise wouldn't be.
1160 */
1161bool
1162have_dangerous_phv(PlannerInfo *root,
1163 Relids outer_relids, Relids inner_params)
1164{
1165 ListCell *lc;
1166
1167 foreach(lc, root->placeholder_list)
1168 {
1169 PlaceHolderInfo *phinfo = (PlaceHolderInfo *) lfirst(lc);
1170
1171 if (!bms_is_subset(phinfo->ph_eval_at, inner_params))
1172 continue; /* ignore, could not be a nestloop param */
1173 if (!bms_overlap(phinfo->ph_eval_at, outer_relids))
1174 continue; /* ignore, not relevant to this join */
1175 if (bms_is_subset(phinfo->ph_eval_at, outer_relids))
1176 continue; /* safe, it can be eval'd within outerrel */
1177 /* Otherwise, it's potentially unsafe, so reject the join */
1178 return true;
1179 }
1180
1181 /* OK to perform the join */
1182 return false;
1183}
1184
1185
1186/*
1187 * is_dummy_rel --- has relation been proven empty?
1188 */
1189bool
1190is_dummy_rel(RelOptInfo *rel)
1191{
1192 Path *path;
1193
1194 /*
1195 * A rel that is known dummy will have just one path that is a childless
1196 * Append. (Even if somehow it has more paths, a childless Append will
1197 * have cost zero and hence should be at the front of the pathlist.)
1198 */
1199 if (rel->pathlist == NIL)
1200 return false;
1201 path = (Path *) linitial(rel->pathlist);
1202
1203 /*
1204 * Initially, a dummy path will just be a childless Append. But in later
1205 * planning stages we might stick a ProjectSetPath and/or ProjectionPath
1206 * on top, since Append can't project. Rather than make assumptions about
1207 * which combinations can occur, just descend through whatever we find.
1208 */
1209 for (;;)
1210 {
1211 if (IsA(path, ProjectionPath))
1212 path = ((ProjectionPath *) path)->subpath;
1213 else if (IsA(path, ProjectSetPath))
1214 path = ((ProjectSetPath *) path)->subpath;
1215 else
1216 break;
1217 }
1218 if (IS_DUMMY_APPEND(path))
1219 return true;
1220 return false;
1221}
1222
1223/*
1224 * Mark a relation as proven empty.
1225 *
1226 * During GEQO planning, this can get invoked more than once on the same
1227 * baserel struct, so it's worth checking to see if the rel is already marked
1228 * dummy.
1229 *
1230 * Also, when called during GEQO join planning, we are in a short-lived
1231 * memory context. We must make sure that the dummy path attached to a
1232 * baserel survives the GEQO cycle, else the baserel is trashed for future
1233 * GEQO cycles. On the other hand, when we are marking a joinrel during GEQO,
1234 * we don't want the dummy path to clutter the main planning context. Upshot
1235 * is that the best solution is to explicitly make the dummy path in the same
1236 * context the given RelOptInfo is in.
1237 */
1238void
1239mark_dummy_rel(RelOptInfo *rel)
1240{
1241 MemoryContext oldcontext;
1242
1243 /* Already marked? */
1244 if (is_dummy_rel(rel))
1245 return;
1246
1247 /* No, so choose correct context to make the dummy path in */
1248 oldcontext = MemoryContextSwitchTo(GetMemoryChunkContext(rel));
1249
1250 /* Set dummy size estimate */
1251 rel->rows = 0;
1252
1253 /* Evict any previously chosen paths */
1254 rel->pathlist = NIL;
1255 rel->partial_pathlist = NIL;
1256
1257 /* Set up the dummy path */
1258 add_path(rel, (Path *) create_append_path(NULL, rel, NIL, NIL,
1259 NIL, rel->lateral_relids,
1260 0, false, NIL, -1));
1261
1262 /* Set or update cheapest_total_path and related fields */
1263 set_cheapest(rel);
1264
1265 MemoryContextSwitchTo(oldcontext);
1266}
1267
1268
1269/*
1270 * restriction_is_constant_false --- is a restrictlist just FALSE?
1271 *
1272 * In cases where a qual is provably constant FALSE, eval_const_expressions
1273 * will generally have thrown away anything that's ANDed with it. In outer
1274 * join situations this will leave us computing cartesian products only to
1275 * decide there's no match for an outer row, which is pretty stupid. So,
1276 * we need to detect the case.
1277 *
1278 * If only_pushed_down is true, then consider only quals that are pushed-down
1279 * from the point of view of the joinrel.
1280 */
1281static bool
1282restriction_is_constant_false(List *restrictlist,
1283 RelOptInfo *joinrel,
1284 bool only_pushed_down)
1285{
1286 ListCell *lc;
1287
1288 /*
1289 * Despite the above comment, the restriction list we see here might
1290 * possibly have other members besides the FALSE constant, since other
1291 * quals could get "pushed down" to the outer join level. So we check
1292 * each member of the list.
1293 */
1294 foreach(lc, restrictlist)
1295 {
1296 RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
1297
1298 if (only_pushed_down && !RINFO_IS_PUSHED_DOWN(rinfo, joinrel->relids))
1299 continue;
1300
1301 if (rinfo->clause && IsA(rinfo->clause, Const))
1302 {
1303 Const *con = (Const *) rinfo->clause;
1304
1305 /* constant NULL is as good as constant FALSE for our purposes */
1306 if (con->constisnull)
1307 return true;
1308 if (!DatumGetBool(con->constvalue))
1309 return true;
1310 }
1311 }
1312 return false;
1313}
1314
1315/*
1316 * Assess whether join between given two partitioned relations can be broken
1317 * down into joins between matching partitions; a technique called
1318 * "partitionwise join"
1319 *
1320 * Partitionwise join is possible when a. Joining relations have same
1321 * partitioning scheme b. There exists an equi-join between the partition keys
1322 * of the two relations.
1323 *
1324 * Partitionwise join is planned as follows (details: optimizer/README.)
1325 *
1326 * 1. Create the RelOptInfos for joins between matching partitions i.e
1327 * child-joins and add paths to them.
1328 *
1329 * 2. Construct Append or MergeAppend paths across the set of child joins.
1330 * This second phase is implemented by generate_partitionwise_join_paths().
1331 *
1332 * The RelOptInfo, SpecialJoinInfo and restrictlist for each child join are
1333 * obtained by translating the respective parent join structures.
1334 */
1335static void
1336try_partitionwise_join(PlannerInfo *root, RelOptInfo *rel1, RelOptInfo *rel2,
1337 RelOptInfo *joinrel, SpecialJoinInfo *parent_sjinfo,
1338 List *parent_restrictlist)
1339{
1340 bool rel1_is_simple = IS_SIMPLE_REL(rel1);
1341 bool rel2_is_simple = IS_SIMPLE_REL(rel2);
1342 int nparts;
1343 int cnt_parts;
1344
1345 /* Guard against stack overflow due to overly deep partition hierarchy. */
1346 check_stack_depth();
1347
1348 /* Nothing to do, if the join relation is not partitioned. */
1349 if (!IS_PARTITIONED_REL(joinrel))
1350 return;
1351
1352 /* The join relation should have consider_partitionwise_join set. */
1353 Assert(joinrel->consider_partitionwise_join);
1354
1355 /*
1356 * Since this join relation is partitioned, all the base relations
1357 * participating in this join must be partitioned and so are all the
1358 * intermediate join relations.
1359 */
1360 Assert(IS_PARTITIONED_REL(rel1) && IS_PARTITIONED_REL(rel2));
1361 Assert(REL_HAS_ALL_PART_PROPS(rel1) && REL_HAS_ALL_PART_PROPS(rel2));
1362
1363 /* The joining relations should have consider_partitionwise_join set. */
1364 Assert(rel1->consider_partitionwise_join &&
1365 rel2->consider_partitionwise_join);
1366
1367 /*
1368 * The partition scheme of the join relation should match that of the
1369 * joining relations.
1370 */
1371 Assert(joinrel->part_scheme == rel1->part_scheme &&
1372 joinrel->part_scheme == rel2->part_scheme);
1373
1374 /*
1375 * Since we allow partitionwise join only when the partition bounds of the
1376 * joining relations exactly match, the partition bounds of the join
1377 * should match those of the joining relations.
1378 */
1379 Assert(partition_bounds_equal(joinrel->part_scheme->partnatts,
1380 joinrel->part_scheme->parttyplen,
1381 joinrel->part_scheme->parttypbyval,
1382 joinrel->boundinfo, rel1->boundinfo));
1383 Assert(partition_bounds_equal(joinrel->part_scheme->partnatts,
1384 joinrel->part_scheme->parttyplen,
1385 joinrel->part_scheme->parttypbyval,
1386 joinrel->boundinfo, rel2->boundinfo));
1387
1388 nparts = joinrel->nparts;
1389
1390 /*
1391 * Create child-join relations for this partitioned join, if those don't
1392 * exist. Add paths to child-joins for a pair of child relations
1393 * corresponding to the given pair of parent relations.
1394 */
1395 for (cnt_parts = 0; cnt_parts < nparts; cnt_parts++)
1396 {
1397 RelOptInfo *child_rel1 = rel1->part_rels[cnt_parts];
1398 RelOptInfo *child_rel2 = rel2->part_rels[cnt_parts];
1399 bool rel1_empty = (child_rel1 == NULL ||
1400 IS_DUMMY_REL(child_rel1));
1401 bool rel2_empty = (child_rel2 == NULL ||
1402 IS_DUMMY_REL(child_rel2));
1403 SpecialJoinInfo *child_sjinfo;
1404 List *child_restrictlist;
1405 RelOptInfo *child_joinrel;
1406 Relids child_joinrelids;
1407 AppendRelInfo **appinfos;
1408 int nappinfos;
1409
1410 /*
1411 * Check for cases where we can prove that this segment of the join
1412 * returns no rows, due to one or both inputs being empty (including
1413 * inputs that have been pruned away entirely). If so just ignore it.
1414 * These rules are equivalent to populate_joinrel_with_paths's rules
1415 * for dummy input relations.
1416 */
1417 switch (parent_sjinfo->jointype)
1418 {
1419 case JOIN_INNER:
1420 case JOIN_SEMI:
1421 if (rel1_empty || rel2_empty)
1422 continue; /* ignore this join segment */
1423 break;
1424 case JOIN_LEFT:
1425 case JOIN_ANTI:
1426 if (rel1_empty)
1427 continue; /* ignore this join segment */
1428 break;
1429 case JOIN_FULL:
1430 if (rel1_empty && rel2_empty)
1431 continue; /* ignore this join segment */
1432 break;
1433 default:
1434 /* other values not expected here */
1435 elog(ERROR, "unrecognized join type: %d",
1436 (int) parent_sjinfo->jointype);
1437 break;
1438 }
1439
1440 /*
1441 * If a child has been pruned entirely then we can't generate paths
1442 * for it, so we have to reject partitionwise joining unless we were
1443 * able to eliminate this partition above.
1444 */
1445 if (child_rel1 == NULL || child_rel2 == NULL)
1446 {
1447 /*
1448 * Mark the joinrel as unpartitioned so that later functions treat
1449 * it correctly.
1450 */
1451 joinrel->nparts = 0;
1452 return;
1453 }
1454
1455 /*
1456 * If a leaf relation has consider_partitionwise_join=false, it means
1457 * that it's a dummy relation for which we skipped setting up tlist
1458 * expressions and adding EC members in set_append_rel_size(), so
1459 * again we have to fail here.
1460 */
1461 if (rel1_is_simple && !child_rel1->consider_partitionwise_join)
1462 {
1463 Assert(child_rel1->reloptkind == RELOPT_OTHER_MEMBER_REL);
1464 Assert(IS_DUMMY_REL(child_rel1));
1465 joinrel->nparts = 0;
1466 return;
1467 }
1468 if (rel2_is_simple && !child_rel2->consider_partitionwise_join)
1469 {
1470 Assert(child_rel2->reloptkind == RELOPT_OTHER_MEMBER_REL);
1471 Assert(IS_DUMMY_REL(child_rel2));
1472 joinrel->nparts = 0;
1473 return;
1474 }
1475
1476 /* We should never try to join two overlapping sets of rels. */
1477 Assert(!bms_overlap(child_rel1->relids, child_rel2->relids));
1478 child_joinrelids = bms_union(child_rel1->relids, child_rel2->relids);
1479 appinfos = find_appinfos_by_relids(root, child_joinrelids, &nappinfos);
1480
1481 /*
1482 * Construct SpecialJoinInfo from parent join relations's
1483 * SpecialJoinInfo.
1484 */
1485 child_sjinfo = build_child_join_sjinfo(root, parent_sjinfo,
1486 child_rel1->relids,
1487 child_rel2->relids);
1488
1489 /*
1490 * Construct restrictions applicable to the child join from those
1491 * applicable to the parent join.
1492 */
1493 child_restrictlist =
1494 (List *) adjust_appendrel_attrs(root,
1495 (Node *) parent_restrictlist,
1496 nappinfos, appinfos);
1497 pfree(appinfos);
1498
1499 child_joinrel = joinrel->part_rels[cnt_parts];
1500 if (!child_joinrel)
1501 {
1502 child_joinrel = build_child_join_rel(root, child_rel1, child_rel2,
1503 joinrel, child_restrictlist,
1504 child_sjinfo,
1505 child_sjinfo->jointype);
1506 joinrel->part_rels[cnt_parts] = child_joinrel;
1507 }
1508
1509 Assert(bms_equal(child_joinrel->relids, child_joinrelids));
1510
1511 populate_joinrel_with_paths(root, child_rel1, child_rel2,
1512 child_joinrel, child_sjinfo,
1513 child_restrictlist);
1514 }
1515}
1516
1517/*
1518 * Construct the SpecialJoinInfo for a child-join by translating
1519 * SpecialJoinInfo for the join between parents. left_relids and right_relids
1520 * are the relids of left and right side of the join respectively.
1521 */
1522static SpecialJoinInfo *
1523build_child_join_sjinfo(PlannerInfo *root, SpecialJoinInfo *parent_sjinfo,
1524 Relids left_relids, Relids right_relids)
1525{
1526 SpecialJoinInfo *sjinfo = makeNode(SpecialJoinInfo);
1527 AppendRelInfo **left_appinfos;
1528 int left_nappinfos;
1529 AppendRelInfo **right_appinfos;
1530 int right_nappinfos;
1531
1532 memcpy(sjinfo, parent_sjinfo, sizeof(SpecialJoinInfo));
1533 left_appinfos = find_appinfos_by_relids(root, left_relids,
1534 &left_nappinfos);
1535 right_appinfos = find_appinfos_by_relids(root, right_relids,
1536 &right_nappinfos);
1537
1538 sjinfo->min_lefthand = adjust_child_relids(sjinfo->min_lefthand,
1539 left_nappinfos, left_appinfos);
1540 sjinfo->min_righthand = adjust_child_relids(sjinfo->min_righthand,
1541 right_nappinfos,
1542 right_appinfos);
1543 sjinfo->syn_lefthand = adjust_child_relids(sjinfo->syn_lefthand,
1544 left_nappinfos, left_appinfos);
1545 sjinfo->syn_righthand = adjust_child_relids(sjinfo->syn_righthand,
1546 right_nappinfos,
1547 right_appinfos);
1548 sjinfo->semi_rhs_exprs = (List *) adjust_appendrel_attrs(root,
1549 (Node *) sjinfo->semi_rhs_exprs,
1550 right_nappinfos,
1551 right_appinfos);
1552
1553 pfree(left_appinfos);
1554 pfree(right_appinfos);
1555
1556 return sjinfo;
1557}
1558
1559/*
1560 * Returns true if there exists an equi-join condition for each pair of
1561 * partition keys from given relations being joined.
1562 */
1563bool
1564have_partkey_equi_join(RelOptInfo *joinrel,
1565 RelOptInfo *rel1, RelOptInfo *rel2,
1566 JoinType jointype, List *restrictlist)
1567{
1568 PartitionScheme part_scheme = rel1->part_scheme;
1569 ListCell *lc;
1570 int cnt_pks;
1571 bool pk_has_clause[PARTITION_MAX_KEYS];
1572 bool strict_op;
1573
1574 /*
1575 * This function should be called when the joining relations have same
1576 * partitioning scheme.
1577 */
1578 Assert(rel1->part_scheme == rel2->part_scheme);
1579 Assert(part_scheme);
1580
1581 memset(pk_has_clause, 0, sizeof(pk_has_clause));
1582 foreach(lc, restrictlist)
1583 {
1584 RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc);
1585 OpExpr *opexpr;
1586 Expr *expr1;
1587 Expr *expr2;
1588 int ipk1;
1589 int ipk2;
1590
1591 /* If processing an outer join, only use its own join clauses. */
1592 if (IS_OUTER_JOIN(jointype) &&
1593 RINFO_IS_PUSHED_DOWN(rinfo, joinrel->relids))
1594 continue;
1595
1596 /* Skip clauses which can not be used for a join. */
1597 if (!rinfo->can_join)
1598 continue;
1599
1600 /* Skip clauses which are not equality conditions. */
1601 if (!rinfo->mergeopfamilies && !OidIsValid(rinfo->hashjoinoperator))
1602 continue;
1603
1604 opexpr = castNode(OpExpr, rinfo->clause);
1605
1606 /*
1607 * The equi-join between partition keys is strict if equi-join between
1608 * at least one partition key is using a strict operator. See
1609 * explanation about outer join reordering identity 3 in
1610 * optimizer/README
1611 */
1612 strict_op = op_strict(opexpr->opno);
1613
1614 /* Match the operands to the relation. */
1615 if (bms_is_subset(rinfo->left_relids, rel1->relids) &&
1616 bms_is_subset(rinfo->right_relids, rel2->relids))
1617 {
1618 expr1 = linitial(opexpr->args);
1619 expr2 = lsecond(opexpr->args);
1620 }
1621 else if (bms_is_subset(rinfo->left_relids, rel2->relids) &&
1622 bms_is_subset(rinfo->right_relids, rel1->relids))
1623 {
1624 expr1 = lsecond(opexpr->args);
1625 expr2 = linitial(opexpr->args);
1626 }
1627 else
1628 continue;
1629
1630 /*
1631 * Only clauses referencing the partition keys are useful for
1632 * partitionwise join.
1633 */
1634 ipk1 = match_expr_to_partition_keys(expr1, rel1, strict_op);
1635 if (ipk1 < 0)
1636 continue;
1637 ipk2 = match_expr_to_partition_keys(expr2, rel2, strict_op);
1638 if (ipk2 < 0)
1639 continue;
1640
1641 /*
1642 * If the clause refers to keys at different ordinal positions, it can
1643 * not be used for partitionwise join.
1644 */
1645 if (ipk1 != ipk2)
1646 continue;
1647
1648 /*
1649 * The clause allows partitionwise join if only it uses the same
1650 * operator family as that specified by the partition key.
1651 */
1652 if (rel1->part_scheme->strategy == PARTITION_STRATEGY_HASH)
1653 {
1654 if (!op_in_opfamily(rinfo->hashjoinoperator,
1655 part_scheme->partopfamily[ipk1]))
1656 continue;
1657 }
1658 else if (!list_member_oid(rinfo->mergeopfamilies,
1659 part_scheme->partopfamily[ipk1]))
1660 continue;
1661
1662 /* Mark the partition key as having an equi-join clause. */
1663 pk_has_clause[ipk1] = true;
1664 }
1665
1666 /* Check whether every partition key has an equi-join condition. */
1667 for (cnt_pks = 0; cnt_pks < part_scheme->partnatts; cnt_pks++)
1668 {
1669 if (!pk_has_clause[cnt_pks])
1670 return false;
1671 }
1672
1673 return true;
1674}
1675
1676/*
1677 * Find the partition key from the given relation matching the given
1678 * expression. If found, return the index of the partition key, else return -1.
1679 */
1680static int
1681match_expr_to_partition_keys(Expr *expr, RelOptInfo *rel, bool strict_op)
1682{
1683 int cnt;
1684
1685 /* This function should be called only for partitioned relations. */
1686 Assert(rel->part_scheme);
1687
1688 /* Remove any relabel decorations. */
1689 while (IsA(expr, RelabelType))
1690 expr = (Expr *) (castNode(RelabelType, expr))->arg;
1691
1692 for (cnt = 0; cnt < rel->part_scheme->partnatts; cnt++)
1693 {
1694 ListCell *lc;
1695
1696 Assert(rel->partexprs);
1697 foreach(lc, rel->partexprs[cnt])
1698 {
1699 if (equal(lfirst(lc), expr))
1700 return cnt;
1701 }
1702
1703 if (!strict_op)
1704 continue;
1705
1706 /*
1707 * If it's a strict equi-join a NULL partition key on one side will
1708 * not join a NULL partition key on the other side. So, rows with NULL
1709 * partition key from a partition on one side can not join with those
1710 * from a non-matching partition on the other side. So, search the
1711 * nullable partition keys as well.
1712 */
1713 Assert(rel->nullable_partexprs);
1714 foreach(lc, rel->nullable_partexprs[cnt])
1715 {
1716 if (equal(lfirst(lc), expr))
1717 return cnt;
1718 }
1719 }
1720
1721 return -1;
1722}
1723