1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * restrictinfo.c |
4 | * RestrictInfo node manipulation routines. |
5 | * |
6 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
7 | * Portions Copyright (c) 1994, Regents of the University of California |
8 | * |
9 | * |
10 | * IDENTIFICATION |
11 | * src/backend/optimizer/util/restrictinfo.c |
12 | * |
13 | *------------------------------------------------------------------------- |
14 | */ |
15 | #include "postgres.h" |
16 | |
17 | #include "nodes/makefuncs.h" |
18 | #include "nodes/nodeFuncs.h" |
19 | #include "optimizer/clauses.h" |
20 | #include "optimizer/optimizer.h" |
21 | #include "optimizer/restrictinfo.h" |
22 | |
23 | |
24 | static RestrictInfo *make_restrictinfo_internal(Expr *clause, |
25 | Expr *orclause, |
26 | bool is_pushed_down, |
27 | bool outerjoin_delayed, |
28 | bool pseudoconstant, |
29 | Index security_level, |
30 | Relids required_relids, |
31 | Relids outer_relids, |
32 | Relids nullable_relids); |
33 | static Expr *make_sub_restrictinfos(Expr *clause, |
34 | bool is_pushed_down, |
35 | bool outerjoin_delayed, |
36 | bool pseudoconstant, |
37 | Index security_level, |
38 | Relids required_relids, |
39 | Relids outer_relids, |
40 | Relids nullable_relids); |
41 | |
42 | |
43 | /* |
44 | * make_restrictinfo |
45 | * |
46 | * Build a RestrictInfo node containing the given subexpression. |
47 | * |
48 | * The is_pushed_down, outerjoin_delayed, and pseudoconstant flags for the |
49 | * RestrictInfo must be supplied by the caller, as well as the correct values |
50 | * for security_level, outer_relids, and nullable_relids. |
51 | * required_relids can be NULL, in which case it defaults to the actual clause |
52 | * contents (i.e., clause_relids). |
53 | * |
54 | * We initialize fields that depend only on the given subexpression, leaving |
55 | * others that depend on context (or may never be needed at all) to be filled |
56 | * later. |
57 | */ |
58 | RestrictInfo * |
59 | make_restrictinfo(Expr *clause, |
60 | bool is_pushed_down, |
61 | bool outerjoin_delayed, |
62 | bool pseudoconstant, |
63 | Index security_level, |
64 | Relids required_relids, |
65 | Relids outer_relids, |
66 | Relids nullable_relids) |
67 | { |
68 | /* |
69 | * If it's an OR clause, build a modified copy with RestrictInfos inserted |
70 | * above each subclause of the top-level AND/OR structure. |
71 | */ |
72 | if (is_orclause(clause)) |
73 | return (RestrictInfo *) make_sub_restrictinfos(clause, |
74 | is_pushed_down, |
75 | outerjoin_delayed, |
76 | pseudoconstant, |
77 | security_level, |
78 | required_relids, |
79 | outer_relids, |
80 | nullable_relids); |
81 | |
82 | /* Shouldn't be an AND clause, else AND/OR flattening messed up */ |
83 | Assert(!is_andclause(clause)); |
84 | |
85 | return make_restrictinfo_internal(clause, |
86 | NULL, |
87 | is_pushed_down, |
88 | outerjoin_delayed, |
89 | pseudoconstant, |
90 | security_level, |
91 | required_relids, |
92 | outer_relids, |
93 | nullable_relids); |
94 | } |
95 | |
96 | /* |
97 | * make_restrictinfo_internal |
98 | * |
99 | * Common code for the main entry points and the recursive cases. |
100 | */ |
101 | static RestrictInfo * |
102 | make_restrictinfo_internal(Expr *clause, |
103 | Expr *orclause, |
104 | bool is_pushed_down, |
105 | bool outerjoin_delayed, |
106 | bool pseudoconstant, |
107 | Index security_level, |
108 | Relids required_relids, |
109 | Relids outer_relids, |
110 | Relids nullable_relids) |
111 | { |
112 | RestrictInfo *restrictinfo = makeNode(RestrictInfo); |
113 | |
114 | restrictinfo->clause = clause; |
115 | restrictinfo->orclause = orclause; |
116 | restrictinfo->is_pushed_down = is_pushed_down; |
117 | restrictinfo->outerjoin_delayed = outerjoin_delayed; |
118 | restrictinfo->pseudoconstant = pseudoconstant; |
119 | restrictinfo->can_join = false; /* may get set below */ |
120 | restrictinfo->security_level = security_level; |
121 | restrictinfo->outer_relids = outer_relids; |
122 | restrictinfo->nullable_relids = nullable_relids; |
123 | |
124 | /* |
125 | * If it's potentially delayable by lower-level security quals, figure out |
126 | * whether it's leakproof. We can skip testing this for level-zero quals, |
127 | * since they would never get delayed on security grounds anyway. |
128 | */ |
129 | if (security_level > 0) |
130 | restrictinfo->leakproof = !contain_leaked_vars((Node *) clause); |
131 | else |
132 | restrictinfo->leakproof = false; /* really, "don't know" */ |
133 | |
134 | /* |
135 | * If it's a binary opclause, set up left/right relids info. In any case |
136 | * set up the total clause relids info. |
137 | */ |
138 | if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2) |
139 | { |
140 | restrictinfo->left_relids = pull_varnos(get_leftop(clause)); |
141 | restrictinfo->right_relids = pull_varnos(get_rightop(clause)); |
142 | |
143 | restrictinfo->clause_relids = bms_union(restrictinfo->left_relids, |
144 | restrictinfo->right_relids); |
145 | |
146 | /* |
147 | * Does it look like a normal join clause, i.e., a binary operator |
148 | * relating expressions that come from distinct relations? If so we |
149 | * might be able to use it in a join algorithm. Note that this is a |
150 | * purely syntactic test that is made regardless of context. |
151 | */ |
152 | if (!bms_is_empty(restrictinfo->left_relids) && |
153 | !bms_is_empty(restrictinfo->right_relids) && |
154 | !bms_overlap(restrictinfo->left_relids, |
155 | restrictinfo->right_relids)) |
156 | { |
157 | restrictinfo->can_join = true; |
158 | /* pseudoconstant should certainly not be true */ |
159 | Assert(!restrictinfo->pseudoconstant); |
160 | } |
161 | } |
162 | else |
163 | { |
164 | /* Not a binary opclause, so mark left/right relid sets as empty */ |
165 | restrictinfo->left_relids = NULL; |
166 | restrictinfo->right_relids = NULL; |
167 | /* and get the total relid set the hard way */ |
168 | restrictinfo->clause_relids = pull_varnos((Node *) clause); |
169 | } |
170 | |
171 | /* required_relids defaults to clause_relids */ |
172 | if (required_relids != NULL) |
173 | restrictinfo->required_relids = required_relids; |
174 | else |
175 | restrictinfo->required_relids = restrictinfo->clause_relids; |
176 | |
177 | /* |
178 | * Fill in all the cacheable fields with "not yet set" markers. None of |
179 | * these will be computed until/unless needed. Note in particular that we |
180 | * don't mark a binary opclause as mergejoinable or hashjoinable here; |
181 | * that happens only if it appears in the right context (top level of a |
182 | * joinclause list). |
183 | */ |
184 | restrictinfo->parent_ec = NULL; |
185 | |
186 | restrictinfo->eval_cost.startup = -1; |
187 | restrictinfo->norm_selec = -1; |
188 | restrictinfo->outer_selec = -1; |
189 | |
190 | restrictinfo->mergeopfamilies = NIL; |
191 | |
192 | restrictinfo->left_ec = NULL; |
193 | restrictinfo->right_ec = NULL; |
194 | restrictinfo->left_em = NULL; |
195 | restrictinfo->right_em = NULL; |
196 | restrictinfo->scansel_cache = NIL; |
197 | |
198 | restrictinfo->outer_is_left = false; |
199 | |
200 | restrictinfo->hashjoinoperator = InvalidOid; |
201 | |
202 | restrictinfo->left_bucketsize = -1; |
203 | restrictinfo->right_bucketsize = -1; |
204 | restrictinfo->left_mcvfreq = -1; |
205 | restrictinfo->right_mcvfreq = -1; |
206 | |
207 | return restrictinfo; |
208 | } |
209 | |
210 | /* |
211 | * Recursively insert sub-RestrictInfo nodes into a boolean expression. |
212 | * |
213 | * We put RestrictInfos above simple (non-AND/OR) clauses and above |
214 | * sub-OR clauses, but not above sub-AND clauses, because there's no need. |
215 | * This may seem odd but it is closely related to the fact that we use |
216 | * implicit-AND lists at top level of RestrictInfo lists. Only ORs and |
217 | * simple clauses are valid RestrictInfos. |
218 | * |
219 | * The same is_pushed_down, outerjoin_delayed, and pseudoconstant flag |
220 | * values can be applied to all RestrictInfo nodes in the result. Likewise |
221 | * for security_level, outer_relids, and nullable_relids. |
222 | * |
223 | * The given required_relids are attached to our top-level output, |
224 | * but any OR-clause constituents are allowed to default to just the |
225 | * contained rels. |
226 | */ |
227 | static Expr * |
228 | make_sub_restrictinfos(Expr *clause, |
229 | bool is_pushed_down, |
230 | bool outerjoin_delayed, |
231 | bool pseudoconstant, |
232 | Index security_level, |
233 | Relids required_relids, |
234 | Relids outer_relids, |
235 | Relids nullable_relids) |
236 | { |
237 | if (is_orclause(clause)) |
238 | { |
239 | List *orlist = NIL; |
240 | ListCell *temp; |
241 | |
242 | foreach(temp, ((BoolExpr *) clause)->args) |
243 | orlist = lappend(orlist, |
244 | make_sub_restrictinfos(lfirst(temp), |
245 | is_pushed_down, |
246 | outerjoin_delayed, |
247 | pseudoconstant, |
248 | security_level, |
249 | NULL, |
250 | outer_relids, |
251 | nullable_relids)); |
252 | return (Expr *) make_restrictinfo_internal(clause, |
253 | make_orclause(orlist), |
254 | is_pushed_down, |
255 | outerjoin_delayed, |
256 | pseudoconstant, |
257 | security_level, |
258 | required_relids, |
259 | outer_relids, |
260 | nullable_relids); |
261 | } |
262 | else if (is_andclause(clause)) |
263 | { |
264 | List *andlist = NIL; |
265 | ListCell *temp; |
266 | |
267 | foreach(temp, ((BoolExpr *) clause)->args) |
268 | andlist = lappend(andlist, |
269 | make_sub_restrictinfos(lfirst(temp), |
270 | is_pushed_down, |
271 | outerjoin_delayed, |
272 | pseudoconstant, |
273 | security_level, |
274 | required_relids, |
275 | outer_relids, |
276 | nullable_relids)); |
277 | return make_andclause(andlist); |
278 | } |
279 | else |
280 | return (Expr *) make_restrictinfo_internal(clause, |
281 | NULL, |
282 | is_pushed_down, |
283 | outerjoin_delayed, |
284 | pseudoconstant, |
285 | security_level, |
286 | required_relids, |
287 | outer_relids, |
288 | nullable_relids); |
289 | } |
290 | |
291 | /* |
292 | * commute_restrictinfo |
293 | * |
294 | * Given a RestrictInfo containing a binary opclause, produce a RestrictInfo |
295 | * representing the commutation of that clause. The caller must pass the |
296 | * OID of the commutator operator (which it's presumably looked up, else |
297 | * it would not know this is valid). |
298 | * |
299 | * Beware that the result shares sub-structure with the given RestrictInfo. |
300 | * That's okay for the intended usage with derived index quals, but might |
301 | * be hazardous if the source is subject to change. Also notice that we |
302 | * assume without checking that the commutator op is a member of the same |
303 | * btree and hash opclasses as the original op. |
304 | */ |
305 | RestrictInfo * |
306 | commute_restrictinfo(RestrictInfo *rinfo, Oid comm_op) |
307 | { |
308 | RestrictInfo *result; |
309 | OpExpr *newclause; |
310 | OpExpr *clause = castNode(OpExpr, rinfo->clause); |
311 | |
312 | Assert(list_length(clause->args) == 2); |
313 | |
314 | /* flat-copy all the fields of clause ... */ |
315 | newclause = makeNode(OpExpr); |
316 | memcpy(newclause, clause, sizeof(OpExpr)); |
317 | |
318 | /* ... and adjust those we need to change to commute it */ |
319 | newclause->opno = comm_op; |
320 | newclause->opfuncid = InvalidOid; |
321 | newclause->args = list_make2(lsecond(clause->args), |
322 | linitial(clause->args)); |
323 | |
324 | /* likewise, flat-copy all the fields of rinfo ... */ |
325 | result = makeNode(RestrictInfo); |
326 | memcpy(result, rinfo, sizeof(RestrictInfo)); |
327 | |
328 | /* |
329 | * ... and adjust those we need to change. Note in particular that we can |
330 | * preserve any cached selectivity or cost estimates, since those ought to |
331 | * be the same for the new clause. Likewise we can keep the source's |
332 | * parent_ec. |
333 | */ |
334 | result->clause = (Expr *) newclause; |
335 | result->left_relids = rinfo->right_relids; |
336 | result->right_relids = rinfo->left_relids; |
337 | Assert(result->orclause == NULL); |
338 | result->left_ec = rinfo->right_ec; |
339 | result->right_ec = rinfo->left_ec; |
340 | result->left_em = rinfo->right_em; |
341 | result->right_em = rinfo->left_em; |
342 | result->scansel_cache = NIL; /* not worth updating this */ |
343 | if (rinfo->hashjoinoperator == clause->opno) |
344 | result->hashjoinoperator = comm_op; |
345 | else |
346 | result->hashjoinoperator = InvalidOid; |
347 | result->left_bucketsize = rinfo->right_bucketsize; |
348 | result->right_bucketsize = rinfo->left_bucketsize; |
349 | result->left_mcvfreq = rinfo->right_mcvfreq; |
350 | result->right_mcvfreq = rinfo->left_mcvfreq; |
351 | |
352 | return result; |
353 | } |
354 | |
355 | /* |
356 | * restriction_is_or_clause |
357 | * |
358 | * Returns t iff the restrictinfo node contains an 'or' clause. |
359 | */ |
360 | bool |
361 | restriction_is_or_clause(RestrictInfo *restrictinfo) |
362 | { |
363 | if (restrictinfo->orclause != NULL) |
364 | return true; |
365 | else |
366 | return false; |
367 | } |
368 | |
369 | /* |
370 | * restriction_is_securely_promotable |
371 | * |
372 | * Returns true if it's okay to evaluate this clause "early", that is before |
373 | * other restriction clauses attached to the specified relation. |
374 | */ |
375 | bool |
376 | restriction_is_securely_promotable(RestrictInfo *restrictinfo, |
377 | RelOptInfo *rel) |
378 | { |
379 | /* |
380 | * It's okay if there are no baserestrictinfo clauses for the rel that |
381 | * would need to go before this one, *or* if this one is leakproof. |
382 | */ |
383 | if (restrictinfo->security_level <= rel->baserestrict_min_security || |
384 | restrictinfo->leakproof) |
385 | return true; |
386 | else |
387 | return false; |
388 | } |
389 | |
390 | /* |
391 | * get_actual_clauses |
392 | * |
393 | * Returns a list containing the bare clauses from 'restrictinfo_list'. |
394 | * |
395 | * This is only to be used in cases where none of the RestrictInfos can |
396 | * be pseudoconstant clauses (for instance, it's OK on indexqual lists). |
397 | */ |
398 | List * |
399 | get_actual_clauses(List *restrictinfo_list) |
400 | { |
401 | List *result = NIL; |
402 | ListCell *l; |
403 | |
404 | foreach(l, restrictinfo_list) |
405 | { |
406 | RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); |
407 | |
408 | Assert(!rinfo->pseudoconstant); |
409 | |
410 | result = lappend(result, rinfo->clause); |
411 | } |
412 | return result; |
413 | } |
414 | |
415 | /* |
416 | * extract_actual_clauses |
417 | * |
418 | * Extract bare clauses from 'restrictinfo_list', returning either the |
419 | * regular ones or the pseudoconstant ones per 'pseudoconstant'. |
420 | */ |
421 | List * |
422 | (List *restrictinfo_list, |
423 | bool pseudoconstant) |
424 | { |
425 | List *result = NIL; |
426 | ListCell *l; |
427 | |
428 | foreach(l, restrictinfo_list) |
429 | { |
430 | RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); |
431 | |
432 | if (rinfo->pseudoconstant == pseudoconstant) |
433 | result = lappend(result, rinfo->clause); |
434 | } |
435 | return result; |
436 | } |
437 | |
438 | /* |
439 | * extract_actual_join_clauses |
440 | * |
441 | * Extract bare clauses from 'restrictinfo_list', separating those that |
442 | * semantically match the join level from those that were pushed down. |
443 | * Pseudoconstant clauses are excluded from the results. |
444 | * |
445 | * This is only used at outer joins, since for plain joins we don't care |
446 | * about pushed-down-ness. |
447 | */ |
448 | void |
449 | (List *restrictinfo_list, |
450 | Relids joinrelids, |
451 | List **joinquals, |
452 | List **otherquals) |
453 | { |
454 | ListCell *l; |
455 | |
456 | *joinquals = NIL; |
457 | *otherquals = NIL; |
458 | |
459 | foreach(l, restrictinfo_list) |
460 | { |
461 | RestrictInfo *rinfo = lfirst_node(RestrictInfo, l); |
462 | |
463 | if (RINFO_IS_PUSHED_DOWN(rinfo, joinrelids)) |
464 | { |
465 | if (!rinfo->pseudoconstant) |
466 | *otherquals = lappend(*otherquals, rinfo->clause); |
467 | } |
468 | else |
469 | { |
470 | /* joinquals shouldn't have been marked pseudoconstant */ |
471 | Assert(!rinfo->pseudoconstant); |
472 | *joinquals = lappend(*joinquals, rinfo->clause); |
473 | } |
474 | } |
475 | } |
476 | |
477 | |
478 | /* |
479 | * join_clause_is_movable_to |
480 | * Test whether a join clause is a safe candidate for parameterization |
481 | * of a scan on the specified base relation. |
482 | * |
483 | * A movable join clause is one that can safely be evaluated at a rel below |
484 | * its normal semantic level (ie, its required_relids), if the values of |
485 | * variables that it would need from other rels are provided. |
486 | * |
487 | * We insist that the clause actually reference the target relation; this |
488 | * prevents undesirable movement of degenerate join clauses, and ensures |
489 | * that there is a unique place that a clause can be moved down to. |
490 | * |
491 | * We cannot move an outer-join clause into the non-nullable side of its |
492 | * outer join, as that would change the results (rows would be suppressed |
493 | * rather than being null-extended). |
494 | * |
495 | * Also there must not be an outer join below the clause that would null the |
496 | * Vars coming from the target relation. Otherwise the clause might give |
497 | * results different from what it would give at its normal semantic level. |
498 | * |
499 | * Also, the join clause must not use any relations that have LATERAL |
500 | * references to the target relation, since we could not put such rels on |
501 | * the outer side of a nestloop with the target relation. |
502 | */ |
503 | bool |
504 | join_clause_is_movable_to(RestrictInfo *rinfo, RelOptInfo *baserel) |
505 | { |
506 | /* Clause must physically reference target rel */ |
507 | if (!bms_is_member(baserel->relid, rinfo->clause_relids)) |
508 | return false; |
509 | |
510 | /* Cannot move an outer-join clause into the join's outer side */ |
511 | if (bms_is_member(baserel->relid, rinfo->outer_relids)) |
512 | return false; |
513 | |
514 | /* Target rel must not be nullable below the clause */ |
515 | if (bms_is_member(baserel->relid, rinfo->nullable_relids)) |
516 | return false; |
517 | |
518 | /* Clause must not use any rels with LATERAL references to this rel */ |
519 | if (bms_overlap(baserel->lateral_referencers, rinfo->clause_relids)) |
520 | return false; |
521 | |
522 | return true; |
523 | } |
524 | |
525 | /* |
526 | * join_clause_is_movable_into |
527 | * Test whether a join clause is movable and can be evaluated within |
528 | * the current join context. |
529 | * |
530 | * currentrelids: the relids of the proposed evaluation location |
531 | * current_and_outer: the union of currentrelids and the required_outer |
532 | * relids (parameterization's outer relations) |
533 | * |
534 | * The API would be a bit clearer if we passed the current relids and the |
535 | * outer relids separately and did bms_union internally; but since most |
536 | * callers need to apply this function to multiple clauses, we make the |
537 | * caller perform the union. |
538 | * |
539 | * Obviously, the clause must only refer to Vars available from the current |
540 | * relation plus the outer rels. We also check that it does reference at |
541 | * least one current Var, ensuring that the clause will be pushed down to |
542 | * a unique place in a parameterized join tree. And we check that we're |
543 | * not pushing the clause into its outer-join outer side, nor down into |
544 | * a lower outer join's inner side. |
545 | * |
546 | * The check about pushing a clause down into a lower outer join's inner side |
547 | * is only approximate; it sometimes returns "false" when actually it would |
548 | * be safe to use the clause here because we're still above the outer join |
549 | * in question. This is okay as long as the answers at different join levels |
550 | * are consistent: it just means we might sometimes fail to push a clause as |
551 | * far down as it could safely be pushed. It's unclear whether it would be |
552 | * worthwhile to do this more precisely. (But if it's ever fixed to be |
553 | * exactly accurate, there's an Assert in get_joinrel_parampathinfo() that |
554 | * should be re-enabled.) |
555 | * |
556 | * There's no check here equivalent to join_clause_is_movable_to's test on |
557 | * lateral_referencers. We assume the caller wouldn't be inquiring unless |
558 | * it'd verified that the proposed outer rels don't have lateral references |
559 | * to the current rel(s). (If we are considering join paths with the outer |
560 | * rels on the outside and the current rels on the inside, then this should |
561 | * have been checked at the outset of such consideration; see join_is_legal |
562 | * and the path parameterization checks in joinpath.c.) On the other hand, |
563 | * in join_clause_is_movable_to we are asking whether the clause could be |
564 | * moved for some valid set of outer rels, so we don't have the benefit of |
565 | * relying on prior checks for lateral-reference validity. |
566 | * |
567 | * Note: if this returns true, it means that the clause could be moved to |
568 | * this join relation, but that doesn't mean that this is the lowest join |
569 | * it could be moved to. Caller may need to make additional calls to verify |
570 | * that this doesn't succeed on either of the inputs of a proposed join. |
571 | * |
572 | * Note: get_joinrel_parampathinfo depends on the fact that if |
573 | * current_and_outer is NULL, this function will always return false |
574 | * (since one or the other of the first two tests must fail). |
575 | */ |
576 | bool |
577 | join_clause_is_movable_into(RestrictInfo *rinfo, |
578 | Relids currentrelids, |
579 | Relids current_and_outer) |
580 | { |
581 | /* Clause must be evaluable given available context */ |
582 | if (!bms_is_subset(rinfo->clause_relids, current_and_outer)) |
583 | return false; |
584 | |
585 | /* Clause must physically reference at least one target rel */ |
586 | if (!bms_overlap(currentrelids, rinfo->clause_relids)) |
587 | return false; |
588 | |
589 | /* Cannot move an outer-join clause into the join's outer side */ |
590 | if (bms_overlap(currentrelids, rinfo->outer_relids)) |
591 | return false; |
592 | |
593 | /* |
594 | * Target rel(s) must not be nullable below the clause. This is |
595 | * approximate, in the safe direction, because the current join might be |
596 | * above the join where the nulling would happen, in which case the clause |
597 | * would work correctly here. But we don't have enough info to be sure. |
598 | */ |
599 | if (bms_overlap(currentrelids, rinfo->nullable_relids)) |
600 | return false; |
601 | |
602 | return true; |
603 | } |
604 | |