1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * orclauses.c |
4 | * Routines to extract restriction OR clauses from join OR clauses |
5 | * |
6 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
7 | * Portions Copyright (c) 1994, Regents of the University of California |
8 | * |
9 | * |
10 | * IDENTIFICATION |
11 | * src/backend/optimizer/util/orclauses.c |
12 | * |
13 | *------------------------------------------------------------------------- |
14 | */ |
15 | |
16 | #include "postgres.h" |
17 | |
18 | #include "nodes/makefuncs.h" |
19 | #include "nodes/nodeFuncs.h" |
20 | #include "optimizer/clauses.h" |
21 | #include "optimizer/cost.h" |
22 | #include "optimizer/optimizer.h" |
23 | #include "optimizer/orclauses.h" |
24 | #include "optimizer/restrictinfo.h" |
25 | |
26 | |
27 | static bool is_safe_restriction_clause_for(RestrictInfo *rinfo, RelOptInfo *rel); |
28 | static Expr *extract_or_clause(RestrictInfo *or_rinfo, RelOptInfo *rel); |
29 | static void consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel, |
30 | Expr *orclause, RestrictInfo *join_or_rinfo); |
31 | |
32 | |
33 | /* |
34 | * extract_restriction_or_clauses |
35 | * Examine join OR-of-AND clauses to see if any useful restriction OR |
36 | * clauses can be extracted. If so, add them to the query. |
37 | * |
38 | * Although a join clause must reference multiple relations overall, |
39 | * an OR of ANDs clause might contain sub-clauses that reference just one |
40 | * relation and can be used to build a restriction clause for that rel. |
41 | * For example consider |
42 | * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45)); |
43 | * We can transform this into |
44 | * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45)) |
45 | * AND (a.x = 42 OR a.x = 44) |
46 | * AND (b.y = 43 OR b.z = 45); |
47 | * which allows the latter clauses to be applied during the scans of a and b, |
48 | * perhaps as index qualifications, and in any case reducing the number of |
49 | * rows arriving at the join. In essence this is a partial transformation to |
50 | * CNF (AND of ORs format). It is not complete, however, because we do not |
51 | * unravel the original OR --- doing so would usually bloat the qualification |
52 | * expression to little gain. |
53 | * |
54 | * The added quals are partially redundant with the original OR, and therefore |
55 | * would cause the size of the joinrel to be underestimated when it is finally |
56 | * formed. (This would be true of a full transformation to CNF as well; the |
57 | * fault is not really in the transformation, but in clauselist_selectivity's |
58 | * inability to recognize redundant conditions.) We can compensate for this |
59 | * redundancy by changing the cached selectivity of the original OR clause, |
60 | * canceling out the (valid) reduction in the estimated sizes of the base |
61 | * relations so that the estimated joinrel size remains the same. This is |
62 | * a MAJOR HACK: it depends on the fact that clause selectivities are cached |
63 | * and on the fact that the same RestrictInfo node will appear in every |
64 | * joininfo list that might be used when the joinrel is formed. |
65 | * And it doesn't work in cases where the size estimation is nonlinear |
66 | * (i.e., outer and IN joins). But it beats not doing anything. |
67 | * |
68 | * We examine each base relation to see if join clauses associated with it |
69 | * contain extractable restriction conditions. If so, add those conditions |
70 | * to the rel's baserestrictinfo and update the cached selectivities of the |
71 | * join clauses. Note that the same join clause will be examined afresh |
72 | * from the point of view of each baserel that participates in it, so its |
73 | * cached selectivity may get updated multiple times. |
74 | */ |
75 | void |
76 | (PlannerInfo *root) |
77 | { |
78 | Index rti; |
79 | |
80 | /* Examine each baserel for potential join OR clauses */ |
81 | for (rti = 1; rti < root->simple_rel_array_size; rti++) |
82 | { |
83 | RelOptInfo *rel = root->simple_rel_array[rti]; |
84 | ListCell *lc; |
85 | |
86 | /* there may be empty slots corresponding to non-baserel RTEs */ |
87 | if (rel == NULL) |
88 | continue; |
89 | |
90 | Assert(rel->relid == rti); /* sanity check on array */ |
91 | |
92 | /* ignore RTEs that are "other rels" */ |
93 | if (rel->reloptkind != RELOPT_BASEREL) |
94 | continue; |
95 | |
96 | /* |
97 | * Find potentially interesting OR joinclauses. We can use any |
98 | * joinclause that is considered safe to move to this rel by the |
99 | * parameterized-path machinery, even though what we are going to do |
100 | * with it is not exactly a parameterized path. |
101 | * |
102 | * However, it seems best to ignore clauses that have been marked |
103 | * redundant (by setting norm_selec > 1). That likely can't happen |
104 | * for OR clauses, but let's be safe. |
105 | */ |
106 | foreach(lc, rel->joininfo) |
107 | { |
108 | RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); |
109 | |
110 | if (restriction_is_or_clause(rinfo) && |
111 | join_clause_is_movable_to(rinfo, rel) && |
112 | rinfo->norm_selec <= 1) |
113 | { |
114 | /* Try to extract a qual for this rel only */ |
115 | Expr *orclause = extract_or_clause(rinfo, rel); |
116 | |
117 | /* |
118 | * If successful, decide whether we want to use the clause, |
119 | * and insert it into the rel's restrictinfo list if so. |
120 | */ |
121 | if (orclause) |
122 | consider_new_or_clause(root, rel, orclause, rinfo); |
123 | } |
124 | } |
125 | } |
126 | } |
127 | |
128 | /* |
129 | * Is the given primitive (non-OR) RestrictInfo safe to move to the rel? |
130 | */ |
131 | static bool |
132 | is_safe_restriction_clause_for(RestrictInfo *rinfo, RelOptInfo *rel) |
133 | { |
134 | /* |
135 | * We want clauses that mention the rel, and only the rel. So in |
136 | * particular pseudoconstant clauses can be rejected quickly. Then check |
137 | * the clause's Var membership. |
138 | */ |
139 | if (rinfo->pseudoconstant) |
140 | return false; |
141 | if (!bms_equal(rinfo->clause_relids, rel->relids)) |
142 | return false; |
143 | |
144 | /* We don't want extra evaluations of any volatile functions */ |
145 | if (contain_volatile_functions((Node *) rinfo->clause)) |
146 | return false; |
147 | |
148 | return true; |
149 | } |
150 | |
151 | /* |
152 | * Try to extract a restriction clause mentioning only "rel" from the given |
153 | * join OR-clause. |
154 | * |
155 | * We must be able to extract at least one qual for this rel from each of |
156 | * the arms of the OR, else we can't use it. |
157 | * |
158 | * Returns an OR clause (not a RestrictInfo!) pertaining to rel, or NULL |
159 | * if no OR clause could be extracted. |
160 | */ |
161 | static Expr * |
162 | (RestrictInfo *or_rinfo, RelOptInfo *rel) |
163 | { |
164 | List *clauselist = NIL; |
165 | ListCell *lc; |
166 | |
167 | /* |
168 | * Scan each arm of the input OR clause. Notice we descend into |
169 | * or_rinfo->orclause, which has RestrictInfo nodes embedded below the |
170 | * toplevel OR/AND structure. This is useful because we can use the info |
171 | * in those nodes to make is_safe_restriction_clause_for()'s checks |
172 | * cheaper. We'll strip those nodes from the returned tree, though, |
173 | * meaning that fresh ones will be built if the clause is accepted as a |
174 | * restriction clause. This might seem wasteful --- couldn't we re-use |
175 | * the existing RestrictInfos? But that'd require assuming that |
176 | * selectivity and other cached data is computed exactly the same way for |
177 | * a restriction clause as for a join clause, which seems undesirable. |
178 | */ |
179 | Assert(is_orclause(or_rinfo->orclause)); |
180 | foreach(lc, ((BoolExpr *) or_rinfo->orclause)->args) |
181 | { |
182 | Node *orarg = (Node *) lfirst(lc); |
183 | List *subclauses = NIL; |
184 | Node *subclause; |
185 | |
186 | /* OR arguments should be ANDs or sub-RestrictInfos */ |
187 | if (is_andclause(orarg)) |
188 | { |
189 | List *andargs = ((BoolExpr *) orarg)->args; |
190 | ListCell *lc2; |
191 | |
192 | foreach(lc2, andargs) |
193 | { |
194 | RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2); |
195 | |
196 | if (restriction_is_or_clause(rinfo)) |
197 | { |
198 | /* |
199 | * Recurse to deal with nested OR. Note we *must* recurse |
200 | * here, this isn't just overly-tense optimization: we |
201 | * have to descend far enough to find and strip all |
202 | * RestrictInfos in the expression. |
203 | */ |
204 | Expr *suborclause; |
205 | |
206 | suborclause = extract_or_clause(rinfo, rel); |
207 | if (suborclause) |
208 | subclauses = lappend(subclauses, suborclause); |
209 | } |
210 | else if (is_safe_restriction_clause_for(rinfo, rel)) |
211 | subclauses = lappend(subclauses, rinfo->clause); |
212 | } |
213 | } |
214 | else |
215 | { |
216 | RestrictInfo *rinfo = castNode(RestrictInfo, orarg); |
217 | |
218 | Assert(!restriction_is_or_clause(rinfo)); |
219 | if (is_safe_restriction_clause_for(rinfo, rel)) |
220 | subclauses = lappend(subclauses, rinfo->clause); |
221 | } |
222 | |
223 | /* |
224 | * If nothing could be extracted from this arm, we can't do anything |
225 | * with this OR clause. |
226 | */ |
227 | if (subclauses == NIL) |
228 | return NULL; |
229 | |
230 | /* |
231 | * OK, add subclause(s) to the result OR. If we found more than one, |
232 | * we need an AND node. But if we found only one, and it is itself an |
233 | * OR node, add its subclauses to the result instead; this is needed |
234 | * to preserve AND/OR flatness (ie, no OR directly underneath OR). |
235 | */ |
236 | subclause = (Node *) make_ands_explicit(subclauses); |
237 | if (is_orclause(subclause)) |
238 | clauselist = list_concat(clauselist, |
239 | list_copy(((BoolExpr *) subclause)->args)); |
240 | else |
241 | clauselist = lappend(clauselist, subclause); |
242 | } |
243 | |
244 | /* |
245 | * If we got a restriction clause from every arm, wrap them up in an OR |
246 | * node. (In theory the OR node might be unnecessary, if there was only |
247 | * one arm --- but then the input OR node was also redundant.) |
248 | */ |
249 | if (clauselist != NIL) |
250 | return make_orclause(clauselist); |
251 | return NULL; |
252 | } |
253 | |
254 | /* |
255 | * Consider whether a successfully-extracted restriction OR clause is |
256 | * actually worth using. If so, add it to the planner's data structures, |
257 | * and adjust the original join clause (join_or_rinfo) to compensate. |
258 | */ |
259 | static void |
260 | consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel, |
261 | Expr *orclause, RestrictInfo *join_or_rinfo) |
262 | { |
263 | RestrictInfo *or_rinfo; |
264 | Selectivity or_selec, |
265 | orig_selec; |
266 | |
267 | /* |
268 | * Build a RestrictInfo from the new OR clause. We can assume it's valid |
269 | * as a base restriction clause. |
270 | */ |
271 | or_rinfo = make_restrictinfo(orclause, |
272 | true, |
273 | false, |
274 | false, |
275 | join_or_rinfo->security_level, |
276 | NULL, |
277 | NULL, |
278 | NULL); |
279 | |
280 | /* |
281 | * Estimate its selectivity. (We could have done this earlier, but doing |
282 | * it on the RestrictInfo representation allows the result to get cached, |
283 | * saving work later.) |
284 | */ |
285 | or_selec = clause_selectivity(root, (Node *) or_rinfo, |
286 | 0, JOIN_INNER, NULL); |
287 | |
288 | /* |
289 | * The clause is only worth adding to the query if it rejects a useful |
290 | * fraction of the base relation's rows; otherwise, it's just going to |
291 | * cause duplicate computation (since we will still have to check the |
292 | * original OR clause when the join is formed). Somewhat arbitrarily, we |
293 | * set the selectivity threshold at 0.9. |
294 | */ |
295 | if (or_selec > 0.9) |
296 | return; /* forget it */ |
297 | |
298 | /* |
299 | * OK, add it to the rel's restriction-clause list. |
300 | */ |
301 | rel->baserestrictinfo = lappend(rel->baserestrictinfo, or_rinfo); |
302 | rel->baserestrict_min_security = Min(rel->baserestrict_min_security, |
303 | or_rinfo->security_level); |
304 | |
305 | /* |
306 | * Adjust the original join OR clause's cached selectivity to compensate |
307 | * for the selectivity of the added (but redundant) lower-level qual. This |
308 | * should result in the join rel getting approximately the same rows |
309 | * estimate as it would have gotten without all these shenanigans. |
310 | * |
311 | * XXX major hack alert: this depends on the assumption that the |
312 | * selectivity will stay cached. |
313 | * |
314 | * XXX another major hack: we adjust only norm_selec, the cached |
315 | * selectivity for JOIN_INNER semantics, even though the join clause |
316 | * might've been an outer-join clause. This is partly because we can't |
317 | * easily identify the relevant SpecialJoinInfo here, and partly because |
318 | * the linearity assumption we're making would fail anyway. (If it is an |
319 | * outer-join clause, "rel" must be on the nullable side, else we'd not |
320 | * have gotten here. So the computation of the join size is going to be |
321 | * quite nonlinear with respect to the size of "rel", so it's not clear |
322 | * how we ought to adjust outer_selec even if we could compute its |
323 | * original value correctly.) |
324 | */ |
325 | if (or_selec > 0) |
326 | { |
327 | SpecialJoinInfo sjinfo; |
328 | |
329 | /* |
330 | * Make up a SpecialJoinInfo for JOIN_INNER semantics. (Compare |
331 | * approx_tuple_count() in costsize.c.) |
332 | */ |
333 | sjinfo.type = T_SpecialJoinInfo; |
334 | sjinfo.min_lefthand = bms_difference(join_or_rinfo->clause_relids, |
335 | rel->relids); |
336 | sjinfo.min_righthand = rel->relids; |
337 | sjinfo.syn_lefthand = sjinfo.min_lefthand; |
338 | sjinfo.syn_righthand = sjinfo.min_righthand; |
339 | sjinfo.jointype = JOIN_INNER; |
340 | /* we don't bother trying to make the remaining fields valid */ |
341 | sjinfo.lhs_strict = false; |
342 | sjinfo.delay_upper_joins = false; |
343 | sjinfo.semi_can_btree = false; |
344 | sjinfo.semi_can_hash = false; |
345 | sjinfo.semi_operators = NIL; |
346 | sjinfo.semi_rhs_exprs = NIL; |
347 | |
348 | /* Compute inner-join size */ |
349 | orig_selec = clause_selectivity(root, (Node *) join_or_rinfo, |
350 | 0, JOIN_INNER, &sjinfo); |
351 | |
352 | /* And hack cached selectivity so join size remains the same */ |
353 | join_or_rinfo->norm_selec = orig_selec / or_selec; |
354 | /* ensure result stays in sane range, in particular not "redundant" */ |
355 | if (join_or_rinfo->norm_selec > 1) |
356 | join_or_rinfo->norm_selec = 1; |
357 | /* as explained above, we don't touch outer_selec */ |
358 | } |
359 | } |
360 | |