1/*-------------------------------------------------------------------------
2 *
3 * orclauses.c
4 * Routines to extract restriction OR clauses from join OR clauses
5 *
6 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7 * Portions Copyright (c) 1994, Regents of the University of California
8 *
9 *
10 * IDENTIFICATION
11 * src/backend/optimizer/util/orclauses.c
12 *
13 *-------------------------------------------------------------------------
14 */
15
16#include "postgres.h"
17
18#include "nodes/makefuncs.h"
19#include "nodes/nodeFuncs.h"
20#include "optimizer/clauses.h"
21#include "optimizer/cost.h"
22#include "optimizer/optimizer.h"
23#include "optimizer/orclauses.h"
24#include "optimizer/restrictinfo.h"
25
26
27static bool is_safe_restriction_clause_for(RestrictInfo *rinfo, RelOptInfo *rel);
28static Expr *extract_or_clause(RestrictInfo *or_rinfo, RelOptInfo *rel);
29static void consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel,
30 Expr *orclause, RestrictInfo *join_or_rinfo);
31
32
33/*
34 * extract_restriction_or_clauses
35 * Examine join OR-of-AND clauses to see if any useful restriction OR
36 * clauses can be extracted. If so, add them to the query.
37 *
38 * Although a join clause must reference multiple relations overall,
39 * an OR of ANDs clause might contain sub-clauses that reference just one
40 * relation and can be used to build a restriction clause for that rel.
41 * For example consider
42 * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45));
43 * We can transform this into
44 * WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45))
45 * AND (a.x = 42 OR a.x = 44)
46 * AND (b.y = 43 OR b.z = 45);
47 * which allows the latter clauses to be applied during the scans of a and b,
48 * perhaps as index qualifications, and in any case reducing the number of
49 * rows arriving at the join. In essence this is a partial transformation to
50 * CNF (AND of ORs format). It is not complete, however, because we do not
51 * unravel the original OR --- doing so would usually bloat the qualification
52 * expression to little gain.
53 *
54 * The added quals are partially redundant with the original OR, and therefore
55 * would cause the size of the joinrel to be underestimated when it is finally
56 * formed. (This would be true of a full transformation to CNF as well; the
57 * fault is not really in the transformation, but in clauselist_selectivity's
58 * inability to recognize redundant conditions.) We can compensate for this
59 * redundancy by changing the cached selectivity of the original OR clause,
60 * canceling out the (valid) reduction in the estimated sizes of the base
61 * relations so that the estimated joinrel size remains the same. This is
62 * a MAJOR HACK: it depends on the fact that clause selectivities are cached
63 * and on the fact that the same RestrictInfo node will appear in every
64 * joininfo list that might be used when the joinrel is formed.
65 * And it doesn't work in cases where the size estimation is nonlinear
66 * (i.e., outer and IN joins). But it beats not doing anything.
67 *
68 * We examine each base relation to see if join clauses associated with it
69 * contain extractable restriction conditions. If so, add those conditions
70 * to the rel's baserestrictinfo and update the cached selectivities of the
71 * join clauses. Note that the same join clause will be examined afresh
72 * from the point of view of each baserel that participates in it, so its
73 * cached selectivity may get updated multiple times.
74 */
75void
76extract_restriction_or_clauses(PlannerInfo *root)
77{
78 Index rti;
79
80 /* Examine each baserel for potential join OR clauses */
81 for (rti = 1; rti < root->simple_rel_array_size; rti++)
82 {
83 RelOptInfo *rel = root->simple_rel_array[rti];
84 ListCell *lc;
85
86 /* there may be empty slots corresponding to non-baserel RTEs */
87 if (rel == NULL)
88 continue;
89
90 Assert(rel->relid == rti); /* sanity check on array */
91
92 /* ignore RTEs that are "other rels" */
93 if (rel->reloptkind != RELOPT_BASEREL)
94 continue;
95
96 /*
97 * Find potentially interesting OR joinclauses. We can use any
98 * joinclause that is considered safe to move to this rel by the
99 * parameterized-path machinery, even though what we are going to do
100 * with it is not exactly a parameterized path.
101 *
102 * However, it seems best to ignore clauses that have been marked
103 * redundant (by setting norm_selec > 1). That likely can't happen
104 * for OR clauses, but let's be safe.
105 */
106 foreach(lc, rel->joininfo)
107 {
108 RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc);
109
110 if (restriction_is_or_clause(rinfo) &&
111 join_clause_is_movable_to(rinfo, rel) &&
112 rinfo->norm_selec <= 1)
113 {
114 /* Try to extract a qual for this rel only */
115 Expr *orclause = extract_or_clause(rinfo, rel);
116
117 /*
118 * If successful, decide whether we want to use the clause,
119 * and insert it into the rel's restrictinfo list if so.
120 */
121 if (orclause)
122 consider_new_or_clause(root, rel, orclause, rinfo);
123 }
124 }
125 }
126}
127
128/*
129 * Is the given primitive (non-OR) RestrictInfo safe to move to the rel?
130 */
131static bool
132is_safe_restriction_clause_for(RestrictInfo *rinfo, RelOptInfo *rel)
133{
134 /*
135 * We want clauses that mention the rel, and only the rel. So in
136 * particular pseudoconstant clauses can be rejected quickly. Then check
137 * the clause's Var membership.
138 */
139 if (rinfo->pseudoconstant)
140 return false;
141 if (!bms_equal(rinfo->clause_relids, rel->relids))
142 return false;
143
144 /* We don't want extra evaluations of any volatile functions */
145 if (contain_volatile_functions((Node *) rinfo->clause))
146 return false;
147
148 return true;
149}
150
151/*
152 * Try to extract a restriction clause mentioning only "rel" from the given
153 * join OR-clause.
154 *
155 * We must be able to extract at least one qual for this rel from each of
156 * the arms of the OR, else we can't use it.
157 *
158 * Returns an OR clause (not a RestrictInfo!) pertaining to rel, or NULL
159 * if no OR clause could be extracted.
160 */
161static Expr *
162extract_or_clause(RestrictInfo *or_rinfo, RelOptInfo *rel)
163{
164 List *clauselist = NIL;
165 ListCell *lc;
166
167 /*
168 * Scan each arm of the input OR clause. Notice we descend into
169 * or_rinfo->orclause, which has RestrictInfo nodes embedded below the
170 * toplevel OR/AND structure. This is useful because we can use the info
171 * in those nodes to make is_safe_restriction_clause_for()'s checks
172 * cheaper. We'll strip those nodes from the returned tree, though,
173 * meaning that fresh ones will be built if the clause is accepted as a
174 * restriction clause. This might seem wasteful --- couldn't we re-use
175 * the existing RestrictInfos? But that'd require assuming that
176 * selectivity and other cached data is computed exactly the same way for
177 * a restriction clause as for a join clause, which seems undesirable.
178 */
179 Assert(is_orclause(or_rinfo->orclause));
180 foreach(lc, ((BoolExpr *) or_rinfo->orclause)->args)
181 {
182 Node *orarg = (Node *) lfirst(lc);
183 List *subclauses = NIL;
184 Node *subclause;
185
186 /* OR arguments should be ANDs or sub-RestrictInfos */
187 if (is_andclause(orarg))
188 {
189 List *andargs = ((BoolExpr *) orarg)->args;
190 ListCell *lc2;
191
192 foreach(lc2, andargs)
193 {
194 RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
195
196 if (restriction_is_or_clause(rinfo))
197 {
198 /*
199 * Recurse to deal with nested OR. Note we *must* recurse
200 * here, this isn't just overly-tense optimization: we
201 * have to descend far enough to find and strip all
202 * RestrictInfos in the expression.
203 */
204 Expr *suborclause;
205
206 suborclause = extract_or_clause(rinfo, rel);
207 if (suborclause)
208 subclauses = lappend(subclauses, suborclause);
209 }
210 else if (is_safe_restriction_clause_for(rinfo, rel))
211 subclauses = lappend(subclauses, rinfo->clause);
212 }
213 }
214 else
215 {
216 RestrictInfo *rinfo = castNode(RestrictInfo, orarg);
217
218 Assert(!restriction_is_or_clause(rinfo));
219 if (is_safe_restriction_clause_for(rinfo, rel))
220 subclauses = lappend(subclauses, rinfo->clause);
221 }
222
223 /*
224 * If nothing could be extracted from this arm, we can't do anything
225 * with this OR clause.
226 */
227 if (subclauses == NIL)
228 return NULL;
229
230 /*
231 * OK, add subclause(s) to the result OR. If we found more than one,
232 * we need an AND node. But if we found only one, and it is itself an
233 * OR node, add its subclauses to the result instead; this is needed
234 * to preserve AND/OR flatness (ie, no OR directly underneath OR).
235 */
236 subclause = (Node *) make_ands_explicit(subclauses);
237 if (is_orclause(subclause))
238 clauselist = list_concat(clauselist,
239 list_copy(((BoolExpr *) subclause)->args));
240 else
241 clauselist = lappend(clauselist, subclause);
242 }
243
244 /*
245 * If we got a restriction clause from every arm, wrap them up in an OR
246 * node. (In theory the OR node might be unnecessary, if there was only
247 * one arm --- but then the input OR node was also redundant.)
248 */
249 if (clauselist != NIL)
250 return make_orclause(clauselist);
251 return NULL;
252}
253
254/*
255 * Consider whether a successfully-extracted restriction OR clause is
256 * actually worth using. If so, add it to the planner's data structures,
257 * and adjust the original join clause (join_or_rinfo) to compensate.
258 */
259static void
260consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel,
261 Expr *orclause, RestrictInfo *join_or_rinfo)
262{
263 RestrictInfo *or_rinfo;
264 Selectivity or_selec,
265 orig_selec;
266
267 /*
268 * Build a RestrictInfo from the new OR clause. We can assume it's valid
269 * as a base restriction clause.
270 */
271 or_rinfo = make_restrictinfo(orclause,
272 true,
273 false,
274 false,
275 join_or_rinfo->security_level,
276 NULL,
277 NULL,
278 NULL);
279
280 /*
281 * Estimate its selectivity. (We could have done this earlier, but doing
282 * it on the RestrictInfo representation allows the result to get cached,
283 * saving work later.)
284 */
285 or_selec = clause_selectivity(root, (Node *) or_rinfo,
286 0, JOIN_INNER, NULL);
287
288 /*
289 * The clause is only worth adding to the query if it rejects a useful
290 * fraction of the base relation's rows; otherwise, it's just going to
291 * cause duplicate computation (since we will still have to check the
292 * original OR clause when the join is formed). Somewhat arbitrarily, we
293 * set the selectivity threshold at 0.9.
294 */
295 if (or_selec > 0.9)
296 return; /* forget it */
297
298 /*
299 * OK, add it to the rel's restriction-clause list.
300 */
301 rel->baserestrictinfo = lappend(rel->baserestrictinfo, or_rinfo);
302 rel->baserestrict_min_security = Min(rel->baserestrict_min_security,
303 or_rinfo->security_level);
304
305 /*
306 * Adjust the original join OR clause's cached selectivity to compensate
307 * for the selectivity of the added (but redundant) lower-level qual. This
308 * should result in the join rel getting approximately the same rows
309 * estimate as it would have gotten without all these shenanigans.
310 *
311 * XXX major hack alert: this depends on the assumption that the
312 * selectivity will stay cached.
313 *
314 * XXX another major hack: we adjust only norm_selec, the cached
315 * selectivity for JOIN_INNER semantics, even though the join clause
316 * might've been an outer-join clause. This is partly because we can't
317 * easily identify the relevant SpecialJoinInfo here, and partly because
318 * the linearity assumption we're making would fail anyway. (If it is an
319 * outer-join clause, "rel" must be on the nullable side, else we'd not
320 * have gotten here. So the computation of the join size is going to be
321 * quite nonlinear with respect to the size of "rel", so it's not clear
322 * how we ought to adjust outer_selec even if we could compute its
323 * original value correctly.)
324 */
325 if (or_selec > 0)
326 {
327 SpecialJoinInfo sjinfo;
328
329 /*
330 * Make up a SpecialJoinInfo for JOIN_INNER semantics. (Compare
331 * approx_tuple_count() in costsize.c.)
332 */
333 sjinfo.type = T_SpecialJoinInfo;
334 sjinfo.min_lefthand = bms_difference(join_or_rinfo->clause_relids,
335 rel->relids);
336 sjinfo.min_righthand = rel->relids;
337 sjinfo.syn_lefthand = sjinfo.min_lefthand;
338 sjinfo.syn_righthand = sjinfo.min_righthand;
339 sjinfo.jointype = JOIN_INNER;
340 /* we don't bother trying to make the remaining fields valid */
341 sjinfo.lhs_strict = false;
342 sjinfo.delay_upper_joins = false;
343 sjinfo.semi_can_btree = false;
344 sjinfo.semi_can_hash = false;
345 sjinfo.semi_operators = NIL;
346 sjinfo.semi_rhs_exprs = NIL;
347
348 /* Compute inner-join size */
349 orig_selec = clause_selectivity(root, (Node *) join_or_rinfo,
350 0, JOIN_INNER, &sjinfo);
351
352 /* And hack cached selectivity so join size remains the same */
353 join_or_rinfo->norm_selec = orig_selec / or_selec;
354 /* ensure result stays in sane range, in particular not "redundant" */
355 if (join_or_rinfo->norm_selec > 1)
356 join_or_rinfo->norm_selec = 1;
357 /* as explained above, we don't touch outer_selec */
358 }
359}
360