orclauses.c source code [PostgreSQL/src/backend/optimizer/util/orclauses.c]

1	/-------------------------------------------------------------------------*
2	*
3	* orclauses.c
4	* Routines to extract restriction OR clauses from join OR clauses
5	*
6	* Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
7	* Portions Copyright (c) 1994, Regents of the University of California
8	*
9	*
10	* IDENTIFICATION
11	* src/backend/optimizer/util/orclauses.c
12	*
13	*-------------------------------------------------------------------------
14	*/
15
16	#include "postgres.h"
17
18	#include "nodes/makefuncs.h"
19	#include "nodes/nodeFuncs.h"
20	#include "optimizer/clauses.h"
21	#include "optimizer/cost.h"
22	#include "optimizer/optimizer.h"
23	#include "optimizer/orclauses.h"
24	#include "optimizer/restrictinfo.h"
25
26
27	static bool is_safe_restriction_clause_for(RestrictInfo rinfo, RelOptInfo rel);
28	static Expr extract_or_clause(RestrictInfo or_rinfo, RelOptInfo *rel);
29	static void consider_new_or_clause(PlannerInfo root, RelOptInfo rel,
30	Expr orclause, RestrictInfo join_or_rinfo);
31
32
33	/*
34	* extract_restriction_or_clauses
35	* Examine join OR-of-AND clauses to see if any useful restriction OR
36	* clauses can be extracted. If so, add them to the query.
37	*
38	* Although a join clause must reference multiple relations overall,
39	* an OR of ANDs clause might contain sub-clauses that reference just one
40	* relation and can be used to build a restriction clause for that rel.
41	* For example consider
42	* WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45));
43	* We can transform this into
44	* WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45))
45	* AND (a.x = 42 OR a.x = 44)
46	* AND (b.y = 43 OR b.z = 45);
47	* which allows the latter clauses to be applied during the scans of a and b,
48	* perhaps as index qualifications, and in any case reducing the number of
49	* rows arriving at the join. In essence this is a partial transformation to
50	* CNF (AND of ORs format). It is not complete, however, because we do not
51	* unravel the original OR --- doing so would usually bloat the qualification
52	* expression to little gain.
53	*
54	* The added quals are partially redundant with the original OR, and therefore
55	* would cause the size of the joinrel to be underestimated when it is finally
56	* formed. (This would be true of a full transformation to CNF as well; the
57	* fault is not really in the transformation, but in clauselist_selectivity's
58	* inability to recognize redundant conditions.) We can compensate for this
59	* redundancy by changing the cached selectivity of the original OR clause,
60	* canceling out the (valid) reduction in the estimated sizes of the base
61	* relations so that the estimated joinrel size remains the same. This is
62	* a MAJOR HACK: it depends on the fact that clause selectivities are cached
63	* and on the fact that the same RestrictInfo node will appear in every
64	* joininfo list that might be used when the joinrel is formed.
65	* And it doesn't work in cases where the size estimation is nonlinear
66	* (i.e., outer and IN joins). But it beats not doing anything.
67	*
68	* We examine each base relation to see if join clauses associated with it
69	* contain extractable restriction conditions. If so, add those conditions
70	* to the rel's baserestrictinfo and update the cached selectivities of the
71	* join clauses. Note that the same join clause will be examined afresh
72	* from the point of view of each baserel that participates in it, so its
73	* cached selectivity may get updated multiple times.
74	*/
75	void
76	extract_restriction_or_clauses(PlannerInfo *root)
77	{
78	Index rti;
79
80	/ Examine each baserel for potential join OR clauses /
81	for (rti = `1`; rti < root->simple_rel_array_size; rti++)
82	{
83	RelOptInfo *rel = root->simple_rel_array[rti];
84	ListCell *lc;
85
86	/ there may be empty slots corresponding to non-baserel RTEs /
87	if (rel == NULL)
88	continue;
89
90	Assert(rel->relid == rti); / sanity check on array /
91
92	/ ignore RTEs that are "other rels" /
93	if (rel->reloptkind != RELOPT_BASEREL)
94	continue;
95
96	/*
97	* Find potentially interesting OR joinclauses. We can use any
98	* joinclause that is considered safe to move to this rel by the
99	* parameterized-path machinery, even though what we are going to do
100	* with it is not exactly a parameterized path.
101	*
102	* However, it seems best to ignore clauses that have been marked
103	* redundant (by setting norm_selec > 1). That likely can't happen
104	* for OR clauses, but let's be safe.
105	*/
106	foreach(lc, rel->joininfo)
107	{
108	RestrictInfo rinfo = (RestrictInfo ) lfirst(lc);
109
110	if (restriction_is_or_clause(rinfo) &&
111	join_clause_is_movable_to(rinfo, rel) &&
112	rinfo->norm_selec <= `1`)
113	{
114	/ Try to extract a qual for this rel only /
115	Expr *orclause = extract_or_clause(rinfo, rel);
116
117	/*
118	* If successful, decide whether we want to use the clause,
119	* and insert it into the rel's restrictinfo list if so.
120	*/
121	if (orclause)
122	consider_new_or_clause(root, rel, orclause, rinfo);
123	}
124	}
125	}
126	}
127
128	/*
129	* Is the given primitive (non-OR) RestrictInfo safe to move to the rel?
130	*/
131	static bool
132	is_safe_restriction_clause_for(RestrictInfo rinfo, RelOptInfo rel)
133	{
134	/*
135	* We want clauses that mention the rel, and only the rel. So in
136	* particular pseudoconstant clauses can be rejected quickly. Then check
137	* the clause's Var membership.
138	*/
139	if (rinfo->pseudoconstant)
140	return false;
141	if (!bms_equal(rinfo->clause_relids, rel->relids))
142	return false;
143
144	/ We don't want extra evaluations of any volatile functions /
145	if (contain_volatile_functions((Node *) rinfo->clause))
146	return false;
147
148	return true;
149	}
150
151	/*
152	* Try to extract a restriction clause mentioning only "rel" from the given
153	* join OR-clause.
154	*
155	* We must be able to extract at least one qual for this rel from each of
156	* the arms of the OR, else we can't use it.
157	*
158	* Returns an OR clause (not a RestrictInfo!) pertaining to rel, or NULL
159	* if no OR clause could be extracted.
160	*/
161	static Expr *
162	extract_or_clause(RestrictInfo or_rinfo, RelOptInfo rel)
163	{
164	List *clauselist = NIL;
165	ListCell *lc;
166
167	/*
168	* Scan each arm of the input OR clause. Notice we descend into
169	* or_rinfo->orclause, which has RestrictInfo nodes embedded below the
170	* toplevel OR/AND structure. This is useful because we can use the info
171	* in those nodes to make is_safe_restriction_clause_for()'s checks
172	* cheaper. We'll strip those nodes from the returned tree, though,
173	* meaning that fresh ones will be built if the clause is accepted as a
174	* restriction clause. This might seem wasteful --- couldn't we re-use
175	* the existing RestrictInfos? But that'd require assuming that
176	* selectivity and other cached data is computed exactly the same way for
177	* a restriction clause as for a join clause, which seems undesirable.
178	*/
179	Assert(is_orclause(or_rinfo->orclause));
180	foreach(lc, ((BoolExpr *) or_rinfo->orclause)->args)
181	{
182	Node orarg = (Node ) lfirst(lc);
183	List *subclauses = NIL;
184	Node *subclause;
185
186	/ OR arguments should be ANDs or sub-RestrictInfos /
187	if (is_andclause(orarg))
188	{
189	List andargs = ((BoolExpr ) orarg)->args;
190	ListCell *lc2;
191
192	foreach(lc2, andargs)
193	{
194	RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2);
195
196	if (restriction_is_or_clause(rinfo))
197	{
198	/*
199	* Recurse to deal with nested OR. Note we must recurse
200	* here, this isn't just overly-tense optimization: we
201	* have to descend far enough to find and strip all
202	* RestrictInfos in the expression.
203	*/
204	Expr *suborclause;
205
206	suborclause = extract_or_clause(rinfo, rel);
207	if (suborclause)
208	subclauses = lappend(subclauses, suborclause);
209	}
210	else if (is_safe_restriction_clause_for(rinfo, rel))
211	subclauses = lappend(subclauses, rinfo->clause);
212	}
213	}
214	else
215	{
216	RestrictInfo *rinfo = castNode(RestrictInfo, orarg);
217
218	Assert(!restriction_is_or_clause(rinfo));
219	if (is_safe_restriction_clause_for(rinfo, rel))
220	subclauses = lappend(subclauses, rinfo->clause);
221	}
222
223	/*
224	* If nothing could be extracted from this arm, we can't do anything
225	* with this OR clause.
226	*/
227	if (subclauses == NIL)
228	return NULL;
229
230	/*
231	* OK, add subclause(s) to the result OR. If we found more than one,
232	* we need an AND node. But if we found only one, and it is itself an
233	* OR node, add its subclauses to the result instead; this is needed
234	* to preserve AND/OR flatness (ie, no OR directly underneath OR).
235	*/
236	subclause = (Node *) make_ands_explicit(subclauses);
237	if (is_orclause(subclause))
238	clauselist = list_concat(clauselist,
239	list_copy(((BoolExpr *) subclause)->args));
240	else
241	clauselist = lappend(clauselist, subclause);
242	}
243
244	/*
245	* If we got a restriction clause from every arm, wrap them up in an OR
246	* node. (In theory the OR node might be unnecessary, if there was only
247	* one arm --- but then the input OR node was also redundant.)
248	*/
249	if (clauselist != NIL)
250	return make_orclause(clauselist);
251	return NULL;
252	}
253
254	/*
255	* Consider whether a successfully-extracted restriction OR clause is
256	* actually worth using. If so, add it to the planner's data structures,
257	* and adjust the original join clause (join_or_rinfo) to compensate.
258	*/
259	static void
260	consider_new_or_clause(PlannerInfo root, RelOptInfo rel,
261	Expr orclause, RestrictInfo join_or_rinfo)
262	{
263	RestrictInfo *or_rinfo;
264	Selectivity or_selec,
265	orig_selec;
266
267	/*
268	* Build a RestrictInfo from the new OR clause. We can assume it's valid
269	* as a base restriction clause.
270	*/
271	or_rinfo = make_restrictinfo(orclause,
272	true,
273	false,
274	false,
275	join_or_rinfo->security_level,
276	NULL,
277	NULL,
278	NULL);
279
280	/*
281	* Estimate its selectivity. (We could have done this earlier, but doing
282	* it on the RestrictInfo representation allows the result to get cached,
283	* saving work later.)
284	*/
285	or_selec = clause_selectivity(root, (Node *) or_rinfo,
286	`0`, JOIN_INNER, NULL);
287
288	/*
289	* The clause is only worth adding to the query if it rejects a useful
290	* fraction of the base relation's rows; otherwise, it's just going to
291	* cause duplicate computation (since we will still have to check the
292	* original OR clause when the join is formed). Somewhat arbitrarily, we
293	* set the selectivity threshold at 0.9.
294	*/
295	if (or_selec > `0.9`)
296	return; / forget it /
297
298	/*
299	* OK, add it to the rel's restriction-clause list.
300	*/
301	rel->baserestrictinfo = lappend(rel->baserestrictinfo, or_rinfo);
302	rel->baserestrict_min_security = Min(rel->baserestrict_min_security,
303	or_rinfo->security_level);
304
305	/*
306	* Adjust the original join OR clause's cached selectivity to compensate
307	* for the selectivity of the added (but redundant) lower-level qual. This
308	* should result in the join rel getting approximately the same rows
309	* estimate as it would have gotten without all these shenanigans.
310	*
311	* XXX major hack alert: this depends on the assumption that the
312	* selectivity will stay cached.
313	*
314	* XXX another major hack: we adjust only norm_selec, the cached
315	* selectivity for JOIN_INNER semantics, even though the join clause
316	* might've been an outer-join clause. This is partly because we can't
317	* easily identify the relevant SpecialJoinInfo here, and partly because
318	* the linearity assumption we're making would fail anyway. (If it is an
319	* outer-join clause, "rel" must be on the nullable side, else we'd not
320	* have gotten here. So the computation of the join size is going to be
321	* quite nonlinear with respect to the size of "rel", so it's not clear
322	* how we ought to adjust outer_selec even if we could compute its
323	* original value correctly.)
324	*/
325	if (or_selec > `0`)
326	{
327	SpecialJoinInfo sjinfo;
328
329	/*
330	* Make up a SpecialJoinInfo for JOIN_INNER semantics. (Compare
331	* approx_tuple_count() in costsize.c.)
332	*/
333	sjinfo.type = T_SpecialJoinInfo;
334	sjinfo.min_lefthand = bms_difference(join_or_rinfo->clause_relids,
335	rel->relids);
336	sjinfo.min_righthand = rel->relids;
337	sjinfo.syn_lefthand = sjinfo.min_lefthand;
338	sjinfo.syn_righthand = sjinfo.min_righthand;
339	sjinfo.jointype = JOIN_INNER;
340	/ we don't bother trying to make the remaining fields valid /
341	sjinfo.lhs_strict = false;
342	sjinfo.delay_upper_joins = false;
343	sjinfo.semi_can_btree = false;
344	sjinfo.semi_can_hash = false;
345	sjinfo.semi_operators = NIL;
346	sjinfo.semi_rhs_exprs = NIL;
347
348	/ Compute inner-join size /
349	orig_selec = clause_selectivity(root, (Node *) join_or_rinfo,
350	`0`, JOIN_INNER, &sjinfo);
351
352	/ And hack cached selectivity so join size remains the same /
353	join_or_rinfo->norm_selec = orig_selec / or_selec;
354	/ ensure result stays in sane range, in particular not "redundant" /
355	if (join_or_rinfo->norm_selec > `1`)
356	join_or_rinfo->norm_selec = `1`;
357	/ as explained above, we don't touch outer_selec /
358	}
359	}
360

Browse the source code of PostgreSQL/src/backend/optimizer/util/orclauses.c