prepqual.c source code [PostgreSQL/src/backend/optimizer/prep/prepqual.c]

1	/-------------------------------------------------------------------------*
2	*
3	* prepqual.c
4	* Routines for preprocessing qualification expressions
5	*
6	*
7	* While the parser will produce flattened (N-argument) AND/OR trees from
8	* simple sequences of AND'ed or OR'ed clauses, there might be an AND clause
9	* directly underneath another AND, or OR underneath OR, if the input was
10	* oddly parenthesized. Also, rule expansion and subquery flattening could
11	* produce such parsetrees. The planner wants to flatten all such cases
12	* to ensure consistent optimization behavior.
13	*
14	* Formerly, this module was responsible for doing the initial flattening,
15	* but now we leave it to eval_const_expressions to do that since it has to
16	* make a complete pass over the expression tree anyway. Instead, we just
17	* have to ensure that our manipulations preserve AND/OR flatness.
18	* pull_ands() and pull_ors() are used to maintain flatness of the AND/OR
19	* tree after local transformations that might introduce nested AND/ORs.
20	*
21	*
22	* Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
23	* Portions Copyright (c) 1994, Regents of the University of California
24	*
25	*
26	* IDENTIFICATION
27	* src/backend/optimizer/prep/prepqual.c
28	*
29	*-------------------------------------------------------------------------
30	*/
31
32	#include "postgres.h"
33
34	#include "nodes/makefuncs.h"
35	#include "nodes/nodeFuncs.h"
36	#include "optimizer/optimizer.h"
37	#include "optimizer/prep.h"
38	#include "utils/lsyscache.h"
39
40
41	static List pull_ands(List andlist);
42	static List pull_ors(List orlist);
43	static Expr find_duplicate_ors(Expr qual, bool is_check);
44	static Expr process_duplicate_ors(List orlist);
45
46
47	/*
48	* negate_clause
49	* Negate a Boolean expression.
50	*
51	* Input is a clause to be negated (e.g., the argument of a NOT clause).
52	* Returns a new clause equivalent to the negation of the given clause.
53	*
54	* Although this can be invoked on its own, it's mainly intended as a helper
55	* for eval_const_expressions(), and that context drives several design
56	* decisions. In particular, if the input is already AND/OR flat, we must
57	* preserve that property. We also don't bother to recurse in situations
58	* where we can assume that lower-level executions of eval_const_expressions
59	* would already have simplified sub-clauses of the input.
60	*
61	* The difference between this and a simple make_notclause() is that this
62	* tries to get rid of the NOT node by logical simplification. It's clearly
63	* always a win if the NOT node can be eliminated altogether. However, our
64	* use of DeMorgan's laws could result in having more NOT nodes rather than
65	* fewer. We do that unconditionally anyway, because in WHERE clauses it's
66	* important to expose as much top-level AND/OR structure as possible.
67	* Also, eliminating an intermediate NOT may allow us to flatten two levels
68	* of AND or OR together that we couldn't have otherwise. Finally, one of
69	* the motivations for doing this is to ensure that logically equivalent
70	* expressions will be seen as physically equal(), so we should always apply
71	* the same transformations.
72	*/
73	Node *
74	negate_clause(Node *node)
75	{
76	if (node == NULL) / should not happen /
77	elog(ERROR, "can't negate an empty subexpression");
78	switch (nodeTag(node))
79	{
80	case T_Const:
81	{
82	Const c = (Const ) node;
83
84	/ NOT NULL is still NULL /
85	if (c->constisnull)
86	return makeBoolConst(false, true);
87	/ otherwise pretty easy /
88	return makeBoolConst(!DatumGetBool(c->constvalue), false);
89	}
90	break;
91	case T_OpExpr:
92	{
93	/*
94	* Negate operator if possible: (NOT (< A B)) => (>= A B)
95	*/
96	OpExpr opexpr = (OpExpr ) node;
97	Oid negator = get_negator(opexpr->opno);
98
99	if (negator)
100	{
101	OpExpr *newopexpr = makeNode(OpExpr);
102
103	newopexpr->opno = negator;
104	newopexpr->opfuncid = InvalidOid;
105	newopexpr->opresulttype = opexpr->opresulttype;
106	newopexpr->opretset = opexpr->opretset;
107	newopexpr->opcollid = opexpr->opcollid;
108	newopexpr->inputcollid = opexpr->inputcollid;
109	newopexpr->args = opexpr->args;
110	newopexpr->location = opexpr->location;
111	return (Node *) newopexpr;
112	}
113	}
114	break;
115	case T_ScalarArrayOpExpr:
116	{
117	/*
118	* Negate a ScalarArrayOpExpr if its operator has a negator;
119	* for example x = ANY (list) becomes x <> ALL (list)
120	*/
121	ScalarArrayOpExpr saopexpr = (ScalarArrayOpExpr ) node;
122	Oid negator = get_negator(saopexpr->opno);
123
124	if (negator)
125	{
126	ScalarArrayOpExpr *newopexpr = makeNode(ScalarArrayOpExpr);
127
128	newopexpr->opno = negator;
129	newopexpr->opfuncid = InvalidOid;
130	newopexpr->useOr = !saopexpr->useOr;
131	newopexpr->inputcollid = saopexpr->inputcollid;
132	newopexpr->args = saopexpr->args;
133	newopexpr->location = saopexpr->location;
134	return (Node *) newopexpr;
135	}
136	}
137	break;
138	case T_BoolExpr:
139	{
140	BoolExpr expr = (BoolExpr ) node;
141
142	switch (expr->boolop)
143	{
144	/--------------------*
145	* Apply DeMorgan's Laws:
146	* (NOT (AND A B)) => (OR (NOT A) (NOT B))
147	* (NOT (OR A B)) => (AND (NOT A) (NOT B))
148	* i.e., swap AND for OR and negate each subclause.
149	*
150	* If the input is already AND/OR flat and has no NOT
151	* directly above AND or OR, this transformation preserves
152	* those properties. For example, if no direct child of
153	* the given AND clause is an AND or a NOT-above-OR, then
154	* the recursive calls of negate_clause() can't return any
155	* OR clauses. So we needn't call pull_ors() before
156	* building a new OR clause. Similarly for the OR case.
157	*--------------------
158	*/
159	case AND_EXPR:
160	{
161	List *nargs = NIL;
162	ListCell *lc;
163
164	foreach(lc, expr->args)
165	{
166	nargs = lappend(nargs,
167	negate_clause(lfirst(lc)));
168	}
169	return (Node *) make_orclause(nargs);
170	}
171	break;
172	case OR_EXPR:
173	{
174	List *nargs = NIL;
175	ListCell *lc;
176
177	foreach(lc, expr->args)
178	{
179	nargs = lappend(nargs,
180	negate_clause(lfirst(lc)));
181	}
182	return (Node *) make_andclause(nargs);
183	}
184	break;
185	case NOT_EXPR:
186
187	/*
188	* NOT underneath NOT: they cancel. We assume the
189	* input is already simplified, so no need to recurse.
190	*/
191	return (Node *) linitial(expr->args);
192	default:
193	elog(ERROR, "unrecognized boolop: %d",
194	(int) expr->boolop);
195	break;
196	}
197	}
198	break;
199	case T_NullTest:
200	{
201	NullTest expr = (NullTest ) node;
202
203	/*
204	* In the rowtype case, the two flavors of NullTest are not
205	* logical inverses, so we can't simplify. But it does work
206	* for scalar datatypes.
207	*/
208	if (!expr->argisrow)
209	{
210	NullTest *newexpr = makeNode(NullTest);
211
212	newexpr->arg = expr->arg;
213	newexpr->nulltesttype = (expr->nulltesttype == IS_NULL ?
214	IS_NOT_NULL : IS_NULL);
215	newexpr->argisrow = expr->argisrow;
216	newexpr->location = expr->location;
217	return (Node *) newexpr;
218	}
219	}
220	break;
221	case T_BooleanTest:
222	{
223	BooleanTest expr = (BooleanTest ) node;
224	BooleanTest *newexpr = makeNode(BooleanTest);
225
226	newexpr->arg = expr->arg;
227	switch (expr->booltesttype)
228	{
229	case IS_TRUE:
230	newexpr->booltesttype = IS_NOT_TRUE;
231	break;
232	case IS_NOT_TRUE:
233	newexpr->booltesttype = IS_TRUE;
234	break;
235	case IS_FALSE:
236	newexpr->booltesttype = IS_NOT_FALSE;
237	break;
238	case IS_NOT_FALSE:
239	newexpr->booltesttype = IS_FALSE;
240	break;
241	case IS_UNKNOWN:
242	newexpr->booltesttype = IS_NOT_UNKNOWN;
243	break;
244	case IS_NOT_UNKNOWN:
245	newexpr->booltesttype = IS_UNKNOWN;
246	break;
247	default:
248	elog(ERROR, "unrecognized booltesttype: %d",
249	(int) expr->booltesttype);
250	break;
251	}
252	newexpr->location = expr->location;
253	return (Node *) newexpr;
254	}
255	break;
256	default:
257	/ else fall through /
258	break;
259	}
260
261	/*
262	* Otherwise we don't know how to simplify this, so just tack on an
263	* explicit NOT node.
264	*/
265	return (Node ) make_notclause((Expr ) node);
266	}
267
268
269	/*
270	* canonicalize_qual
271	* Convert a qualification expression to the most useful form.
272	*
273	* This is primarily intended to be used on top-level WHERE (or JOIN/ON)
274	* clauses. It can also be used on top-level CHECK constraints, for which
275	* pass is_check = true. DO NOT call it on any expression that is not known
276	* to be one or the other, as it might apply inappropriate simplifications.
277	*
278	* The name of this routine is a holdover from a time when it would try to
279	* force the expression into canonical AND-of-ORs or OR-of-ANDs form.
280	* Eventually, we recognized that that had more theoretical purity than
281	* actual usefulness, and so now the transformation doesn't involve any
282	* notion of reaching a canonical form.
283	*
284	* NOTE: we assume the input has already been through eval_const_expressions
285	* and therefore possesses AND/OR flatness. Formerly this function included
286	* its own flattening logic, but that requires a useless extra pass over the
287	* tree.
288	*
289	* Returns the modified qualification.
290	*/
291	Expr *
292	canonicalize_qual(Expr *qual, bool is_check)
293	{
294	Expr *newqual;
295
296	/ Quick exit for empty qual /
297	if (qual == NULL)
298	return NULL;
299
300	/ This should not be invoked on quals in implicit-AND format /
301	Assert(!IsA(qual, List));
302
303	/*
304	* Pull up redundant subclauses in OR-of-AND trees. We do this only
305	* within the top-level AND/OR structure; there's no point in looking
306	* deeper. Also remove any NULL constants in the top-level structure.
307	*/
308	newqual = find_duplicate_ors(qual, is_check);
309
310	return newqual;
311	}
312
313
314	/*
315	* pull_ands
316	* Recursively flatten nested AND clauses into a single and-clause list.
317	*
318	* Input is the arglist of an AND clause.
319	* Returns the rebuilt arglist (note original list structure is not touched).
320	*/
321	static List *
322	pull_ands(List *andlist)
323	{
324	List *out_list = NIL;
325	ListCell *arg;
326
327	foreach(arg, andlist)
328	{
329	Node subexpr = (Node ) lfirst(arg);
330
331	/*
332	* Note: we can destructively concat the subexpression's arglist
333	* because we know the recursive invocation of pull_ands will have
334	* built a new arglist not shared with any other expr. Otherwise we'd
335	* need a list_copy here.
336	*/
337	if (is_andclause(subexpr))
338	out_list = list_concat(out_list,
339	pull_ands(((BoolExpr *) subexpr)->args));
340	else
341	out_list = lappend(out_list, subexpr);
342	}
343	return out_list;
344	}
345
346	/*
347	* pull_ors
348	* Recursively flatten nested OR clauses into a single or-clause list.
349	*
350	* Input is the arglist of an OR clause.
351	* Returns the rebuilt arglist (note original list structure is not touched).
352	*/
353	static List *
354	pull_ors(List *orlist)
355	{
356	List *out_list = NIL;
357	ListCell *arg;
358
359	foreach(arg, orlist)
360	{
361	Node subexpr = (Node ) lfirst(arg);
362
363	/*
364	* Note: we can destructively concat the subexpression's arglist
365	* because we know the recursive invocation of pull_ors will have
366	* built a new arglist not shared with any other expr. Otherwise we'd
367	* need a list_copy here.
368	*/
369	if (is_orclause(subexpr))
370	out_list = list_concat(out_list,
371	pull_ors(((BoolExpr *) subexpr)->args));
372	else
373	out_list = lappend(out_list, subexpr);
374	}
375	return out_list;
376	}
377
378
379	/--------------------*
380	* The following code attempts to apply the inverse OR distributive law:
381	* ((A AND B) OR (A AND C)) => (A AND (B OR C))
382	* That is, locate OR clauses in which every subclause contains an
383	* identical term, and pull out the duplicated terms.
384	*
385	* This may seem like a fairly useless activity, but it turns out to be
386	* applicable to many machine-generated queries, and there are also queries
387	* in some of the TPC benchmarks that need it. This was in fact almost the
388	* sole useful side-effect of the old prepqual code that tried to force
389	* the query into canonical AND-of-ORs form: the canonical equivalent of
390	* ((A AND B) OR (A AND C))
391	* is
392	* ((A OR A) AND (A OR C) AND (B OR A) AND (B OR C))
393	* which the code was able to simplify to
394	* (A AND (A OR C) AND (B OR A) AND (B OR C))
395	* thus successfully extracting the common condition A --- but at the cost
396	* of cluttering the qual with many redundant clauses.
397	*--------------------
398	*/
399
400	/*
401	* find_duplicate_ors
402	* Given a qualification tree with the NOTs pushed down, search for
403	* OR clauses to which the inverse OR distributive law might apply.
404	* Only the top-level AND/OR structure is searched.
405	*
406	* While at it, we remove any NULL constants within the top-level AND/OR
407	* structure, eg in a WHERE clause, "x OR NULL::boolean" is reduced to "x".
408	* In general that would change the result, so eval_const_expressions can't
409	* do it; but at top level of WHERE, we don't need to distinguish between
410	* FALSE and NULL results, so it's valid to treat NULL::boolean the same
411	* as FALSE and then simplify AND/OR accordingly. Conversely, in a top-level
412	* CHECK constraint, we may treat a NULL the same as TRUE.
413	*
414	* Returns the modified qualification. AND/OR flatness is preserved.
415	*/
416	static Expr *
417	find_duplicate_ors(Expr *qual, bool is_check)
418	{
419	if (is_orclause(qual))
420	{
421	List *orlist = NIL;
422	ListCell *temp;
423
424	/ Recurse /
425	foreach(temp, ((BoolExpr *) qual)->args)
426	{
427	Expr arg = (Expr ) lfirst(temp);
428
429	arg = find_duplicate_ors(arg, is_check);
430
431	/ Get rid of any constant inputs /
432	if (arg && IsA(arg, Const))
433	{
434	Const carg = (Const ) arg;
435
436	if (is_check)
437	{
438	/ Within OR in CHECK, drop constant FALSE /
439	if (!carg->constisnull && !DatumGetBool(carg->constvalue))
440	continue;
441	/ Constant TRUE or NULL, so OR reduces to TRUE /
442	return (Expr *) makeBoolConst(true, false);
443	}
444	else
445	{
446	/ Within OR in WHERE, drop constant FALSE or NULL /
447	if (carg->constisnull \|\| !DatumGetBool(carg->constvalue))
448	continue;
449	/ Constant TRUE, so OR reduces to TRUE /
450	return arg;
451	}
452	}
453
454	orlist = lappend(orlist, arg);
455	}
456
457	/ Flatten any ORs pulled up to just below here /
458	orlist = pull_ors(orlist);
459
460	/ Now we can look for duplicate ORs /
461	return process_duplicate_ors(orlist);
462	}
463	else if (is_andclause(qual))
464	{
465	List *andlist = NIL;
466	ListCell *temp;
467
468	/ Recurse /
469	foreach(temp, ((BoolExpr *) qual)->args)
470	{
471	Expr arg = (Expr ) lfirst(temp);
472
473	arg = find_duplicate_ors(arg, is_check);
474
475	/ Get rid of any constant inputs /
476	if (arg && IsA(arg, Const))
477	{
478	Const carg = (Const ) arg;
479
480	if (is_check)
481	{
482	/ Within AND in CHECK, drop constant TRUE or NULL /
483	if (carg->constisnull \|\| DatumGetBool(carg->constvalue))
484	continue;
485	/ Constant FALSE, so AND reduces to FALSE /
486	return arg;
487	}
488	else
489	{
490	/ Within AND in WHERE, drop constant TRUE /
491	if (!carg->constisnull && DatumGetBool(carg->constvalue))
492	continue;
493	/ Constant FALSE or NULL, so AND reduces to FALSE /
494	return (Expr *) makeBoolConst(false, false);
495	}
496	}
497
498	andlist = lappend(andlist, arg);
499	}
500
501	/ Flatten any ANDs introduced just below here /
502	andlist = pull_ands(andlist);
503
504	/ AND of no inputs reduces to TRUE /
505	if (andlist == NIL)
506	return (Expr *) makeBoolConst(true, false);
507
508	/ Single-expression AND just reduces to that expression /
509	if (list_length(andlist) == `1`)
510	return (Expr *) linitial(andlist);
511
512	/ Else we still need an AND node /
513	return make_andclause(andlist);
514	}
515	else
516	return qual;
517	}
518
519	/*
520	* process_duplicate_ors
521	* Given a list of exprs which are ORed together, try to apply
522	* the inverse OR distributive law.
523	*
524	* Returns the resulting expression (could be an AND clause, an OR
525	* clause, or maybe even a single subexpression).
526	*/
527	static Expr *
528	process_duplicate_ors(List *orlist)
529	{
530	List *reference = NIL;
531	int num_subclauses = `0`;
532	List *winners;
533	List *neworlist;
534	ListCell *temp;
535
536	/ OR of no inputs reduces to FALSE /
537	if (orlist == NIL)
538	return (Expr *) makeBoolConst(false, false);
539
540	/ Single-expression OR just reduces to that expression /
541	if (list_length(orlist) == `1`)
542	return (Expr *) linitial(orlist);
543
544	/*
545	* Choose the shortest AND clause as the reference list --- obviously, any
546	* subclause not in this clause isn't in all the clauses. If we find a
547	* clause that's not an AND, we can treat it as a one-element AND clause,
548	* which necessarily wins as shortest.
549	*/
550	foreach(temp, orlist)
551	{
552	Expr clause = (Expr ) lfirst(temp);
553
554	if (is_andclause(clause))
555	{
556	List subclauses = ((BoolExpr ) clause)->args;
557	int nclauses = list_length(subclauses);
558
559	if (reference == NIL \|\| nclauses < num_subclauses)
560	{
561	reference = subclauses;
562	num_subclauses = nclauses;
563	}
564	}
565	else
566	{
567	reference = list_make1(clause);
568	break;
569	}
570	}
571
572	/*
573	* Just in case, eliminate any duplicates in the reference list.
574	*/
575	reference = list_union(NIL, reference);
576
577	/*
578	* Check each element of the reference list to see if it's in all the OR
579	* clauses. Build a new list of winning clauses.
580	*/
581	winners = NIL;
582	foreach(temp, reference)
583	{
584	Expr refclause = (Expr ) lfirst(temp);
585	bool win = true;
586	ListCell *temp2;
587
588	foreach(temp2, orlist)
589	{
590	Expr clause = (Expr ) lfirst(temp2);
591
592	if (is_andclause(clause))
593	{
594	if (!list_member(((BoolExpr *) clause)->args, refclause))
595	{
596	win = false;
597	break;
598	}
599	}
600	else
601	{
602	if (!equal(refclause, clause))
603	{
604	win = false;
605	break;
606	}
607	}
608	}
609
610	if (win)
611	winners = lappend(winners, refclause);
612	}
613
614	/*
615	* If no winners, we can't transform the OR
616	*/
617	if (winners == NIL)
618	return make_orclause(orlist);
619
620	/*
621	* Generate new OR list consisting of the remaining sub-clauses.
622	*
623	* If any clause degenerates to empty, then we have a situation like (A
624	* AND B) OR (A), which can be reduced to just A --- that is, the
625	* additional conditions in other arms of the OR are irrelevant.
626	*
627	* Note that because we use list_difference, any multiple occurrences of a
628	* winning clause in an AND sub-clause will be removed automatically.
629	*/
630	neworlist = NIL;
631	foreach(temp, orlist)
632	{
633	Expr clause = (Expr ) lfirst(temp);
634
635	if (is_andclause(clause))
636	{
637	List subclauses = ((BoolExpr ) clause)->args;
638
639	subclauses = list_difference(subclauses, winners);
640	if (subclauses != NIL)
641	{
642	if (list_length(subclauses) == `1`)
643	neworlist = lappend(neworlist, linitial(subclauses));
644	else
645	neworlist = lappend(neworlist, make_andclause(subclauses));
646	}
647	else
648	{
649	neworlist = NIL; / degenerate case, see above /
650	break;
651	}
652	}
653	else
654	{
655	if (!list_member(winners, clause))
656	neworlist = lappend(neworlist, clause);
657	else
658	{
659	neworlist = NIL; / degenerate case, see above /
660	break;
661	}
662	}
663	}
664
665	/*
666	* Append reduced OR to the winners list, if it's not degenerate, handling
667	* the special case of one element correctly (can that really happen?).
668	* Also be careful to maintain AND/OR flatness in case we pulled up a
669	* sub-sub-OR-clause.
670	*/
671	if (neworlist != NIL)
672	{
673	if (list_length(neworlist) == `1`)
674	winners = lappend(winners, linitial(neworlist));
675	else
676	winners = lappend(winners, make_orclause(pull_ors(neworlist)));
677	}
678
679	/*
680	* And return the constructed AND clause, again being wary of a single
681	* element and AND/OR flatness.
682	*/
683	if (list_length(winners) == `1`)
684	return (Expr *) linitial(winners);
685	else
686	return make_andclause(pull_ands(winners));
687	}
688

Browse the source code of PostgreSQL/src/backend/optimizer/prep/prepqual.c