1/*-------------------------------------------------------------------------
2 *
3 * prepqual.c
4 * Routines for preprocessing qualification expressions
5 *
6 *
7 * While the parser will produce flattened (N-argument) AND/OR trees from
8 * simple sequences of AND'ed or OR'ed clauses, there might be an AND clause
9 * directly underneath another AND, or OR underneath OR, if the input was
10 * oddly parenthesized. Also, rule expansion and subquery flattening could
11 * produce such parsetrees. The planner wants to flatten all such cases
12 * to ensure consistent optimization behavior.
13 *
14 * Formerly, this module was responsible for doing the initial flattening,
15 * but now we leave it to eval_const_expressions to do that since it has to
16 * make a complete pass over the expression tree anyway. Instead, we just
17 * have to ensure that our manipulations preserve AND/OR flatness.
18 * pull_ands() and pull_ors() are used to maintain flatness of the AND/OR
19 * tree after local transformations that might introduce nested AND/ORs.
20 *
21 *
22 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
23 * Portions Copyright (c) 1994, Regents of the University of California
24 *
25 *
26 * IDENTIFICATION
27 * src/backend/optimizer/prep/prepqual.c
28 *
29 *-------------------------------------------------------------------------
30 */
31
32#include "postgres.h"
33
34#include "nodes/makefuncs.h"
35#include "nodes/nodeFuncs.h"
36#include "optimizer/optimizer.h"
37#include "optimizer/prep.h"
38#include "utils/lsyscache.h"
39
40
41static List *pull_ands(List *andlist);
42static List *pull_ors(List *orlist);
43static Expr *find_duplicate_ors(Expr *qual, bool is_check);
44static Expr *process_duplicate_ors(List *orlist);
45
46
47/*
48 * negate_clause
49 * Negate a Boolean expression.
50 *
51 * Input is a clause to be negated (e.g., the argument of a NOT clause).
52 * Returns a new clause equivalent to the negation of the given clause.
53 *
54 * Although this can be invoked on its own, it's mainly intended as a helper
55 * for eval_const_expressions(), and that context drives several design
56 * decisions. In particular, if the input is already AND/OR flat, we must
57 * preserve that property. We also don't bother to recurse in situations
58 * where we can assume that lower-level executions of eval_const_expressions
59 * would already have simplified sub-clauses of the input.
60 *
61 * The difference between this and a simple make_notclause() is that this
62 * tries to get rid of the NOT node by logical simplification. It's clearly
63 * always a win if the NOT node can be eliminated altogether. However, our
64 * use of DeMorgan's laws could result in having more NOT nodes rather than
65 * fewer. We do that unconditionally anyway, because in WHERE clauses it's
66 * important to expose as much top-level AND/OR structure as possible.
67 * Also, eliminating an intermediate NOT may allow us to flatten two levels
68 * of AND or OR together that we couldn't have otherwise. Finally, one of
69 * the motivations for doing this is to ensure that logically equivalent
70 * expressions will be seen as physically equal(), so we should always apply
71 * the same transformations.
72 */
73Node *
74negate_clause(Node *node)
75{
76 if (node == NULL) /* should not happen */
77 elog(ERROR, "can't negate an empty subexpression");
78 switch (nodeTag(node))
79 {
80 case T_Const:
81 {
82 Const *c = (Const *) node;
83
84 /* NOT NULL is still NULL */
85 if (c->constisnull)
86 return makeBoolConst(false, true);
87 /* otherwise pretty easy */
88 return makeBoolConst(!DatumGetBool(c->constvalue), false);
89 }
90 break;
91 case T_OpExpr:
92 {
93 /*
94 * Negate operator if possible: (NOT (< A B)) => (>= A B)
95 */
96 OpExpr *opexpr = (OpExpr *) node;
97 Oid negator = get_negator(opexpr->opno);
98
99 if (negator)
100 {
101 OpExpr *newopexpr = makeNode(OpExpr);
102
103 newopexpr->opno = negator;
104 newopexpr->opfuncid = InvalidOid;
105 newopexpr->opresulttype = opexpr->opresulttype;
106 newopexpr->opretset = opexpr->opretset;
107 newopexpr->opcollid = opexpr->opcollid;
108 newopexpr->inputcollid = opexpr->inputcollid;
109 newopexpr->args = opexpr->args;
110 newopexpr->location = opexpr->location;
111 return (Node *) newopexpr;
112 }
113 }
114 break;
115 case T_ScalarArrayOpExpr:
116 {
117 /*
118 * Negate a ScalarArrayOpExpr if its operator has a negator;
119 * for example x = ANY (list) becomes x <> ALL (list)
120 */
121 ScalarArrayOpExpr *saopexpr = (ScalarArrayOpExpr *) node;
122 Oid negator = get_negator(saopexpr->opno);
123
124 if (negator)
125 {
126 ScalarArrayOpExpr *newopexpr = makeNode(ScalarArrayOpExpr);
127
128 newopexpr->opno = negator;
129 newopexpr->opfuncid = InvalidOid;
130 newopexpr->useOr = !saopexpr->useOr;
131 newopexpr->inputcollid = saopexpr->inputcollid;
132 newopexpr->args = saopexpr->args;
133 newopexpr->location = saopexpr->location;
134 return (Node *) newopexpr;
135 }
136 }
137 break;
138 case T_BoolExpr:
139 {
140 BoolExpr *expr = (BoolExpr *) node;
141
142 switch (expr->boolop)
143 {
144 /*--------------------
145 * Apply DeMorgan's Laws:
146 * (NOT (AND A B)) => (OR (NOT A) (NOT B))
147 * (NOT (OR A B)) => (AND (NOT A) (NOT B))
148 * i.e., swap AND for OR and negate each subclause.
149 *
150 * If the input is already AND/OR flat and has no NOT
151 * directly above AND or OR, this transformation preserves
152 * those properties. For example, if no direct child of
153 * the given AND clause is an AND or a NOT-above-OR, then
154 * the recursive calls of negate_clause() can't return any
155 * OR clauses. So we needn't call pull_ors() before
156 * building a new OR clause. Similarly for the OR case.
157 *--------------------
158 */
159 case AND_EXPR:
160 {
161 List *nargs = NIL;
162 ListCell *lc;
163
164 foreach(lc, expr->args)
165 {
166 nargs = lappend(nargs,
167 negate_clause(lfirst(lc)));
168 }
169 return (Node *) make_orclause(nargs);
170 }
171 break;
172 case OR_EXPR:
173 {
174 List *nargs = NIL;
175 ListCell *lc;
176
177 foreach(lc, expr->args)
178 {
179 nargs = lappend(nargs,
180 negate_clause(lfirst(lc)));
181 }
182 return (Node *) make_andclause(nargs);
183 }
184 break;
185 case NOT_EXPR:
186
187 /*
188 * NOT underneath NOT: they cancel. We assume the
189 * input is already simplified, so no need to recurse.
190 */
191 return (Node *) linitial(expr->args);
192 default:
193 elog(ERROR, "unrecognized boolop: %d",
194 (int) expr->boolop);
195 break;
196 }
197 }
198 break;
199 case T_NullTest:
200 {
201 NullTest *expr = (NullTest *) node;
202
203 /*
204 * In the rowtype case, the two flavors of NullTest are *not*
205 * logical inverses, so we can't simplify. But it does work
206 * for scalar datatypes.
207 */
208 if (!expr->argisrow)
209 {
210 NullTest *newexpr = makeNode(NullTest);
211
212 newexpr->arg = expr->arg;
213 newexpr->nulltesttype = (expr->nulltesttype == IS_NULL ?
214 IS_NOT_NULL : IS_NULL);
215 newexpr->argisrow = expr->argisrow;
216 newexpr->location = expr->location;
217 return (Node *) newexpr;
218 }
219 }
220 break;
221 case T_BooleanTest:
222 {
223 BooleanTest *expr = (BooleanTest *) node;
224 BooleanTest *newexpr = makeNode(BooleanTest);
225
226 newexpr->arg = expr->arg;
227 switch (expr->booltesttype)
228 {
229 case IS_TRUE:
230 newexpr->booltesttype = IS_NOT_TRUE;
231 break;
232 case IS_NOT_TRUE:
233 newexpr->booltesttype = IS_TRUE;
234 break;
235 case IS_FALSE:
236 newexpr->booltesttype = IS_NOT_FALSE;
237 break;
238 case IS_NOT_FALSE:
239 newexpr->booltesttype = IS_FALSE;
240 break;
241 case IS_UNKNOWN:
242 newexpr->booltesttype = IS_NOT_UNKNOWN;
243 break;
244 case IS_NOT_UNKNOWN:
245 newexpr->booltesttype = IS_UNKNOWN;
246 break;
247 default:
248 elog(ERROR, "unrecognized booltesttype: %d",
249 (int) expr->booltesttype);
250 break;
251 }
252 newexpr->location = expr->location;
253 return (Node *) newexpr;
254 }
255 break;
256 default:
257 /* else fall through */
258 break;
259 }
260
261 /*
262 * Otherwise we don't know how to simplify this, so just tack on an
263 * explicit NOT node.
264 */
265 return (Node *) make_notclause((Expr *) node);
266}
267
268
269/*
270 * canonicalize_qual
271 * Convert a qualification expression to the most useful form.
272 *
273 * This is primarily intended to be used on top-level WHERE (or JOIN/ON)
274 * clauses. It can also be used on top-level CHECK constraints, for which
275 * pass is_check = true. DO NOT call it on any expression that is not known
276 * to be one or the other, as it might apply inappropriate simplifications.
277 *
278 * The name of this routine is a holdover from a time when it would try to
279 * force the expression into canonical AND-of-ORs or OR-of-ANDs form.
280 * Eventually, we recognized that that had more theoretical purity than
281 * actual usefulness, and so now the transformation doesn't involve any
282 * notion of reaching a canonical form.
283 *
284 * NOTE: we assume the input has already been through eval_const_expressions
285 * and therefore possesses AND/OR flatness. Formerly this function included
286 * its own flattening logic, but that requires a useless extra pass over the
287 * tree.
288 *
289 * Returns the modified qualification.
290 */
291Expr *
292canonicalize_qual(Expr *qual, bool is_check)
293{
294 Expr *newqual;
295
296 /* Quick exit for empty qual */
297 if (qual == NULL)
298 return NULL;
299
300 /* This should not be invoked on quals in implicit-AND format */
301 Assert(!IsA(qual, List));
302
303 /*
304 * Pull up redundant subclauses in OR-of-AND trees. We do this only
305 * within the top-level AND/OR structure; there's no point in looking
306 * deeper. Also remove any NULL constants in the top-level structure.
307 */
308 newqual = find_duplicate_ors(qual, is_check);
309
310 return newqual;
311}
312
313
314/*
315 * pull_ands
316 * Recursively flatten nested AND clauses into a single and-clause list.
317 *
318 * Input is the arglist of an AND clause.
319 * Returns the rebuilt arglist (note original list structure is not touched).
320 */
321static List *
322pull_ands(List *andlist)
323{
324 List *out_list = NIL;
325 ListCell *arg;
326
327 foreach(arg, andlist)
328 {
329 Node *subexpr = (Node *) lfirst(arg);
330
331 /*
332 * Note: we can destructively concat the subexpression's arglist
333 * because we know the recursive invocation of pull_ands will have
334 * built a new arglist not shared with any other expr. Otherwise we'd
335 * need a list_copy here.
336 */
337 if (is_andclause(subexpr))
338 out_list = list_concat(out_list,
339 pull_ands(((BoolExpr *) subexpr)->args));
340 else
341 out_list = lappend(out_list, subexpr);
342 }
343 return out_list;
344}
345
346/*
347 * pull_ors
348 * Recursively flatten nested OR clauses into a single or-clause list.
349 *
350 * Input is the arglist of an OR clause.
351 * Returns the rebuilt arglist (note original list structure is not touched).
352 */
353static List *
354pull_ors(List *orlist)
355{
356 List *out_list = NIL;
357 ListCell *arg;
358
359 foreach(arg, orlist)
360 {
361 Node *subexpr = (Node *) lfirst(arg);
362
363 /*
364 * Note: we can destructively concat the subexpression's arglist
365 * because we know the recursive invocation of pull_ors will have
366 * built a new arglist not shared with any other expr. Otherwise we'd
367 * need a list_copy here.
368 */
369 if (is_orclause(subexpr))
370 out_list = list_concat(out_list,
371 pull_ors(((BoolExpr *) subexpr)->args));
372 else
373 out_list = lappend(out_list, subexpr);
374 }
375 return out_list;
376}
377
378
379/*--------------------
380 * The following code attempts to apply the inverse OR distributive law:
381 * ((A AND B) OR (A AND C)) => (A AND (B OR C))
382 * That is, locate OR clauses in which every subclause contains an
383 * identical term, and pull out the duplicated terms.
384 *
385 * This may seem like a fairly useless activity, but it turns out to be
386 * applicable to many machine-generated queries, and there are also queries
387 * in some of the TPC benchmarks that need it. This was in fact almost the
388 * sole useful side-effect of the old prepqual code that tried to force
389 * the query into canonical AND-of-ORs form: the canonical equivalent of
390 * ((A AND B) OR (A AND C))
391 * is
392 * ((A OR A) AND (A OR C) AND (B OR A) AND (B OR C))
393 * which the code was able to simplify to
394 * (A AND (A OR C) AND (B OR A) AND (B OR C))
395 * thus successfully extracting the common condition A --- but at the cost
396 * of cluttering the qual with many redundant clauses.
397 *--------------------
398 */
399
400/*
401 * find_duplicate_ors
402 * Given a qualification tree with the NOTs pushed down, search for
403 * OR clauses to which the inverse OR distributive law might apply.
404 * Only the top-level AND/OR structure is searched.
405 *
406 * While at it, we remove any NULL constants within the top-level AND/OR
407 * structure, eg in a WHERE clause, "x OR NULL::boolean" is reduced to "x".
408 * In general that would change the result, so eval_const_expressions can't
409 * do it; but at top level of WHERE, we don't need to distinguish between
410 * FALSE and NULL results, so it's valid to treat NULL::boolean the same
411 * as FALSE and then simplify AND/OR accordingly. Conversely, in a top-level
412 * CHECK constraint, we may treat a NULL the same as TRUE.
413 *
414 * Returns the modified qualification. AND/OR flatness is preserved.
415 */
416static Expr *
417find_duplicate_ors(Expr *qual, bool is_check)
418{
419 if (is_orclause(qual))
420 {
421 List *orlist = NIL;
422 ListCell *temp;
423
424 /* Recurse */
425 foreach(temp, ((BoolExpr *) qual)->args)
426 {
427 Expr *arg = (Expr *) lfirst(temp);
428
429 arg = find_duplicate_ors(arg, is_check);
430
431 /* Get rid of any constant inputs */
432 if (arg && IsA(arg, Const))
433 {
434 Const *carg = (Const *) arg;
435
436 if (is_check)
437 {
438 /* Within OR in CHECK, drop constant FALSE */
439 if (!carg->constisnull && !DatumGetBool(carg->constvalue))
440 continue;
441 /* Constant TRUE or NULL, so OR reduces to TRUE */
442 return (Expr *) makeBoolConst(true, false);
443 }
444 else
445 {
446 /* Within OR in WHERE, drop constant FALSE or NULL */
447 if (carg->constisnull || !DatumGetBool(carg->constvalue))
448 continue;
449 /* Constant TRUE, so OR reduces to TRUE */
450 return arg;
451 }
452 }
453
454 orlist = lappend(orlist, arg);
455 }
456
457 /* Flatten any ORs pulled up to just below here */
458 orlist = pull_ors(orlist);
459
460 /* Now we can look for duplicate ORs */
461 return process_duplicate_ors(orlist);
462 }
463 else if (is_andclause(qual))
464 {
465 List *andlist = NIL;
466 ListCell *temp;
467
468 /* Recurse */
469 foreach(temp, ((BoolExpr *) qual)->args)
470 {
471 Expr *arg = (Expr *) lfirst(temp);
472
473 arg = find_duplicate_ors(arg, is_check);
474
475 /* Get rid of any constant inputs */
476 if (arg && IsA(arg, Const))
477 {
478 Const *carg = (Const *) arg;
479
480 if (is_check)
481 {
482 /* Within AND in CHECK, drop constant TRUE or NULL */
483 if (carg->constisnull || DatumGetBool(carg->constvalue))
484 continue;
485 /* Constant FALSE, so AND reduces to FALSE */
486 return arg;
487 }
488 else
489 {
490 /* Within AND in WHERE, drop constant TRUE */
491 if (!carg->constisnull && DatumGetBool(carg->constvalue))
492 continue;
493 /* Constant FALSE or NULL, so AND reduces to FALSE */
494 return (Expr *) makeBoolConst(false, false);
495 }
496 }
497
498 andlist = lappend(andlist, arg);
499 }
500
501 /* Flatten any ANDs introduced just below here */
502 andlist = pull_ands(andlist);
503
504 /* AND of no inputs reduces to TRUE */
505 if (andlist == NIL)
506 return (Expr *) makeBoolConst(true, false);
507
508 /* Single-expression AND just reduces to that expression */
509 if (list_length(andlist) == 1)
510 return (Expr *) linitial(andlist);
511
512 /* Else we still need an AND node */
513 return make_andclause(andlist);
514 }
515 else
516 return qual;
517}
518
519/*
520 * process_duplicate_ors
521 * Given a list of exprs which are ORed together, try to apply
522 * the inverse OR distributive law.
523 *
524 * Returns the resulting expression (could be an AND clause, an OR
525 * clause, or maybe even a single subexpression).
526 */
527static Expr *
528process_duplicate_ors(List *orlist)
529{
530 List *reference = NIL;
531 int num_subclauses = 0;
532 List *winners;
533 List *neworlist;
534 ListCell *temp;
535
536 /* OR of no inputs reduces to FALSE */
537 if (orlist == NIL)
538 return (Expr *) makeBoolConst(false, false);
539
540 /* Single-expression OR just reduces to that expression */
541 if (list_length(orlist) == 1)
542 return (Expr *) linitial(orlist);
543
544 /*
545 * Choose the shortest AND clause as the reference list --- obviously, any
546 * subclause not in this clause isn't in all the clauses. If we find a
547 * clause that's not an AND, we can treat it as a one-element AND clause,
548 * which necessarily wins as shortest.
549 */
550 foreach(temp, orlist)
551 {
552 Expr *clause = (Expr *) lfirst(temp);
553
554 if (is_andclause(clause))
555 {
556 List *subclauses = ((BoolExpr *) clause)->args;
557 int nclauses = list_length(subclauses);
558
559 if (reference == NIL || nclauses < num_subclauses)
560 {
561 reference = subclauses;
562 num_subclauses = nclauses;
563 }
564 }
565 else
566 {
567 reference = list_make1(clause);
568 break;
569 }
570 }
571
572 /*
573 * Just in case, eliminate any duplicates in the reference list.
574 */
575 reference = list_union(NIL, reference);
576
577 /*
578 * Check each element of the reference list to see if it's in all the OR
579 * clauses. Build a new list of winning clauses.
580 */
581 winners = NIL;
582 foreach(temp, reference)
583 {
584 Expr *refclause = (Expr *) lfirst(temp);
585 bool win = true;
586 ListCell *temp2;
587
588 foreach(temp2, orlist)
589 {
590 Expr *clause = (Expr *) lfirst(temp2);
591
592 if (is_andclause(clause))
593 {
594 if (!list_member(((BoolExpr *) clause)->args, refclause))
595 {
596 win = false;
597 break;
598 }
599 }
600 else
601 {
602 if (!equal(refclause, clause))
603 {
604 win = false;
605 break;
606 }
607 }
608 }
609
610 if (win)
611 winners = lappend(winners, refclause);
612 }
613
614 /*
615 * If no winners, we can't transform the OR
616 */
617 if (winners == NIL)
618 return make_orclause(orlist);
619
620 /*
621 * Generate new OR list consisting of the remaining sub-clauses.
622 *
623 * If any clause degenerates to empty, then we have a situation like (A
624 * AND B) OR (A), which can be reduced to just A --- that is, the
625 * additional conditions in other arms of the OR are irrelevant.
626 *
627 * Note that because we use list_difference, any multiple occurrences of a
628 * winning clause in an AND sub-clause will be removed automatically.
629 */
630 neworlist = NIL;
631 foreach(temp, orlist)
632 {
633 Expr *clause = (Expr *) lfirst(temp);
634
635 if (is_andclause(clause))
636 {
637 List *subclauses = ((BoolExpr *) clause)->args;
638
639 subclauses = list_difference(subclauses, winners);
640 if (subclauses != NIL)
641 {
642 if (list_length(subclauses) == 1)
643 neworlist = lappend(neworlist, linitial(subclauses));
644 else
645 neworlist = lappend(neworlist, make_andclause(subclauses));
646 }
647 else
648 {
649 neworlist = NIL; /* degenerate case, see above */
650 break;
651 }
652 }
653 else
654 {
655 if (!list_member(winners, clause))
656 neworlist = lappend(neworlist, clause);
657 else
658 {
659 neworlist = NIL; /* degenerate case, see above */
660 break;
661 }
662 }
663 }
664
665 /*
666 * Append reduced OR to the winners list, if it's not degenerate, handling
667 * the special case of one element correctly (can that really happen?).
668 * Also be careful to maintain AND/OR flatness in case we pulled up a
669 * sub-sub-OR-clause.
670 */
671 if (neworlist != NIL)
672 {
673 if (list_length(neworlist) == 1)
674 winners = lappend(winners, linitial(neworlist));
675 else
676 winners = lappend(winners, make_orclause(pull_ors(neworlist)));
677 }
678
679 /*
680 * And return the constructed AND clause, again being wary of a single
681 * element and AND/OR flatness.
682 */
683 if (list_length(winners) == 1)
684 return (Expr *) linitial(winners);
685 else
686 return make_andclause(pull_ands(winners));
687}
688