1/*-------------------------------------------------------------------------
2 *
3 * planagg.c
4 * Special planning for aggregate queries.
5 *
6 * This module tries to replace MIN/MAX aggregate functions by subqueries
7 * of the form
8 * (SELECT col FROM tab
9 * WHERE col IS NOT NULL AND existing-quals
10 * ORDER BY col ASC/DESC
11 * LIMIT 1)
12 * Given a suitable index on tab.col, this can be much faster than the
13 * generic scan-all-the-rows aggregation plan. We can handle multiple
14 * MIN/MAX aggregates by generating multiple subqueries, and their
15 * orderings can be different. However, if the query contains any
16 * non-optimizable aggregates, there's no point since we'll have to
17 * scan all the rows anyway.
18 *
19 *
20 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
21 * Portions Copyright (c) 1994, Regents of the University of California
22 *
23 *
24 * IDENTIFICATION
25 * src/backend/optimizer/plan/planagg.c
26 *
27 *-------------------------------------------------------------------------
28 */
29#include "postgres.h"
30
31#include "access/htup_details.h"
32#include "catalog/pg_aggregate.h"
33#include "catalog/pg_type.h"
34#include "nodes/makefuncs.h"
35#include "nodes/nodeFuncs.h"
36#include "optimizer/clauses.h"
37#include "optimizer/cost.h"
38#include "optimizer/optimizer.h"
39#include "optimizer/pathnode.h"
40#include "optimizer/paths.h"
41#include "optimizer/planmain.h"
42#include "optimizer/subselect.h"
43#include "optimizer/tlist.h"
44#include "parser/parsetree.h"
45#include "parser/parse_clause.h"
46#include "rewrite/rewriteManip.h"
47#include "utils/lsyscache.h"
48#include "utils/syscache.h"
49
50
51static bool find_minmax_aggs_walker(Node *node, List **context);
52static bool build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
53 Oid eqop, Oid sortop, bool nulls_first);
54static void minmax_qp_callback(PlannerInfo *root, void *extra);
55static Oid fetch_agg_sort_op(Oid aggfnoid);
56
57
58/*
59 * preprocess_minmax_aggregates - preprocess MIN/MAX aggregates
60 *
61 * Check to see whether the query contains MIN/MAX aggregate functions that
62 * might be optimizable via indexscans. If it does, and all the aggregates
63 * are potentially optimizable, then create a MinMaxAggPath and add it to
64 * the (UPPERREL_GROUP_AGG, NULL) upperrel.
65 *
66 * This should be called by grouping_planner() just before it's ready to call
67 * query_planner(), because we generate indexscan paths by cloning the
68 * planner's state and invoking query_planner() on a modified version of
69 * the query parsetree. Thus, all preprocessing needed before query_planner()
70 * must already be done.
71 */
72void
73preprocess_minmax_aggregates(PlannerInfo *root)
74{
75 Query *parse = root->parse;
76 FromExpr *jtnode;
77 RangeTblRef *rtr;
78 RangeTblEntry *rte;
79 List *aggs_list;
80 RelOptInfo *grouped_rel;
81 ListCell *lc;
82
83 /* minmax_aggs list should be empty at this point */
84 Assert(root->minmax_aggs == NIL);
85
86 /* Nothing to do if query has no aggregates */
87 if (!parse->hasAggs)
88 return;
89
90 Assert(!parse->setOperations); /* shouldn't get here if a setop */
91 Assert(parse->rowMarks == NIL); /* nor if FOR UPDATE */
92
93 /*
94 * Reject unoptimizable cases.
95 *
96 * We don't handle GROUP BY or windowing, because our current
97 * implementations of grouping require looking at all the rows anyway, and
98 * so there's not much point in optimizing MIN/MAX.
99 */
100 if (parse->groupClause || list_length(parse->groupingSets) > 1 ||
101 parse->hasWindowFuncs)
102 return;
103
104 /*
105 * Reject if query contains any CTEs; there's no way to build an indexscan
106 * on one so we couldn't succeed here. (If the CTEs are unreferenced,
107 * that's not true, but it doesn't seem worth expending cycles to check.)
108 */
109 if (parse->cteList)
110 return;
111
112 /*
113 * We also restrict the query to reference exactly one table, since join
114 * conditions can't be handled reasonably. (We could perhaps handle a
115 * query containing cartesian-product joins, but it hardly seems worth the
116 * trouble.) However, the single table could be buried in several levels
117 * of FromExpr due to subqueries. Note the "single" table could be an
118 * inheritance parent, too, including the case of a UNION ALL subquery
119 * that's been flattened to an appendrel.
120 */
121 jtnode = parse->jointree;
122 while (IsA(jtnode, FromExpr))
123 {
124 if (list_length(jtnode->fromlist) != 1)
125 return;
126 jtnode = linitial(jtnode->fromlist);
127 }
128 if (!IsA(jtnode, RangeTblRef))
129 return;
130 rtr = (RangeTblRef *) jtnode;
131 rte = planner_rt_fetch(rtr->rtindex, root);
132 if (rte->rtekind == RTE_RELATION)
133 /* ordinary relation, ok */ ;
134 else if (rte->rtekind == RTE_SUBQUERY && rte->inh)
135 /* flattened UNION ALL subquery, ok */ ;
136 else
137 return;
138
139 /*
140 * Scan the tlist and HAVING qual to find all the aggregates and verify
141 * all are MIN/MAX aggregates. Stop as soon as we find one that isn't.
142 */
143 aggs_list = NIL;
144 if (find_minmax_aggs_walker((Node *) root->processed_tlist, &aggs_list))
145 return;
146 if (find_minmax_aggs_walker(parse->havingQual, &aggs_list))
147 return;
148
149 /*
150 * OK, there is at least the possibility of performing the optimization.
151 * Build an access path for each aggregate. If any of the aggregates
152 * prove to be non-indexable, give up; there is no point in optimizing
153 * just some of them.
154 */
155 foreach(lc, aggs_list)
156 {
157 MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
158 Oid eqop;
159 bool reverse;
160
161 /*
162 * We'll need the equality operator that goes with the aggregate's
163 * ordering operator.
164 */
165 eqop = get_equality_op_for_ordering_op(mminfo->aggsortop, &reverse);
166 if (!OidIsValid(eqop)) /* shouldn't happen */
167 elog(ERROR, "could not find equality operator for ordering operator %u",
168 mminfo->aggsortop);
169
170 /*
171 * We can use either an ordering that gives NULLS FIRST or one that
172 * gives NULLS LAST; furthermore there's unlikely to be much
173 * performance difference between them, so it doesn't seem worth
174 * costing out both ways if we get a hit on the first one. NULLS
175 * FIRST is more likely to be available if the operator is a
176 * reverse-sort operator, so try that first if reverse.
177 */
178 if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, reverse))
179 continue;
180 if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, !reverse))
181 continue;
182
183 /* No indexable path for this aggregate, so fail */
184 return;
185 }
186
187 /*
188 * OK, we can do the query this way. Prepare to create a MinMaxAggPath
189 * node.
190 *
191 * First, create an output Param node for each agg. (If we end up not
192 * using the MinMaxAggPath, we'll waste a PARAM_EXEC slot for each agg,
193 * which is not worth worrying about. We can't wait till create_plan time
194 * to decide whether to make the Param, unfortunately.)
195 */
196 foreach(lc, aggs_list)
197 {
198 MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc);
199
200 mminfo->param =
201 SS_make_initplan_output_param(root,
202 exprType((Node *) mminfo->target),
203 -1,
204 exprCollation((Node *) mminfo->target));
205 }
206
207 /*
208 * Create a MinMaxAggPath node with the appropriate estimated costs and
209 * other needed data, and add it to the UPPERREL_GROUP_AGG upperrel, where
210 * it will compete against the standard aggregate implementation. (It
211 * will likely always win, but we need not assume that here.)
212 *
213 * Note: grouping_planner won't have created this upperrel yet, but it's
214 * fine for us to create it first. We will not have inserted the correct
215 * consider_parallel value in it, but MinMaxAggPath paths are currently
216 * never parallel-safe anyway, so that doesn't matter. Likewise, it
217 * doesn't matter that we haven't filled FDW-related fields in the rel.
218 * Also, because there are no rowmarks, we know that the processed_tlist
219 * doesn't need to change anymore, so making the pathtarget now is safe.
220 */
221 grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
222 add_path(grouped_rel, (Path *)
223 create_minmaxagg_path(root, grouped_rel,
224 create_pathtarget(root,
225 root->processed_tlist),
226 aggs_list,
227 (List *) parse->havingQual));
228}
229
230/*
231 * find_minmax_aggs_walker
232 * Recursively scan the Aggref nodes in an expression tree, and check
233 * that each one is a MIN/MAX aggregate. If so, build a list of the
234 * distinct aggregate calls in the tree.
235 *
236 * Returns true if a non-MIN/MAX aggregate is found, false otherwise.
237 * (This seemingly-backward definition is used because expression_tree_walker
238 * aborts the scan on true return, which is what we want.)
239 *
240 * Found aggregates are added to the list at *context; it's up to the caller
241 * to initialize the list to NIL.
242 *
243 * This does not descend into subqueries, and so should be used only after
244 * reduction of sublinks to subplans. There mustn't be outer-aggregate
245 * references either.
246 */
247static bool
248find_minmax_aggs_walker(Node *node, List **context)
249{
250 if (node == NULL)
251 return false;
252 if (IsA(node, Aggref))
253 {
254 Aggref *aggref = (Aggref *) node;
255 Oid aggsortop;
256 TargetEntry *curTarget;
257 MinMaxAggInfo *mminfo;
258 ListCell *l;
259
260 Assert(aggref->agglevelsup == 0);
261 if (list_length(aggref->args) != 1)
262 return true; /* it couldn't be MIN/MAX */
263
264 /*
265 * ORDER BY is usually irrelevant for MIN/MAX, but it can change the
266 * outcome if the aggsortop's operator class recognizes non-identical
267 * values as equal. For example, 4.0 and 4.00 are equal according to
268 * numeric_ops, yet distinguishable. If MIN() receives more than one
269 * value equal to 4.0 and no value less than 4.0, it is unspecified
270 * which of those equal values MIN() returns. An ORDER BY expression
271 * that differs for each of those equal values of the argument
272 * expression makes the result predictable once again. This is a
273 * niche requirement, and we do not implement it with subquery paths.
274 * In any case, this test lets us reject ordered-set aggregates
275 * quickly.
276 */
277 if (aggref->aggorder != NIL)
278 return true;
279 /* note: we do not care if DISTINCT is mentioned ... */
280
281 /*
282 * We might implement the optimization when a FILTER clause is present
283 * by adding the filter to the quals of the generated subquery. For
284 * now, just punt.
285 */
286 if (aggref->aggfilter != NULL)
287 return true;
288
289 aggsortop = fetch_agg_sort_op(aggref->aggfnoid);
290 if (!OidIsValid(aggsortop))
291 return true; /* not a MIN/MAX aggregate */
292
293 curTarget = (TargetEntry *) linitial(aggref->args);
294
295 if (contain_mutable_functions((Node *) curTarget->expr))
296 return true; /* not potentially indexable */
297
298 if (type_is_rowtype(exprType((Node *) curTarget->expr)))
299 return true; /* IS NOT NULL would have weird semantics */
300
301 /*
302 * Check whether it's already in the list, and add it if not.
303 */
304 foreach(l, *context)
305 {
306 mminfo = (MinMaxAggInfo *) lfirst(l);
307 if (mminfo->aggfnoid == aggref->aggfnoid &&
308 equal(mminfo->target, curTarget->expr))
309 return false;
310 }
311
312 mminfo = makeNode(MinMaxAggInfo);
313 mminfo->aggfnoid = aggref->aggfnoid;
314 mminfo->aggsortop = aggsortop;
315 mminfo->target = curTarget->expr;
316 mminfo->subroot = NULL; /* don't compute path yet */
317 mminfo->path = NULL;
318 mminfo->pathcost = 0;
319 mminfo->param = NULL;
320
321 *context = lappend(*context, mminfo);
322
323 /*
324 * We need not recurse into the argument, since it can't contain any
325 * aggregates.
326 */
327 return false;
328 }
329 Assert(!IsA(node, SubLink));
330 return expression_tree_walker(node, find_minmax_aggs_walker,
331 (void *) context);
332}
333
334/*
335 * build_minmax_path
336 * Given a MIN/MAX aggregate, try to build an indexscan Path it can be
337 * optimized with.
338 *
339 * If successful, stash the best path in *mminfo and return true.
340 * Otherwise, return false.
341 */
342static bool
343build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo,
344 Oid eqop, Oid sortop, bool nulls_first)
345{
346 PlannerInfo *subroot;
347 Query *parse;
348 TargetEntry *tle;
349 List *tlist;
350 NullTest *ntest;
351 SortGroupClause *sortcl;
352 RelOptInfo *final_rel;
353 Path *sorted_path;
354 Cost path_cost;
355 double path_fraction;
356
357 /*
358 * We are going to construct what is effectively a sub-SELECT query, so
359 * clone the current query level's state and adjust it to make it look
360 * like a subquery. Any outer references will now be one level higher
361 * than before. (This means that when we are done, there will be no Vars
362 * of level 1, which is why the subquery can become an initplan.)
363 */
364 subroot = (PlannerInfo *) palloc(sizeof(PlannerInfo));
365 memcpy(subroot, root, sizeof(PlannerInfo));
366 subroot->query_level++;
367 subroot->parent_root = root;
368 /* reset subplan-related stuff */
369 subroot->plan_params = NIL;
370 subroot->outer_params = NULL;
371 subroot->init_plans = NIL;
372
373 subroot->parse = parse = copyObject(root->parse);
374 IncrementVarSublevelsUp((Node *) parse, 1, 1);
375
376 /* append_rel_list might contain outer Vars? */
377 subroot->append_rel_list = copyObject(root->append_rel_list);
378 IncrementVarSublevelsUp((Node *) subroot->append_rel_list, 1, 1);
379 /* There shouldn't be any OJ info to translate, as yet */
380 Assert(subroot->join_info_list == NIL);
381 /* and we haven't made equivalence classes, either */
382 Assert(subroot->eq_classes == NIL);
383 /* and we haven't created PlaceHolderInfos, either */
384 Assert(subroot->placeholder_list == NIL);
385
386 /*----------
387 * Generate modified query of the form
388 * (SELECT col FROM tab
389 * WHERE col IS NOT NULL AND existing-quals
390 * ORDER BY col ASC/DESC
391 * LIMIT 1)
392 *----------
393 */
394 /* single tlist entry that is the aggregate target */
395 tle = makeTargetEntry(copyObject(mminfo->target),
396 (AttrNumber) 1,
397 pstrdup("agg_target"),
398 false);
399 tlist = list_make1(tle);
400 subroot->processed_tlist = parse->targetList = tlist;
401
402 /* No HAVING, no DISTINCT, no aggregates anymore */
403 parse->havingQual = NULL;
404 subroot->hasHavingQual = false;
405 parse->distinctClause = NIL;
406 parse->hasDistinctOn = false;
407 parse->hasAggs = false;
408
409 /* Build "target IS NOT NULL" expression */
410 ntest = makeNode(NullTest);
411 ntest->nulltesttype = IS_NOT_NULL;
412 ntest->arg = copyObject(mminfo->target);
413 /* we checked it wasn't a rowtype in find_minmax_aggs_walker */
414 ntest->argisrow = false;
415 ntest->location = -1;
416
417 /* User might have had that in WHERE already */
418 if (!list_member((List *) parse->jointree->quals, ntest))
419 parse->jointree->quals = (Node *)
420 lcons(ntest, (List *) parse->jointree->quals);
421
422 /* Build suitable ORDER BY clause */
423 sortcl = makeNode(SortGroupClause);
424 sortcl->tleSortGroupRef = assignSortGroupRef(tle, subroot->processed_tlist);
425 sortcl->eqop = eqop;
426 sortcl->sortop = sortop;
427 sortcl->nulls_first = nulls_first;
428 sortcl->hashable = false; /* no need to make this accurate */
429 parse->sortClause = list_make1(sortcl);
430
431 /* set up expressions for LIMIT 1 */
432 parse->limitOffset = NULL;
433 parse->limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid,
434 sizeof(int64),
435 Int64GetDatum(1), false,
436 FLOAT8PASSBYVAL);
437
438 /*
439 * Generate the best paths for this query, telling query_planner that we
440 * have LIMIT 1.
441 */
442 subroot->tuple_fraction = 1.0;
443 subroot->limit_tuples = 1.0;
444
445 final_rel = query_planner(subroot, minmax_qp_callback, NULL);
446
447 /*
448 * Since we didn't go through subquery_planner() to handle the subquery,
449 * we have to do some of the same cleanup it would do, in particular cope
450 * with params and initplans used within this subquery. (This won't
451 * matter if we end up not using the subplan.)
452 */
453 SS_identify_outer_params(subroot);
454 SS_charge_for_initplans(subroot, final_rel);
455
456 /*
457 * Get the best presorted path, that being the one that's cheapest for
458 * fetching just one row. If there's no such path, fail.
459 */
460 if (final_rel->rows > 1.0)
461 path_fraction = 1.0 / final_rel->rows;
462 else
463 path_fraction = 1.0;
464
465 sorted_path =
466 get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist,
467 subroot->query_pathkeys,
468 NULL,
469 path_fraction);
470 if (!sorted_path)
471 return false;
472
473 /*
474 * The path might not return exactly what we want, so fix that. (We
475 * assume that this won't change any conclusions about which was the
476 * cheapest path.)
477 */
478 sorted_path = apply_projection_to_path(subroot, final_rel, sorted_path,
479 create_pathtarget(subroot,
480 subroot->processed_tlist));
481
482 /*
483 * Determine cost to get just the first row of the presorted path.
484 *
485 * Note: cost calculation here should match
486 * compare_fractional_path_costs().
487 */
488 path_cost = sorted_path->startup_cost +
489 path_fraction * (sorted_path->total_cost - sorted_path->startup_cost);
490
491 /* Save state for further processing */
492 mminfo->subroot = subroot;
493 mminfo->path = sorted_path;
494 mminfo->pathcost = path_cost;
495
496 return true;
497}
498
499/*
500 * Compute query_pathkeys and other pathkeys during query_planner()
501 */
502static void
503minmax_qp_callback(PlannerInfo *root, void *extra)
504{
505 root->group_pathkeys = NIL;
506 root->window_pathkeys = NIL;
507 root->distinct_pathkeys = NIL;
508
509 root->sort_pathkeys =
510 make_pathkeys_for_sortclauses(root,
511 root->parse->sortClause,
512 root->parse->targetList);
513
514 root->query_pathkeys = root->sort_pathkeys;
515}
516
517/*
518 * Get the OID of the sort operator, if any, associated with an aggregate.
519 * Returns InvalidOid if there is no such operator.
520 */
521static Oid
522fetch_agg_sort_op(Oid aggfnoid)
523{
524 HeapTuple aggTuple;
525 Form_pg_aggregate aggform;
526 Oid aggsortop;
527
528 /* fetch aggregate entry from pg_aggregate */
529 aggTuple = SearchSysCache1(AGGFNOID, ObjectIdGetDatum(aggfnoid));
530 if (!HeapTupleIsValid(aggTuple))
531 return InvalidOid;
532 aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
533 aggsortop = aggform->aggsortop;
534 ReleaseSysCache(aggTuple);
535
536 return aggsortop;
537}
538