1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * planagg.c |
4 | * Special planning for aggregate queries. |
5 | * |
6 | * This module tries to replace MIN/MAX aggregate functions by subqueries |
7 | * of the form |
8 | * (SELECT col FROM tab |
9 | * WHERE col IS NOT NULL AND existing-quals |
10 | * ORDER BY col ASC/DESC |
11 | * LIMIT 1) |
12 | * Given a suitable index on tab.col, this can be much faster than the |
13 | * generic scan-all-the-rows aggregation plan. We can handle multiple |
14 | * MIN/MAX aggregates by generating multiple subqueries, and their |
15 | * orderings can be different. However, if the query contains any |
16 | * non-optimizable aggregates, there's no point since we'll have to |
17 | * scan all the rows anyway. |
18 | * |
19 | * |
20 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
21 | * Portions Copyright (c) 1994, Regents of the University of California |
22 | * |
23 | * |
24 | * IDENTIFICATION |
25 | * src/backend/optimizer/plan/planagg.c |
26 | * |
27 | *------------------------------------------------------------------------- |
28 | */ |
29 | #include "postgres.h" |
30 | |
31 | #include "access/htup_details.h" |
32 | #include "catalog/pg_aggregate.h" |
33 | #include "catalog/pg_type.h" |
34 | #include "nodes/makefuncs.h" |
35 | #include "nodes/nodeFuncs.h" |
36 | #include "optimizer/clauses.h" |
37 | #include "optimizer/cost.h" |
38 | #include "optimizer/optimizer.h" |
39 | #include "optimizer/pathnode.h" |
40 | #include "optimizer/paths.h" |
41 | #include "optimizer/planmain.h" |
42 | #include "optimizer/subselect.h" |
43 | #include "optimizer/tlist.h" |
44 | #include "parser/parsetree.h" |
45 | #include "parser/parse_clause.h" |
46 | #include "rewrite/rewriteManip.h" |
47 | #include "utils/lsyscache.h" |
48 | #include "utils/syscache.h" |
49 | |
50 | |
51 | static bool find_minmax_aggs_walker(Node *node, List **context); |
52 | static bool build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo, |
53 | Oid eqop, Oid sortop, bool nulls_first); |
54 | static void minmax_qp_callback(PlannerInfo *root, void *); |
55 | static Oid fetch_agg_sort_op(Oid aggfnoid); |
56 | |
57 | |
58 | /* |
59 | * preprocess_minmax_aggregates - preprocess MIN/MAX aggregates |
60 | * |
61 | * Check to see whether the query contains MIN/MAX aggregate functions that |
62 | * might be optimizable via indexscans. If it does, and all the aggregates |
63 | * are potentially optimizable, then create a MinMaxAggPath and add it to |
64 | * the (UPPERREL_GROUP_AGG, NULL) upperrel. |
65 | * |
66 | * This should be called by grouping_planner() just before it's ready to call |
67 | * query_planner(), because we generate indexscan paths by cloning the |
68 | * planner's state and invoking query_planner() on a modified version of |
69 | * the query parsetree. Thus, all preprocessing needed before query_planner() |
70 | * must already be done. |
71 | */ |
72 | void |
73 | preprocess_minmax_aggregates(PlannerInfo *root) |
74 | { |
75 | Query *parse = root->parse; |
76 | FromExpr *jtnode; |
77 | RangeTblRef *rtr; |
78 | RangeTblEntry *rte; |
79 | List *aggs_list; |
80 | RelOptInfo *grouped_rel; |
81 | ListCell *lc; |
82 | |
83 | /* minmax_aggs list should be empty at this point */ |
84 | Assert(root->minmax_aggs == NIL); |
85 | |
86 | /* Nothing to do if query has no aggregates */ |
87 | if (!parse->hasAggs) |
88 | return; |
89 | |
90 | Assert(!parse->setOperations); /* shouldn't get here if a setop */ |
91 | Assert(parse->rowMarks == NIL); /* nor if FOR UPDATE */ |
92 | |
93 | /* |
94 | * Reject unoptimizable cases. |
95 | * |
96 | * We don't handle GROUP BY or windowing, because our current |
97 | * implementations of grouping require looking at all the rows anyway, and |
98 | * so there's not much point in optimizing MIN/MAX. |
99 | */ |
100 | if (parse->groupClause || list_length(parse->groupingSets) > 1 || |
101 | parse->hasWindowFuncs) |
102 | return; |
103 | |
104 | /* |
105 | * Reject if query contains any CTEs; there's no way to build an indexscan |
106 | * on one so we couldn't succeed here. (If the CTEs are unreferenced, |
107 | * that's not true, but it doesn't seem worth expending cycles to check.) |
108 | */ |
109 | if (parse->cteList) |
110 | return; |
111 | |
112 | /* |
113 | * We also restrict the query to reference exactly one table, since join |
114 | * conditions can't be handled reasonably. (We could perhaps handle a |
115 | * query containing cartesian-product joins, but it hardly seems worth the |
116 | * trouble.) However, the single table could be buried in several levels |
117 | * of FromExpr due to subqueries. Note the "single" table could be an |
118 | * inheritance parent, too, including the case of a UNION ALL subquery |
119 | * that's been flattened to an appendrel. |
120 | */ |
121 | jtnode = parse->jointree; |
122 | while (IsA(jtnode, FromExpr)) |
123 | { |
124 | if (list_length(jtnode->fromlist) != 1) |
125 | return; |
126 | jtnode = linitial(jtnode->fromlist); |
127 | } |
128 | if (!IsA(jtnode, RangeTblRef)) |
129 | return; |
130 | rtr = (RangeTblRef *) jtnode; |
131 | rte = planner_rt_fetch(rtr->rtindex, root); |
132 | if (rte->rtekind == RTE_RELATION) |
133 | /* ordinary relation, ok */ ; |
134 | else if (rte->rtekind == RTE_SUBQUERY && rte->inh) |
135 | /* flattened UNION ALL subquery, ok */ ; |
136 | else |
137 | return; |
138 | |
139 | /* |
140 | * Scan the tlist and HAVING qual to find all the aggregates and verify |
141 | * all are MIN/MAX aggregates. Stop as soon as we find one that isn't. |
142 | */ |
143 | aggs_list = NIL; |
144 | if (find_minmax_aggs_walker((Node *) root->processed_tlist, &aggs_list)) |
145 | return; |
146 | if (find_minmax_aggs_walker(parse->havingQual, &aggs_list)) |
147 | return; |
148 | |
149 | /* |
150 | * OK, there is at least the possibility of performing the optimization. |
151 | * Build an access path for each aggregate. If any of the aggregates |
152 | * prove to be non-indexable, give up; there is no point in optimizing |
153 | * just some of them. |
154 | */ |
155 | foreach(lc, aggs_list) |
156 | { |
157 | MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc); |
158 | Oid eqop; |
159 | bool reverse; |
160 | |
161 | /* |
162 | * We'll need the equality operator that goes with the aggregate's |
163 | * ordering operator. |
164 | */ |
165 | eqop = get_equality_op_for_ordering_op(mminfo->aggsortop, &reverse); |
166 | if (!OidIsValid(eqop)) /* shouldn't happen */ |
167 | elog(ERROR, "could not find equality operator for ordering operator %u" , |
168 | mminfo->aggsortop); |
169 | |
170 | /* |
171 | * We can use either an ordering that gives NULLS FIRST or one that |
172 | * gives NULLS LAST; furthermore there's unlikely to be much |
173 | * performance difference between them, so it doesn't seem worth |
174 | * costing out both ways if we get a hit on the first one. NULLS |
175 | * FIRST is more likely to be available if the operator is a |
176 | * reverse-sort operator, so try that first if reverse. |
177 | */ |
178 | if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, reverse)) |
179 | continue; |
180 | if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, !reverse)) |
181 | continue; |
182 | |
183 | /* No indexable path for this aggregate, so fail */ |
184 | return; |
185 | } |
186 | |
187 | /* |
188 | * OK, we can do the query this way. Prepare to create a MinMaxAggPath |
189 | * node. |
190 | * |
191 | * First, create an output Param node for each agg. (If we end up not |
192 | * using the MinMaxAggPath, we'll waste a PARAM_EXEC slot for each agg, |
193 | * which is not worth worrying about. We can't wait till create_plan time |
194 | * to decide whether to make the Param, unfortunately.) |
195 | */ |
196 | foreach(lc, aggs_list) |
197 | { |
198 | MinMaxAggInfo *mminfo = (MinMaxAggInfo *) lfirst(lc); |
199 | |
200 | mminfo->param = |
201 | SS_make_initplan_output_param(root, |
202 | exprType((Node *) mminfo->target), |
203 | -1, |
204 | exprCollation((Node *) mminfo->target)); |
205 | } |
206 | |
207 | /* |
208 | * Create a MinMaxAggPath node with the appropriate estimated costs and |
209 | * other needed data, and add it to the UPPERREL_GROUP_AGG upperrel, where |
210 | * it will compete against the standard aggregate implementation. (It |
211 | * will likely always win, but we need not assume that here.) |
212 | * |
213 | * Note: grouping_planner won't have created this upperrel yet, but it's |
214 | * fine for us to create it first. We will not have inserted the correct |
215 | * consider_parallel value in it, but MinMaxAggPath paths are currently |
216 | * never parallel-safe anyway, so that doesn't matter. Likewise, it |
217 | * doesn't matter that we haven't filled FDW-related fields in the rel. |
218 | * Also, because there are no rowmarks, we know that the processed_tlist |
219 | * doesn't need to change anymore, so making the pathtarget now is safe. |
220 | */ |
221 | grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL); |
222 | add_path(grouped_rel, (Path *) |
223 | create_minmaxagg_path(root, grouped_rel, |
224 | create_pathtarget(root, |
225 | root->processed_tlist), |
226 | aggs_list, |
227 | (List *) parse->havingQual)); |
228 | } |
229 | |
230 | /* |
231 | * find_minmax_aggs_walker |
232 | * Recursively scan the Aggref nodes in an expression tree, and check |
233 | * that each one is a MIN/MAX aggregate. If so, build a list of the |
234 | * distinct aggregate calls in the tree. |
235 | * |
236 | * Returns true if a non-MIN/MAX aggregate is found, false otherwise. |
237 | * (This seemingly-backward definition is used because expression_tree_walker |
238 | * aborts the scan on true return, which is what we want.) |
239 | * |
240 | * Found aggregates are added to the list at *context; it's up to the caller |
241 | * to initialize the list to NIL. |
242 | * |
243 | * This does not descend into subqueries, and so should be used only after |
244 | * reduction of sublinks to subplans. There mustn't be outer-aggregate |
245 | * references either. |
246 | */ |
247 | static bool |
248 | find_minmax_aggs_walker(Node *node, List **context) |
249 | { |
250 | if (node == NULL) |
251 | return false; |
252 | if (IsA(node, Aggref)) |
253 | { |
254 | Aggref *aggref = (Aggref *) node; |
255 | Oid aggsortop; |
256 | TargetEntry *curTarget; |
257 | MinMaxAggInfo *mminfo; |
258 | ListCell *l; |
259 | |
260 | Assert(aggref->agglevelsup == 0); |
261 | if (list_length(aggref->args) != 1) |
262 | return true; /* it couldn't be MIN/MAX */ |
263 | |
264 | /* |
265 | * ORDER BY is usually irrelevant for MIN/MAX, but it can change the |
266 | * outcome if the aggsortop's operator class recognizes non-identical |
267 | * values as equal. For example, 4.0 and 4.00 are equal according to |
268 | * numeric_ops, yet distinguishable. If MIN() receives more than one |
269 | * value equal to 4.0 and no value less than 4.0, it is unspecified |
270 | * which of those equal values MIN() returns. An ORDER BY expression |
271 | * that differs for each of those equal values of the argument |
272 | * expression makes the result predictable once again. This is a |
273 | * niche requirement, and we do not implement it with subquery paths. |
274 | * In any case, this test lets us reject ordered-set aggregates |
275 | * quickly. |
276 | */ |
277 | if (aggref->aggorder != NIL) |
278 | return true; |
279 | /* note: we do not care if DISTINCT is mentioned ... */ |
280 | |
281 | /* |
282 | * We might implement the optimization when a FILTER clause is present |
283 | * by adding the filter to the quals of the generated subquery. For |
284 | * now, just punt. |
285 | */ |
286 | if (aggref->aggfilter != NULL) |
287 | return true; |
288 | |
289 | aggsortop = fetch_agg_sort_op(aggref->aggfnoid); |
290 | if (!OidIsValid(aggsortop)) |
291 | return true; /* not a MIN/MAX aggregate */ |
292 | |
293 | curTarget = (TargetEntry *) linitial(aggref->args); |
294 | |
295 | if (contain_mutable_functions((Node *) curTarget->expr)) |
296 | return true; /* not potentially indexable */ |
297 | |
298 | if (type_is_rowtype(exprType((Node *) curTarget->expr))) |
299 | return true; /* IS NOT NULL would have weird semantics */ |
300 | |
301 | /* |
302 | * Check whether it's already in the list, and add it if not. |
303 | */ |
304 | foreach(l, *context) |
305 | { |
306 | mminfo = (MinMaxAggInfo *) lfirst(l); |
307 | if (mminfo->aggfnoid == aggref->aggfnoid && |
308 | equal(mminfo->target, curTarget->expr)) |
309 | return false; |
310 | } |
311 | |
312 | mminfo = makeNode(MinMaxAggInfo); |
313 | mminfo->aggfnoid = aggref->aggfnoid; |
314 | mminfo->aggsortop = aggsortop; |
315 | mminfo->target = curTarget->expr; |
316 | mminfo->subroot = NULL; /* don't compute path yet */ |
317 | mminfo->path = NULL; |
318 | mminfo->pathcost = 0; |
319 | mminfo->param = NULL; |
320 | |
321 | *context = lappend(*context, mminfo); |
322 | |
323 | /* |
324 | * We need not recurse into the argument, since it can't contain any |
325 | * aggregates. |
326 | */ |
327 | return false; |
328 | } |
329 | Assert(!IsA(node, SubLink)); |
330 | return expression_tree_walker(node, find_minmax_aggs_walker, |
331 | (void *) context); |
332 | } |
333 | |
334 | /* |
335 | * build_minmax_path |
336 | * Given a MIN/MAX aggregate, try to build an indexscan Path it can be |
337 | * optimized with. |
338 | * |
339 | * If successful, stash the best path in *mminfo and return true. |
340 | * Otherwise, return false. |
341 | */ |
342 | static bool |
343 | build_minmax_path(PlannerInfo *root, MinMaxAggInfo *mminfo, |
344 | Oid eqop, Oid sortop, bool nulls_first) |
345 | { |
346 | PlannerInfo *subroot; |
347 | Query *parse; |
348 | TargetEntry *tle; |
349 | List *tlist; |
350 | NullTest *ntest; |
351 | SortGroupClause *sortcl; |
352 | RelOptInfo *final_rel; |
353 | Path *sorted_path; |
354 | Cost path_cost; |
355 | double path_fraction; |
356 | |
357 | /* |
358 | * We are going to construct what is effectively a sub-SELECT query, so |
359 | * clone the current query level's state and adjust it to make it look |
360 | * like a subquery. Any outer references will now be one level higher |
361 | * than before. (This means that when we are done, there will be no Vars |
362 | * of level 1, which is why the subquery can become an initplan.) |
363 | */ |
364 | subroot = (PlannerInfo *) palloc(sizeof(PlannerInfo)); |
365 | memcpy(subroot, root, sizeof(PlannerInfo)); |
366 | subroot->query_level++; |
367 | subroot->parent_root = root; |
368 | /* reset subplan-related stuff */ |
369 | subroot->plan_params = NIL; |
370 | subroot->outer_params = NULL; |
371 | subroot->init_plans = NIL; |
372 | |
373 | subroot->parse = parse = copyObject(root->parse); |
374 | IncrementVarSublevelsUp((Node *) parse, 1, 1); |
375 | |
376 | /* append_rel_list might contain outer Vars? */ |
377 | subroot->append_rel_list = copyObject(root->append_rel_list); |
378 | IncrementVarSublevelsUp((Node *) subroot->append_rel_list, 1, 1); |
379 | /* There shouldn't be any OJ info to translate, as yet */ |
380 | Assert(subroot->join_info_list == NIL); |
381 | /* and we haven't made equivalence classes, either */ |
382 | Assert(subroot->eq_classes == NIL); |
383 | /* and we haven't created PlaceHolderInfos, either */ |
384 | Assert(subroot->placeholder_list == NIL); |
385 | |
386 | /*---------- |
387 | * Generate modified query of the form |
388 | * (SELECT col FROM tab |
389 | * WHERE col IS NOT NULL AND existing-quals |
390 | * ORDER BY col ASC/DESC |
391 | * LIMIT 1) |
392 | *---------- |
393 | */ |
394 | /* single tlist entry that is the aggregate target */ |
395 | tle = makeTargetEntry(copyObject(mminfo->target), |
396 | (AttrNumber) 1, |
397 | pstrdup("agg_target" ), |
398 | false); |
399 | tlist = list_make1(tle); |
400 | subroot->processed_tlist = parse->targetList = tlist; |
401 | |
402 | /* No HAVING, no DISTINCT, no aggregates anymore */ |
403 | parse->havingQual = NULL; |
404 | subroot->hasHavingQual = false; |
405 | parse->distinctClause = NIL; |
406 | parse->hasDistinctOn = false; |
407 | parse->hasAggs = false; |
408 | |
409 | /* Build "target IS NOT NULL" expression */ |
410 | ntest = makeNode(NullTest); |
411 | ntest->nulltesttype = IS_NOT_NULL; |
412 | ntest->arg = copyObject(mminfo->target); |
413 | /* we checked it wasn't a rowtype in find_minmax_aggs_walker */ |
414 | ntest->argisrow = false; |
415 | ntest->location = -1; |
416 | |
417 | /* User might have had that in WHERE already */ |
418 | if (!list_member((List *) parse->jointree->quals, ntest)) |
419 | parse->jointree->quals = (Node *) |
420 | lcons(ntest, (List *) parse->jointree->quals); |
421 | |
422 | /* Build suitable ORDER BY clause */ |
423 | sortcl = makeNode(SortGroupClause); |
424 | sortcl->tleSortGroupRef = assignSortGroupRef(tle, subroot->processed_tlist); |
425 | sortcl->eqop = eqop; |
426 | sortcl->sortop = sortop; |
427 | sortcl->nulls_first = nulls_first; |
428 | sortcl->hashable = false; /* no need to make this accurate */ |
429 | parse->sortClause = list_make1(sortcl); |
430 | |
431 | /* set up expressions for LIMIT 1 */ |
432 | parse->limitOffset = NULL; |
433 | parse->limitCount = (Node *) makeConst(INT8OID, -1, InvalidOid, |
434 | sizeof(int64), |
435 | Int64GetDatum(1), false, |
436 | FLOAT8PASSBYVAL); |
437 | |
438 | /* |
439 | * Generate the best paths for this query, telling query_planner that we |
440 | * have LIMIT 1. |
441 | */ |
442 | subroot->tuple_fraction = 1.0; |
443 | subroot->limit_tuples = 1.0; |
444 | |
445 | final_rel = query_planner(subroot, minmax_qp_callback, NULL); |
446 | |
447 | /* |
448 | * Since we didn't go through subquery_planner() to handle the subquery, |
449 | * we have to do some of the same cleanup it would do, in particular cope |
450 | * with params and initplans used within this subquery. (This won't |
451 | * matter if we end up not using the subplan.) |
452 | */ |
453 | SS_identify_outer_params(subroot); |
454 | SS_charge_for_initplans(subroot, final_rel); |
455 | |
456 | /* |
457 | * Get the best presorted path, that being the one that's cheapest for |
458 | * fetching just one row. If there's no such path, fail. |
459 | */ |
460 | if (final_rel->rows > 1.0) |
461 | path_fraction = 1.0 / final_rel->rows; |
462 | else |
463 | path_fraction = 1.0; |
464 | |
465 | sorted_path = |
466 | get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist, |
467 | subroot->query_pathkeys, |
468 | NULL, |
469 | path_fraction); |
470 | if (!sorted_path) |
471 | return false; |
472 | |
473 | /* |
474 | * The path might not return exactly what we want, so fix that. (We |
475 | * assume that this won't change any conclusions about which was the |
476 | * cheapest path.) |
477 | */ |
478 | sorted_path = apply_projection_to_path(subroot, final_rel, sorted_path, |
479 | create_pathtarget(subroot, |
480 | subroot->processed_tlist)); |
481 | |
482 | /* |
483 | * Determine cost to get just the first row of the presorted path. |
484 | * |
485 | * Note: cost calculation here should match |
486 | * compare_fractional_path_costs(). |
487 | */ |
488 | path_cost = sorted_path->startup_cost + |
489 | path_fraction * (sorted_path->total_cost - sorted_path->startup_cost); |
490 | |
491 | /* Save state for further processing */ |
492 | mminfo->subroot = subroot; |
493 | mminfo->path = sorted_path; |
494 | mminfo->pathcost = path_cost; |
495 | |
496 | return true; |
497 | } |
498 | |
499 | /* |
500 | * Compute query_pathkeys and other pathkeys during query_planner() |
501 | */ |
502 | static void |
503 | minmax_qp_callback(PlannerInfo *root, void *) |
504 | { |
505 | root->group_pathkeys = NIL; |
506 | root->window_pathkeys = NIL; |
507 | root->distinct_pathkeys = NIL; |
508 | |
509 | root->sort_pathkeys = |
510 | make_pathkeys_for_sortclauses(root, |
511 | root->parse->sortClause, |
512 | root->parse->targetList); |
513 | |
514 | root->query_pathkeys = root->sort_pathkeys; |
515 | } |
516 | |
517 | /* |
518 | * Get the OID of the sort operator, if any, associated with an aggregate. |
519 | * Returns InvalidOid if there is no such operator. |
520 | */ |
521 | static Oid |
522 | fetch_agg_sort_op(Oid aggfnoid) |
523 | { |
524 | HeapTuple aggTuple; |
525 | Form_pg_aggregate aggform; |
526 | Oid aggsortop; |
527 | |
528 | /* fetch aggregate entry from pg_aggregate */ |
529 | aggTuple = SearchSysCache1(AGGFNOID, ObjectIdGetDatum(aggfnoid)); |
530 | if (!HeapTupleIsValid(aggTuple)) |
531 | return InvalidOid; |
532 | aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple); |
533 | aggsortop = aggform->aggsortop; |
534 | ReleaseSysCache(aggTuple); |
535 | |
536 | return aggsortop; |
537 | } |
538 | |