planagg.c source code [PostgreSQL/src/backend/optimizer/plan/planagg.c]

1	/-------------------------------------------------------------------------*
2	*
3	* planagg.c
4	* Special planning for aggregate queries.
5	*
6	* This module tries to replace MIN/MAX aggregate functions by subqueries
7	* of the form
8	* (SELECT col FROM tab
9	* WHERE col IS NOT NULL AND existing-quals
10	* ORDER BY col ASC/DESC
11	* LIMIT 1)
12	* Given a suitable index on tab.col, this can be much faster than the
13	* generic scan-all-the-rows aggregation plan. We can handle multiple
14	* MIN/MAX aggregates by generating multiple subqueries, and their
15	* orderings can be different. However, if the query contains any
16	* non-optimizable aggregates, there's no point since we'll have to
17	* scan all the rows anyway.
18	*
19	*
20	* Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
21	* Portions Copyright (c) 1994, Regents of the University of California
22	*
23	*
24	* IDENTIFICATION
25	* src/backend/optimizer/plan/planagg.c
26	*
27	*-------------------------------------------------------------------------
28	*/
29	#include "postgres.h"
30
31	#include "access/htup_details.h"
32	#include "catalog/pg_aggregate.h"
33	#include "catalog/pg_type.h"
34	#include "nodes/makefuncs.h"
35	#include "nodes/nodeFuncs.h"
36	#include "optimizer/clauses.h"
37	#include "optimizer/cost.h"
38	#include "optimizer/optimizer.h"
39	#include "optimizer/pathnode.h"
40	#include "optimizer/paths.h"
41	#include "optimizer/planmain.h"
42	#include "optimizer/subselect.h"
43	#include "optimizer/tlist.h"
44	#include "parser/parsetree.h"
45	#include "parser/parse_clause.h"
46	#include "rewrite/rewriteManip.h"
47	#include "utils/lsyscache.h"
48	#include "utils/syscache.h"
49
50
51	static bool find_minmax_aggs_walker(Node node, List *context);
52	static bool build_minmax_path(PlannerInfo root, MinMaxAggInfo mminfo,
53	Oid eqop, Oid sortop, bool nulls_first);
54	static void minmax_qp_callback(PlannerInfo root, void* *extra);
55	static Oid fetch_agg_sort_op(Oid aggfnoid);
56
57
58	/*
59	* preprocess_minmax_aggregates - preprocess MIN/MAX aggregates
60	*
61	* Check to see whether the query contains MIN/MAX aggregate functions that
62	* might be optimizable via indexscans. If it does, and all the aggregates
63	* are potentially optimizable, then create a MinMaxAggPath and add it to
64	* the (UPPERREL_GROUP_AGG, NULL) upperrel.
65	*
66	* This should be called by grouping_planner() just before it's ready to call
67	* query_planner(), because we generate indexscan paths by cloning the
68	* planner's state and invoking query_planner() on a modified version of
69	* the query parsetree. Thus, all preprocessing needed before query_planner()
70	* must already be done.
71	*/
72	void
73	preprocess_minmax_aggregates(PlannerInfo *root)
74	{
75	Query *parse = root->parse;
76	FromExpr *jtnode;
77	RangeTblRef *rtr;
78	RangeTblEntry *rte;
79	List *aggs_list;
80	RelOptInfo *grouped_rel;
81	ListCell *lc;
82
83	/ minmax_aggs list should be empty at this point /
84	Assert(root->minmax_aggs == NIL);
85
86	/ Nothing to do if query has no aggregates /
87	if (!parse->hasAggs)
88	return;
89
90	Assert(!parse->setOperations); / shouldn't get here if a setop /
91	Assert(parse->rowMarks == NIL); / nor if FOR UPDATE /
92
93	/*
94	* Reject unoptimizable cases.
95	*
96	* We don't handle GROUP BY or windowing, because our current
97	* implementations of grouping require looking at all the rows anyway, and
98	* so there's not much point in optimizing MIN/MAX.
99	*/
100	if (parse->groupClause \|\| list_length(parse->groupingSets) > `1` \|\|
101	parse->hasWindowFuncs)
102	return;
103
104	/*
105	* Reject if query contains any CTEs; there's no way to build an indexscan
106	* on one so we couldn't succeed here. (If the CTEs are unreferenced,
107	* that's not true, but it doesn't seem worth expending cycles to check.)
108	*/
109	if (parse->cteList)
110	return;
111
112	/*
113	* We also restrict the query to reference exactly one table, since join
114	* conditions can't be handled reasonably. (We could perhaps handle a
115	* query containing cartesian-product joins, but it hardly seems worth the
116	* trouble.) However, the single table could be buried in several levels
117	* of FromExpr due to subqueries. Note the "single" table could be an
118	* inheritance parent, too, including the case of a UNION ALL subquery
119	* that's been flattened to an appendrel.
120	*/
121	jtnode = parse->jointree;
122	while (IsA(jtnode, FromExpr))
123	{
124	if (list_length(jtnode->fromlist) != `1`)
125	return;
126	jtnode = linitial(jtnode->fromlist);
127	}
128	if (!IsA(jtnode, RangeTblRef))
129	return;
130	rtr = (RangeTblRef *) jtnode;
131	rte = planner_rt_fetch(rtr->rtindex, root);
132	if (rte->rtekind == RTE_RELATION)
133	/ ordinary relation, ok / ;
134	else if (rte->rtekind == RTE_SUBQUERY && rte->inh)
135	/ flattened UNION ALL subquery, ok / ;
136	else
137	return;
138
139	/*
140	* Scan the tlist and HAVING qual to find all the aggregates and verify
141	* all are MIN/MAX aggregates. Stop as soon as we find one that isn't.
142	*/
143	aggs_list = NIL;
144	if (find_minmax_aggs_walker((Node *) root->processed_tlist, &aggs_list))
145	return;
146	if (find_minmax_aggs_walker(parse->havingQual, &aggs_list))
147	return;
148
149	/*
150	* OK, there is at least the possibility of performing the optimization.
151	* Build an access path for each aggregate. If any of the aggregates
152	* prove to be non-indexable, give up; there is no point in optimizing
153	* just some of them.
154	*/
155	foreach(lc, aggs_list)
156	{
157	MinMaxAggInfo mminfo = (MinMaxAggInfo ) lfirst(lc);
158	Oid eqop;
159	bool reverse;
160
161	/*
162	* We'll need the equality operator that goes with the aggregate's
163	* ordering operator.
164	*/
165	eqop = get_equality_op_for_ordering_op(mminfo->aggsortop, &reverse);
166	if (!OidIsValid(eqop)) / shouldn't happen /
167	elog(ERROR, "could not find equality operator for ordering operator %u",
168	mminfo->aggsortop);
169
170	/*
171	* We can use either an ordering that gives NULLS FIRST or one that
172	* gives NULLS LAST; furthermore there's unlikely to be much
173	* performance difference between them, so it doesn't seem worth
174	* costing out both ways if we get a hit on the first one. NULLS
175	* FIRST is more likely to be available if the operator is a
176	* reverse-sort operator, so try that first if reverse.
177	*/
178	if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, reverse))
179	continue;
180	if (build_minmax_path(root, mminfo, eqop, mminfo->aggsortop, !reverse))
181	continue;
182
183	/ No indexable path for this aggregate, so fail /
184	return;
185	}
186
187	/*
188	* OK, we can do the query this way. Prepare to create a MinMaxAggPath
189	* node.
190	*
191	* First, create an output Param node for each agg. (If we end up not
192	* using the MinMaxAggPath, we'll waste a PARAM_EXEC slot for each agg,
193	* which is not worth worrying about. We can't wait till create_plan time
194	* to decide whether to make the Param, unfortunately.)
195	*/
196	foreach(lc, aggs_list)
197	{
198	MinMaxAggInfo mminfo = (MinMaxAggInfo ) lfirst(lc);
199
200	mminfo->param =
201	SS_make_initplan_output_param(root,
202	exprType((Node *) mminfo->target),
203	-`1`,
204	exprCollation((Node *) mminfo->target));
205	}
206
207	/*
208	* Create a MinMaxAggPath node with the appropriate estimated costs and
209	* other needed data, and add it to the UPPERREL_GROUP_AGG upperrel, where
210	* it will compete against the standard aggregate implementation. (It
211	* will likely always win, but we need not assume that here.)
212	*
213	* Note: grouping_planner won't have created this upperrel yet, but it's
214	* fine for us to create it first. We will not have inserted the correct
215	* consider_parallel value in it, but MinMaxAggPath paths are currently
216	* never parallel-safe anyway, so that doesn't matter. Likewise, it
217	* doesn't matter that we haven't filled FDW-related fields in the rel.
218	* Also, because there are no rowmarks, we know that the processed_tlist
219	* doesn't need to change anymore, so making the pathtarget now is safe.
220	*/
221	grouped_rel = fetch_upper_rel(root, UPPERREL_GROUP_AGG, NULL);
222	add_path(grouped_rel, (Path *)
223	create_minmaxagg_path(root, grouped_rel,
224	create_pathtarget(root,
225	root->processed_tlist),
226	aggs_list,
227	(List *) parse->havingQual));
228	}
229
230	/*
231	* find_minmax_aggs_walker
232	* Recursively scan the Aggref nodes in an expression tree, and check
233	* that each one is a MIN/MAX aggregate. If so, build a list of the
234	* distinct aggregate calls in the tree.
235	*
236	* Returns true if a non-MIN/MAX aggregate is found, false otherwise.
237	* (This seemingly-backward definition is used because expression_tree_walker
238	* aborts the scan on true return, which is what we want.)
239	*
240	* Found aggregates are added to the list at *context; it's up to the caller
241	* to initialize the list to NIL.
242	*
243	* This does not descend into subqueries, and so should be used only after
244	* reduction of sublinks to subplans. There mustn't be outer-aggregate
245	* references either.
246	*/
247	static bool
248	find_minmax_aggs_walker(Node node, List *context)
249	{
250	if (node == NULL)
251	return false;
252	if (IsA(node, Aggref))
253	{
254	Aggref aggref = (Aggref ) node;
255	Oid aggsortop;
256	TargetEntry *curTarget;
257	MinMaxAggInfo *mminfo;
258	ListCell *l;
259
260	Assert(aggref->agglevelsup == `0`);
261	if (list_length(aggref->args) != `1`)
262	return true; / it couldn't be MIN/MAX /
263
264	/*
265	* ORDER BY is usually irrelevant for MIN/MAX, but it can change the
266	* outcome if the aggsortop's operator class recognizes non-identical
267	* values as equal. For example, 4.0 and 4.00 are equal according to
268	* numeric_ops, yet distinguishable. If MIN() receives more than one
269	* value equal to 4.0 and no value less than 4.0, it is unspecified
270	* which of those equal values MIN() returns. An ORDER BY expression
271	* that differs for each of those equal values of the argument
272	* expression makes the result predictable once again. This is a
273	* niche requirement, and we do not implement it with subquery paths.
274	* In any case, this test lets us reject ordered-set aggregates
275	* quickly.
276	*/
277	if (aggref->aggorder != NIL)
278	return true;
279	/ note: we do not care if DISTINCT is mentioned ... /
280
281	/*
282	* We might implement the optimization when a FILTER clause is present
283	* by adding the filter to the quals of the generated subquery. For
284	* now, just punt.
285	*/
286	if (aggref->aggfilter != NULL)
287	return true;
288
289	aggsortop = fetch_agg_sort_op(aggref->aggfnoid);
290	if (!OidIsValid(aggsortop))
291	return true; / not a MIN/MAX aggregate /
292
293	curTarget = (TargetEntry *) linitial(aggref->args);
294
295	if (contain_mutable_functions((Node *) curTarget->expr))
296	return true; / not potentially indexable /
297
298	if (type_is_rowtype(exprType((Node *) curTarget->expr)))
299	return true; / IS NOT NULL would have weird semantics /
300
301	/*
302	* Check whether it's already in the list, and add it if not.
303	*/
304	foreach(l, *context)
305	{
306	mminfo = (MinMaxAggInfo *) lfirst(l);
307	if (mminfo->aggfnoid == aggref->aggfnoid &&
308	equal(mminfo->target, curTarget->expr))
309	return false;
310	}
311
312	mminfo = makeNode(MinMaxAggInfo);
313	mminfo->aggfnoid = aggref->aggfnoid;
314	mminfo->aggsortop = aggsortop;
315	mminfo->target = curTarget->expr;
316	mminfo->subroot = NULL; / don't compute path yet /
317	mminfo->path = NULL;
318	mminfo->pathcost = `0`;
319	mminfo->param = NULL;
320
321	context = lappend(context, mminfo);
322
323	/*
324	* We need not recurse into the argument, since it can't contain any
325	* aggregates.
326	*/
327	return false;
328	}
329	Assert(!IsA(node, SubLink));
330	return expression_tree_walker(node, find_minmax_aggs_walker,
331	(void *) context);
332	}
333
334	/*
335	* build_minmax_path
336	* Given a MIN/MAX aggregate, try to build an indexscan Path it can be
337	* optimized with.
338	*
339	* If successful, stash the best path in *mminfo and return true.
340	* Otherwise, return false.
341	*/
342	static bool
343	build_minmax_path(PlannerInfo root, MinMaxAggInfo mminfo,
344	Oid eqop, Oid sortop, bool nulls_first)
345	{
346	PlannerInfo *subroot;
347	Query *parse;
348	TargetEntry *tle;
349	List *tlist;
350	NullTest *ntest;
351	SortGroupClause *sortcl;
352	RelOptInfo *final_rel;
353	Path *sorted_path;
354	Cost path_cost;
355	double path_fraction;
356
357	/*
358	* We are going to construct what is effectively a sub-SELECT query, so
359	* clone the current query level's state and adjust it to make it look
360	* like a subquery. Any outer references will now be one level higher
361	* than before. (This means that when we are done, there will be no Vars
362	* of level 1, which is why the subquery can become an initplan.)
363	*/
364	subroot = (PlannerInfo ) palloc(sizeof*(PlannerInfo));
365	memcpy(subroot, root, sizeof(PlannerInfo));
366	subroot->query_level++;
367	subroot->parent_root = root;
368	/ reset subplan-related stuff /
369	subroot->plan_params = NIL;
370	subroot->outer_params = NULL;
371	subroot->init_plans = NIL;
372
373	subroot->parse = parse = copyObject(root->parse);
374	IncrementVarSublevelsUp((Node *) parse, `1`, `1`);
375
376	/ append_rel_list might contain outer Vars? /
377	subroot->append_rel_list = copyObject(root->append_rel_list);
378	IncrementVarSublevelsUp((Node *) subroot->append_rel_list, `1`, `1`);
379	/ There shouldn't be any OJ info to translate, as yet /
380	Assert(subroot->join_info_list == NIL);
381	/ and we haven't made equivalence classes, either /
382	Assert(subroot->eq_classes == NIL);
383	/ and we haven't created PlaceHolderInfos, either /
384	Assert(subroot->placeholder_list == NIL);
385
386	/----------*
387	* Generate modified query of the form
388	* (SELECT col FROM tab
389	* WHERE col IS NOT NULL AND existing-quals
390	* ORDER BY col ASC/DESC
391	* LIMIT 1)
392	*----------
393	*/
394	/ single tlist entry that is the aggregate target /
395	tle = makeTargetEntry(copyObject(mminfo->target),
396	(AttrNumber) `1`,
397	pstrdup("agg_target"),
398	false);
399	tlist = list_make1(tle);
400	subroot->processed_tlist = parse->targetList = tlist;
401
402	/ No HAVING, no DISTINCT, no aggregates anymore /
403	parse->havingQual = NULL;
404	subroot->hasHavingQual = false;
405	parse->distinctClause = NIL;
406	parse->hasDistinctOn = false;
407	parse->hasAggs = false;
408
409	/ Build "target IS NOT NULL" expression /
410	ntest = makeNode(NullTest);
411	ntest->nulltesttype = IS_NOT_NULL;
412	ntest->arg = copyObject(mminfo->target);
413	/ we checked it wasn't a rowtype in find_minmax_aggs_walker /
414	ntest->argisrow = false;
415	ntest->location = -`1`;
416
417	/ User might have had that in WHERE already /
418	if (!list_member((List *) parse->jointree->quals, ntest))
419	parse->jointree->quals = (Node *)
420	lcons(ntest, (List *) parse->jointree->quals);
421
422	/ Build suitable ORDER BY clause /
423	sortcl = makeNode(SortGroupClause);
424	sortcl->tleSortGroupRef = assignSortGroupRef(tle, subroot->processed_tlist);
425	sortcl->eqop = eqop;
426	sortcl->sortop = sortop;
427	sortcl->nulls_first = nulls_first;
428	sortcl->hashable = false; / no need to make this accurate /
429	parse->sortClause = list_make1(sortcl);
430
431	/ set up expressions for LIMIT 1 /
432	parse->limitOffset = NULL;
433	parse->limitCount = (Node *) makeConst(INT8OID, -`1`, InvalidOid,
434	sizeof(int64),
435	Int64GetDatum(`1`), false,
436	FLOAT8PASSBYVAL);
437
438	/*
439	* Generate the best paths for this query, telling query_planner that we
440	* have LIMIT 1.
441	*/
442	subroot->tuple_fraction = `1.0`;
443	subroot->limit_tuples = `1.0`;
444
445	final_rel = query_planner(subroot, minmax_qp_callback, NULL);
446
447	/*
448	* Since we didn't go through subquery_planner() to handle the subquery,
449	* we have to do some of the same cleanup it would do, in particular cope
450	* with params and initplans used within this subquery. (This won't
451	* matter if we end up not using the subplan.)
452	*/
453	SS_identify_outer_params(subroot);
454	SS_charge_for_initplans(subroot, final_rel);
455
456	/*
457	* Get the best presorted path, that being the one that's cheapest for
458	* fetching just one row. If there's no such path, fail.
459	*/
460	if (final_rel->rows > `1.0`)
461	path_fraction = `1.0` / final_rel->rows;
462	else
463	path_fraction = `1.0`;
464
465	sorted_path =
466	get_cheapest_fractional_path_for_pathkeys(final_rel->pathlist,
467	subroot->query_pathkeys,
468	NULL,
469	path_fraction);
470	if (!sorted_path)
471	return false;
472
473	/*
474	* The path might not return exactly what we want, so fix that. (We
475	* assume that this won't change any conclusions about which was the
476	* cheapest path.)
477	*/
478	sorted_path = apply_projection_to_path(subroot, final_rel, sorted_path,
479	create_pathtarget(subroot,
480	subroot->processed_tlist));
481
482	/*
483	* Determine cost to get just the first row of the presorted path.
484	*
485	* Note: cost calculation here should match
486	* compare_fractional_path_costs().
487	*/
488	path_cost = sorted_path->startup_cost +
489	path_fraction * (sorted_path->total_cost - sorted_path->startup_cost);
490
491	/ Save state for further processing /
492	mminfo->subroot = subroot;
493	mminfo->path = sorted_path;
494	mminfo->pathcost = path_cost;
495
496	return true;
497	}
498
499	/*
500	* Compute query_pathkeys and other pathkeys during query_planner()
501	*/
502	static void
503	minmax_qp_callback(PlannerInfo root, void* *extra)
504	{
505	root->group_pathkeys = NIL;
506	root->window_pathkeys = NIL;
507	root->distinct_pathkeys = NIL;
508
509	root->sort_pathkeys =
510	make_pathkeys_for_sortclauses(root,
511	root->parse->sortClause,
512	root->parse->targetList);
513
514	root->query_pathkeys = root->sort_pathkeys;
515	}
516
517	/*
518	* Get the OID of the sort operator, if any, associated with an aggregate.
519	* Returns InvalidOid if there is no such operator.
520	*/
521	static Oid
522	fetch_agg_sort_op(Oid aggfnoid)
523	{
524	HeapTuple aggTuple;
525	Form_pg_aggregate aggform;
526	Oid aggsortop;
527
528	/ fetch aggregate entry from pg_aggregate /
529	aggTuple = SearchSysCache1(AGGFNOID, ObjectIdGetDatum(aggfnoid));
530	if (!HeapTupleIsValid(aggTuple))
531	return InvalidOid;
532	aggform = (Form_pg_aggregate) GETSTRUCT(aggTuple);
533	aggsortop = aggform->aggsortop;
534	ReleaseSysCache(aggTuple);
535
536	return aggsortop;
537	}
538

Browse the source code of PostgreSQL/src/backend/optimizer/plan/planagg.c