/*-------------------------------------------------------------------------
 *
 * prepunion.c
 *    Routines to plan set-operation queries.  The filename is a leftover
 *    from a time when only UNIONs were implemented.
 *
 * There are two code paths in the planner for set-operation queries.
 * If a subquery consists entirely of simple UNION ALL operations, it
 * is converted into an "append relation".  Otherwise, it is handled
 * by the general code in this module (plan_set_operations and its
 * subroutines).  There is some support code here for the append-relation
 * case, but most of the heavy lifting for that is done elsewhere,
 * notably in prepjointree.c and allpaths.c.
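 *
 * For example, "SELECT a FROM t1 UNION ALL SELECT a FROM t2" can be
 * flattened into an append relation, while "SELECT a FROM t1 UNION
 * SELECT a FROM t2" (or any INTERSECT/EXCEPT) is planned by the code
 * in this file.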
 *
 * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
 * Portions Copyright (c) 1994, Regents of the University of California
 *
 *
 * IDENTIFICATION
 *    src/backend/optimizer/prep/prepunion.c
 *
 *-------------------------------------------------------------------------
 */
#include "postgres.h"

#include "access/htup_details.h"
#include "access/sysattr.h"
#include "catalog/partition.h"
#include "catalog/pg_inherits.h"
#include "catalog/pg_type.h"
#include "miscadmin.h"
#include "nodes/makefuncs.h"
#include "nodes/nodeFuncs.h"
#include "optimizer/cost.h"
#include "optimizer/pathnode.h"
#include "optimizer/paths.h"
#include "optimizer/planmain.h"
#include "optimizer/planner.h"
#include "optimizer/prep.h"
#include "optimizer/tlist.h"
#include "parser/parse_coerce.h"
#include "parser/parsetree.h"
#include "utils/lsyscache.h"
#include "utils/rel.h"
#include "utils/selfuncs.h"
#include "utils/syscache.h"


static RelOptInfo *recurse_set_operations(Node *setOp, PlannerInfo *root,
                                          List *colTypes, List *colCollations,
                                          bool junkOK,
                                          int flag, List *refnames_tlist,
                                          List **pTargetList,
                                          double *pNumGroups);
static RelOptInfo *generate_recursion_path(SetOperationStmt *setOp,
                                           PlannerInfo *root,
                                           List *refnames_tlist,
                                           List **pTargetList);
static RelOptInfo *generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
                                        List *refnames_tlist,
                                        List **pTargetList);
static RelOptInfo *generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
                                           List *refnames_tlist,
                                           List **pTargetList);
static List *plan_union_children(PlannerInfo *root,
                                 SetOperationStmt *top_union,
                                 List *refnames_tlist,
                                 List **tlist_list);
static Path *make_union_unique(SetOperationStmt *op, Path *path, List *tlist,
                               PlannerInfo *root);
static void postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel);
static bool choose_hashed_setop(PlannerInfo *root, List *groupClauses,
                                Path *input_path,
                                double dNumGroups, double dNumOutputRows,
                                const char *construct);
static List *generate_setop_tlist(List *colTypes, List *colCollations,
                                  int flag,
                                  Index varno,
                                  bool hack_constants,
                                  List *input_tlist,
                                  List *refnames_tlist);
static List *generate_append_tlist(List *colTypes, List *colCollations,
                                   bool flag,
                                   List *input_tlists,
                                   List *refnames_tlist);
static List *generate_setop_grouplist(SetOperationStmt *op, List *targetlist);


/*
 * plan_set_operations
 *
 *    Plans the queries for a tree of set operations (UNION/INTERSECT/EXCEPT)
 *
 * This routine only deals with the setOperations tree of the given query.
 * Any top-level ORDER BY requested in root->parse->sortClause will be handled
 * when we return to grouping_planner; likewise for LIMIT.
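 *
 * For example, in "(SELECT a FROM t1 UNION SELECT a FROM t2) ORDER BY 1
 * LIMIT 10", only the UNION tree is planned here; grouping_planner adds
 * the ORDER BY and LIMIT on top afterwards.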
 *
 * What we return is an "upperrel" RelOptInfo containing at least one Path
 * that implements the set-operation tree.  In addition, root->processed_tlist
 * receives a targetlist representing the output of the topmost setop node.
 */
RelOptInfo *
plan_set_operations(PlannerInfo *root)
{
    Query      *parse = root->parse;
    SetOperationStmt *topop = castNode(SetOperationStmt, parse->setOperations);
    Node       *node;
    RangeTblEntry *leftmostRTE;
    Query      *leftmostQuery;
    RelOptInfo *setop_rel;
    List       *top_tlist;

    Assert(topop);

    /* check for unsupported stuff */
    Assert(parse->jointree->fromlist == NIL);
    Assert(parse->jointree->quals == NULL);
    Assert(parse->groupClause == NIL);
    Assert(parse->havingQual == NULL);
    Assert(parse->windowClause == NIL);
    Assert(parse->distinctClause == NIL);

    /*
     * We'll need to build RelOptInfos for each of the leaf subqueries, which
     * are RTE_SUBQUERY rangetable entries in this Query.  Prepare the index
     * arrays for that.
     */
    setup_simple_rel_arrays(root);

    /*
     * Populate append_rel_array with each AppendRelInfo to allow direct
     * lookups by child relid.
     */
    setup_append_rel_array(root);

    /*
     * Find the leftmost component Query.  We need to use its column names for
     * all generated tlists (else SELECT INTO won't work right).
     */
    node = topop->larg;
    while (node && IsA(node, SetOperationStmt))
        node = ((SetOperationStmt *) node)->larg;
    Assert(node && IsA(node, RangeTblRef));
    leftmostRTE = root->simple_rte_array[((RangeTblRef *) node)->rtindex];
    leftmostQuery = leftmostRTE->subquery;
    Assert(leftmostQuery != NULL);

    /*
     * If the topmost node is a recursive union, it needs special processing.
     */
    if (root->hasRecursion)
    {
        setop_rel = generate_recursion_path(topop, root,
                                            leftmostQuery->targetList,
                                            &top_tlist);
    }
    else
    {
        /*
         * Recurse on setOperations tree to generate paths for set ops.  The
         * final output paths should have just the column types shown as the
         * output from the top-level node, plus possibly resjunk working
         * columns (we can rely on upper-level nodes to deal with that).
         */
        setop_rel = recurse_set_operations((Node *) topop, root,
                                           topop->colTypes, topop->colCollations,
                                           true, -1,
                                           leftmostQuery->targetList,
                                           &top_tlist,
                                           NULL);
    }

    /* Must return the built tlist into root->processed_tlist. */
    root->processed_tlist = top_tlist;

    return setop_rel;
}

/*
 * recurse_set_operations
 *    Recursively handle one step in a tree of set operations
 *
 * colTypes: OID list of set-op's result column datatypes
 * colCollations: OID list of set-op's result column collations
 * junkOK: if true, child resjunk columns may be left in the result
 * flag: if >= 0, add a resjunk output column indicating value of flag
 * refnames_tlist: targetlist to take column names from
 *
 * Returns a RelOptInfo for the subtree, as well as these output parameters:
 * *pTargetList: receives the fully-fledged tlist for the subtree's top plan
 * *pNumGroups: if not NULL, we estimate the number of distinct groups
 *    in the result, and store it there
 *
 * The pTargetList output parameter is mostly redundant with the pathtarget
 * of the returned RelOptInfo, but for the moment we need it because much of
 * the logic in this file depends on flag columns being marked resjunk.
 * Pending a redesign of how that works, this is the easy way out.
 * We don't have to care about typmods here: the only allowed difference
 * between set-op input and output typmods is that the input may have a
 * specific typmod while the output is -1, and that does not require a
 * coercion.
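 * (For example, a varchar(10) input column can feed a set-op output
 * column of typmod -1 directly.)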
 */
static RelOptInfo *
recurse_set_operations(Node *setOp, PlannerInfo *root,
                       List *colTypes, List *colCollations,
                       bool junkOK,
                       int flag, List *refnames_tlist,
                       List **pTargetList,
                       double *pNumGroups)
{
    RelOptInfo *rel = NULL;     /* keep compiler quiet */

    /* Guard against stack overflow due to overly complex setop nests */
    check_stack_depth();

    if (IsA(setOp, RangeTblRef))
    {
        RangeTblRef *rtr = (RangeTblRef *) setOp;
        RangeTblEntry *rte = root->simple_rte_array[rtr->rtindex];
        Query      *subquery = rte->subquery;
        PlannerInfo *subroot;
        RelOptInfo *final_rel;
        Path       *subpath;
        Path       *path;
        List       *tlist;

        Assert(subquery != NULL);

        /* Build a RelOptInfo for this leaf subquery. */
        rel = build_simple_rel(root, rtr->rtindex, NULL);

        /* plan_params should not be in use in current query level */
        Assert(root->plan_params == NIL);

        /* Generate a subroot and Paths for the subquery */
        subroot = rel->subroot = subquery_planner(root->glob, subquery,
                                                  root,
                                                  false,
                                                  root->tuple_fraction);

        /*
         * It should not be possible for the primitive query to contain any
         * cross-references to other primitive queries in the setop tree.
         */
        if (root->plan_params)
            elog(ERROR, "unexpected outer reference in set operation subquery");

        /* Figure out the appropriate target list for this subquery. */
        tlist = generate_setop_tlist(colTypes, colCollations,
                                     flag,
                                     rtr->rtindex,
                                     true,
                                     subroot->processed_tlist,
                                     refnames_tlist);
        rel->reltarget = create_pathtarget(root, tlist);

        /* Return the fully-fledged tlist to caller, too */
        *pTargetList = tlist;

        /*
         * Mark rel with estimated output rows, width, etc.  Note that we have
         * to do this before generating outer-query paths, else
         * cost_subqueryscan is not happy.
         */
        set_subquery_size_estimates(root, rel);

        /*
         * Since we may want to add a partial path to this relation, we must
         * set its consider_parallel flag correctly.
         */
        final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL);
        rel->consider_parallel = final_rel->consider_parallel;

        /*
         * For the moment, we consider only a single Path for the subquery.
         * This should change soon (make it look more like
         * set_subquery_pathlist).
         */
        subpath = get_cheapest_fractional_path(final_rel,
                                               root->tuple_fraction);

        /*
         * Stick a SubqueryScanPath atop that.
         *
         * We don't bother to determine the subquery's output ordering since
         * it won't be reflected in the set-op result anyhow; so just label
         * the SubqueryScanPath with nil pathkeys.  (XXX that should change
         * soon too, likely.)
         */
        path = (Path *) create_subqueryscan_path(root, rel, subpath,
                                                 NIL, NULL);

        add_path(rel, path);

        /*
         * If we have a partial path for the child relation, we can use that
         * to build a partial path for this relation.  But there's no point in
         * considering any path but the cheapest.
         */
        if (rel->consider_parallel && bms_is_empty(rel->lateral_relids) &&
            final_rel->partial_pathlist != NIL)
        {
            Path       *partial_subpath;
            Path       *partial_path;

            partial_subpath = linitial(final_rel->partial_pathlist);
            partial_path = (Path *)
                create_subqueryscan_path(root, rel, partial_subpath,
                                         NIL, NULL);
            add_partial_path(rel, partial_path);
        }

        /*
         * Estimate number of groups if caller wants it.  If the subquery used
         * grouping or aggregation, its output is probably mostly unique
         * anyway; otherwise do statistical estimation.
         *
         * XXX you don't really want to know about this: we do the estimation
         * using the subquery's original targetlist expressions, not the
         * subroot->processed_tlist which might seem more appropriate.  The
         * reason is that if the subquery is itself a setop, it may return a
         * processed_tlist containing "varno 0" Vars generated by
         * generate_append_tlist, and those would confuse estimate_num_groups
         * mightily.  We ought to get rid of the "varno 0" hack, but that
         * requires a redesign of the parsetree representation of setops, so
         * that there can be an RTE corresponding to each setop's output.
         */
        if (pNumGroups)
        {
            if (subquery->groupClause || subquery->groupingSets ||
                subquery->distinctClause ||
                subroot->hasHavingQual || subquery->hasAggs)
                *pNumGroups = subpath->rows;
            else
                *pNumGroups = estimate_num_groups(subroot,
                                                  get_tlist_exprs(subquery->targetList, false),
                                                  subpath->rows,
                                                  NULL);
        }
    }
    else if (IsA(setOp, SetOperationStmt))
    {
        SetOperationStmt *op = (SetOperationStmt *) setOp;

        /* UNIONs are much different from INTERSECT/EXCEPT */
        if (op->op == SETOP_UNION)
            rel = generate_union_paths(op, root,
                                       refnames_tlist,
                                       pTargetList);
        else
            rel = generate_nonunion_paths(op, root,
                                          refnames_tlist,
                                          pTargetList);
        if (pNumGroups)
            *pNumGroups = rel->rows;

        /*
         * If necessary, add a Result node to project the caller-requested
         * output columns.
         *
         * XXX you don't really want to know about this: setrefs.c will apply
         * fix_upper_expr() to the Result node's tlist.  This would fail if the
         * Vars generated by generate_setop_tlist() were not exactly equal()
         * to the corresponding tlist entries of the subplan.  However, since
         * the subplan was generated by generate_union_paths() or
         * generate_nonunion_paths(), and hence its tlist was generated by
         * generate_append_tlist(), this will work.  We just tell
         * generate_setop_tlist() to use varno 0.
         */
        if (flag >= 0 ||
            !tlist_same_datatypes(*pTargetList, colTypes, junkOK) ||
            !tlist_same_collations(*pTargetList, colCollations, junkOK))
        {
            PathTarget *target;
            ListCell   *lc;

            *pTargetList = generate_setop_tlist(colTypes, colCollations,
                                                flag,
                                                0,
                                                false,
                                                *pTargetList,
                                                refnames_tlist);
            target = create_pathtarget(root, *pTargetList);

            /* Apply projection to each path */
            foreach(lc, rel->pathlist)
            {
                Path       *subpath = (Path *) lfirst(lc);
                Path       *path;

                Assert(subpath->param_info == NULL);
                path = apply_projection_to_path(root, subpath->parent,
                                                subpath, target);
                /* If we had to add a Result, path is different from subpath */
                if (path != subpath)
                    lfirst(lc) = path;
            }

            /* Apply projection to each partial path */
            foreach(lc, rel->partial_pathlist)
            {
                Path       *subpath = (Path *) lfirst(lc);
                Path       *path;

                Assert(subpath->param_info == NULL);

                /* avoid apply_projection_to_path, in case of multiple refs */
                path = (Path *) create_projection_path(root, subpath->parent,
                                                       subpath, target);
                lfirst(lc) = path;
            }
        }
    }
    else
    {
        elog(ERROR, "unrecognized node type: %d",
             (int) nodeTag(setOp));
        *pTargetList = NIL;
    }

    postprocess_setop_rel(root, rel);

    return rel;
}

/*
 * Generate paths for a recursive UNION node
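 *
 * A recursive UNION comes from a WITH RECURSIVE query, e.g.
 *
 *    WITH RECURSIVE t(n) AS (
 *        SELECT 1
 *        UNION ALL
 *        SELECT n + 1 FROM t WHERE n < 100
 *    )
 *    SELECT * FROM t;
 *
 * setOp->larg is the non-recursive term and setOp->rarg the recursive term.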
 */
static RelOptInfo *
generate_recursion_path(SetOperationStmt *setOp, PlannerInfo *root,
                        List *refnames_tlist,
                        List **pTargetList)
{
    RelOptInfo *result_rel;
    Path       *path;
    RelOptInfo *lrel,
               *rrel;
    Path       *lpath;
    Path       *rpath;
    List       *lpath_tlist;
    List       *rpath_tlist;
    List       *tlist;
    List       *groupList;
    double      dNumGroups;

    /* Parser should have rejected other cases */
    if (setOp->op != SETOP_UNION)
        elog(ERROR, "only UNION queries can be recursive");
    /* Worktable ID should be assigned */
    Assert(root->wt_param_id >= 0);

    /*
     * Unlike a regular UNION node, process the left and right inputs
     * separately without any intention of combining them into one Append.
     */
    lrel = recurse_set_operations(setOp->larg, root,
                                  setOp->colTypes, setOp->colCollations,
                                  false, -1,
                                  refnames_tlist,
                                  &lpath_tlist,
                                  NULL);
    lpath = lrel->cheapest_total_path;
    /* The right path will want to look at the left one ... */
    root->non_recursive_path = lpath;
    rrel = recurse_set_operations(setOp->rarg, root,
                                  setOp->colTypes, setOp->colCollations,
                                  false, -1,
                                  refnames_tlist,
                                  &rpath_tlist,
                                  NULL);
    rpath = rrel->cheapest_total_path;
    root->non_recursive_path = NULL;

    /*
     * Generate tlist for RecursiveUnion path node --- same as in Append cases
     */
    tlist = generate_append_tlist(setOp->colTypes, setOp->colCollations, false,
                                  list_make2(lpath_tlist, rpath_tlist),
                                  refnames_tlist);

    *pTargetList = tlist;

    /* Build result relation. */
    result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
                                 bms_union(lrel->relids, rrel->relids));
    result_rel->reltarget = create_pathtarget(root, tlist);

    /*
     * If UNION, identify the grouping operators
     */
    if (setOp->all)
    {
        groupList = NIL;
        dNumGroups = 0;
    }
    else
    {
        /* Identify the grouping semantics */
        groupList = generate_setop_grouplist(setOp, tlist);

        /* We only support hashing here */
        if (!grouping_is_hashable(groupList))
            ereport(ERROR,
                    (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                     errmsg("could not implement recursive UNION"),
                     errdetail("All column datatypes must be hashable.")));

        /*
         * For the moment, take the number of distinct groups as equal to the
         * total input size, ie, the worst case.
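         * The recursive term's row estimate covers just one iteration, so
         * it is scaled by 10 below; this mirrors the roughly-ten-iterations
         * guess used by cost_recursive_union.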
         */
        dNumGroups = lpath->rows + rpath->rows * 10;
    }

    /*
     * And make the path node.
     */
    path = (Path *) create_recursiveunion_path(root,
                                               result_rel,
                                               lpath,
                                               rpath,
                                               result_rel->reltarget,
                                               groupList,
                                               root->wt_param_id,
                                               dNumGroups);

    add_path(result_rel, path);
    postprocess_setop_rel(root, result_rel);
    return result_rel;
}

/*
 * Generate paths for a UNION or UNION ALL node
 */
static RelOptInfo *
generate_union_paths(SetOperationStmt *op, PlannerInfo *root,
                     List *refnames_tlist,
                     List **pTargetList)
{
    Relids      relids = NULL;
    RelOptInfo *result_rel;
    double      save_fraction = root->tuple_fraction;
    ListCell   *lc;
    List       *pathlist = NIL;
    List       *partial_pathlist = NIL;
    bool        partial_paths_valid = true;
    bool        consider_parallel = true;
    List       *rellist;
    List       *tlist_list;
    List       *tlist;
    Path       *path;

    /*
     * If plain UNION, tell children to fetch all tuples.
     *
     * Note: in UNION ALL, we pass the top-level tuple_fraction unmodified to
     * each arm of the UNION ALL.  One could make a case for reducing the
     * tuple fraction for later arms (discounting by the expected size of the
     * earlier arms' results) but it seems not worth the trouble.  The normal
     * case where tuple_fraction isn't already zero is a LIMIT at top level,
     * and passing it down as-is is usually enough to get the desired result
     * of preferring fast-start plans.
     */
    if (!op->all)
        root->tuple_fraction = 0.0;

    /*
     * If any of my children are identical UNION nodes (same op, all-flag, and
     * colTypes) then they can be merged into this node so that we generate
     * only one Append and unique-ification for the lot.  Recurse to find such
     * nodes and compute their children's paths.
     */
    rellist = plan_union_children(root, op, refnames_tlist, &tlist_list);

    /*
     * Generate tlist for Append plan node.
     *
     * The tlist for an Append plan isn't important as far as the Append is
     * concerned, but we must make it look real anyway for the benefit of the
     * next plan level up.
     */
    tlist = generate_append_tlist(op->colTypes, op->colCollations, false,
                                  tlist_list, refnames_tlist);

    *pTargetList = tlist;

    /* Build path lists and relid set. */
    foreach(lc, rellist)
    {
        RelOptInfo *rel = lfirst(lc);

        pathlist = lappend(pathlist, rel->cheapest_total_path);

        if (consider_parallel)
        {
            if (!rel->consider_parallel)
            {
                consider_parallel = false;
                partial_paths_valid = false;
            }
            else if (rel->partial_pathlist == NIL)
                partial_paths_valid = false;
            else
                partial_pathlist = lappend(partial_pathlist,
                                           linitial(rel->partial_pathlist));
        }

        relids = bms_union(relids, rel->relids);
    }

    /* Build result relation. */
    result_rel = fetch_upper_rel(root, UPPERREL_SETOP, relids);
    result_rel->reltarget = create_pathtarget(root, tlist);
    result_rel->consider_parallel = consider_parallel;

    /*
     * Append the child results together.
     */
    path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
                                       NIL, NULL, 0, false, NIL, -1);

    /*
     * For UNION ALL, we just need the Append path.  For UNION, need to add
     * node(s) to remove duplicates.
     */
    if (!op->all)
        path = make_union_unique(op, path, tlist, root);

    add_path(result_rel, path);

    /*
     * Estimate number of groups.  For now we just assume the output is unique
     * --- this is certainly true for the UNION case, and we want worst-case
     * estimates anyway.
     */
    result_rel->rows = path->rows;

    /*
     * Now consider doing the same thing using the partial paths plus Append
     * plus Gather.
     */
    if (partial_paths_valid)
    {
        Path       *ppath;
        ListCell   *lc;
        int         parallel_workers = 0;

        /* Find the highest number of workers requested for any subpath. */
        foreach(lc, partial_pathlist)
        {
            Path       *path = lfirst(lc);

            parallel_workers = Max(parallel_workers, path->parallel_workers);
        }
        Assert(parallel_workers > 0);

        /*
         * If the use of parallel append is permitted, always request at least
         * log2(# of children) workers.  We assume it can be useful to have
         * extra workers in this case because they will be spread out across
         * the children.  The precise formula is just a guess; see
         * add_paths_to_append_rel.
         */
        if (enable_parallel_append)
        {
            parallel_workers = Max(parallel_workers,
                                   fls(list_length(partial_pathlist)));
            parallel_workers = Min(parallel_workers,
                                   max_parallel_workers_per_gather);
        }
        Assert(parallel_workers > 0);

        ppath = (Path *)
            create_append_path(root, result_rel, NIL, partial_pathlist,
                               NIL, NULL,
                               parallel_workers, enable_parallel_append,
                               NIL, -1);
        ppath = (Path *)
            create_gather_path(root, result_rel, ppath,
                               result_rel->reltarget, NULL, NULL);
        if (!op->all)
            ppath = make_union_unique(op, ppath, tlist, root);
        add_path(result_rel, ppath);
    }

    /* Undo effects of possibly forcing tuple_fraction to 0 */
    root->tuple_fraction = save_fraction;

    return result_rel;
}

/*
 * Generate paths for an INTERSECT, INTERSECT ALL, EXCEPT, or EXCEPT ALL node
 */
static RelOptInfo *
generate_nonunion_paths(SetOperationStmt *op, PlannerInfo *root,
                        List *refnames_tlist,
                        List **pTargetList)
{
    RelOptInfo *result_rel;
    RelOptInfo *lrel,
               *rrel;
    double      save_fraction = root->tuple_fraction;
    Path       *lpath,
               *rpath,
               *path;
    List       *lpath_tlist,
               *rpath_tlist,
               *tlist_list,
               *tlist,
               *groupList,
               *pathlist;
    double      dLeftGroups,
                dRightGroups,
                dNumGroups,
                dNumOutputRows;
    bool        use_hash;
    SetOpCmd    cmd;
    int         firstFlag;

    /*
     * Tell children to fetch all tuples.
     */
    root->tuple_fraction = 0.0;

    /* Recurse on children, ensuring their outputs are marked */
    lrel = recurse_set_operations(op->larg, root,
                                  op->colTypes, op->colCollations,
                                  false, 0,
                                  refnames_tlist,
                                  &lpath_tlist,
                                  &dLeftGroups);
    lpath = lrel->cheapest_total_path;
    rrel = recurse_set_operations(op->rarg, root,
                                  op->colTypes, op->colCollations,
                                  false, 1,
                                  refnames_tlist,
                                  &rpath_tlist,
                                  &dRightGroups);
    rpath = rrel->cheapest_total_path;

    /* Undo effects of forcing tuple_fraction to 0 */
    root->tuple_fraction = save_fraction;

    /*
     * For EXCEPT, we must put the left input first.  For INTERSECT, either
     * order should give the same results, and we prefer to put the smaller
     * input first in order to minimize the size of the hash table in the
     * hashing case.  "Smaller" means the one with the fewer groups.
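     * (EXCEPT is not commutative: "a EXCEPT b" is generally different from
     * "b EXCEPT a", whereas INTERSECT gives the same result either way.)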
     */
    if (op->op == SETOP_EXCEPT || dLeftGroups <= dRightGroups)
    {
        pathlist = list_make2(lpath, rpath);
        tlist_list = list_make2(lpath_tlist, rpath_tlist);
        firstFlag = 0;
    }
    else
    {
        pathlist = list_make2(rpath, lpath);
        tlist_list = list_make2(rpath_tlist, lpath_tlist);
        firstFlag = 1;
    }

    /*
     * Generate tlist for Append plan node.
     *
     * The tlist for an Append plan isn't important as far as the Append is
     * concerned, but we must make it look real anyway for the benefit of the
     * next plan level up.  In fact, it has to be real enough that the flag
     * column is shown as a variable not a constant, else setrefs.c will get
     * confused.
     */
    tlist = generate_append_tlist(op->colTypes, op->colCollations, true,
                                  tlist_list, refnames_tlist);

    *pTargetList = tlist;

    /* Build result relation. */
    result_rel = fetch_upper_rel(root, UPPERREL_SETOP,
                                 bms_union(lrel->relids, rrel->relids));
    result_rel->reltarget = create_pathtarget(root, tlist);

    /*
     * Append the child results together.
     */
    path = (Path *) create_append_path(root, result_rel, pathlist, NIL,
                                       NIL, NULL, 0, false, NIL, -1);

    /* Identify the grouping semantics */
    groupList = generate_setop_grouplist(op, tlist);

    /*
     * Estimate number of distinct groups that we'll need hashtable entries
     * for; this is the size of the left-hand input for EXCEPT, or the smaller
     * input for INTERSECT.  Also estimate the number of eventual output rows.
     * In non-ALL cases, we estimate each group produces one output row; in
     * ALL cases use the relevant relation size.  These are worst-case
     * estimates, of course, but we need to be conservative.
     */
    if (op->op == SETOP_EXCEPT)
    {
        dNumGroups = dLeftGroups;
        dNumOutputRows = op->all ? lpath->rows : dNumGroups;
    }
    else
    {
        dNumGroups = Min(dLeftGroups, dRightGroups);
        dNumOutputRows = op->all ? Min(lpath->rows, rpath->rows) : dNumGroups;
    }

    /*
     * Decide whether to hash or sort, and add a sort node if needed.
     */
    use_hash = choose_hashed_setop(root, groupList, path,
                                   dNumGroups, dNumOutputRows,
                                   (op->op == SETOP_INTERSECT) ? "INTERSECT" : "EXCEPT");

    if (groupList && !use_hash)
        path = (Path *) create_sort_path(root,
                                         result_rel,
                                         path,
                                         make_pathkeys_for_sortclauses(root,
                                                                       groupList,
                                                                       tlist),
                                         -1.0);

    /*
     * Finally, add a SetOp path node to generate the correct output.
     */
    switch (op->op)
    {
        case SETOP_INTERSECT:
            cmd = op->all ? SETOPCMD_INTERSECT_ALL : SETOPCMD_INTERSECT;
            break;
        case SETOP_EXCEPT:
            cmd = op->all ? SETOPCMD_EXCEPT_ALL : SETOPCMD_EXCEPT;
            break;
        default:
            elog(ERROR, "unrecognized set op: %d", (int) op->op);
            cmd = SETOPCMD_INTERSECT;   /* keep compiler quiet */
            break;
    }
    path = (Path *) create_setop_path(root,
                                      result_rel,
                                      path,
                                      cmd,
                                      use_hash ? SETOP_HASHED : SETOP_SORTED,
                                      groupList,
                                      list_length(op->colTypes) + 1,
                                      use_hash ? firstFlag : -1,
                                      dNumGroups,
                                      dNumOutputRows);

    result_rel->rows = path->rows;
    add_path(result_rel, path);
    return result_rel;
}

/*
 * Pull up children of a UNION node that are identically-propertied UNIONs.
 *
 * NOTE: we can also pull a UNION ALL up into a UNION, since the distinct
 * output rows will be lost anyway.
 *
 * NOTE: currently, we ignore collations while determining if a child has
 * the same properties.  This is semantically sound only so long as all
 * collations have the same notion of equality.  It is valid from an
 * implementation standpoint because we don't care about the ordering of
 * a UNION child's result: UNION ALL results are always unordered, and
 * generate_union_paths will force a fresh sort if the top level is a UNION.
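 *
 * For example, "(a UNION b) UNION c" is flattened here so that a single
 * Append of a, b, and c feeds one duplicate-elimination step, instead of
 * two nested Append/unique-ification pairs.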
 */
static List *
plan_union_children(PlannerInfo *root,
                    SetOperationStmt *top_union,
                    List *refnames_tlist,
                    List **tlist_list)
{
    List       *pending_rels = list_make1(top_union);
    List       *result = NIL;
    List       *child_tlist;

    *tlist_list = NIL;

    while (pending_rels != NIL)
    {
        Node       *setOp = linitial(pending_rels);

        pending_rels = list_delete_first(pending_rels);

        if (IsA(setOp, SetOperationStmt))
        {
            SetOperationStmt *op = (SetOperationStmt *) setOp;

            if (op->op == top_union->op &&
                (op->all == top_union->all || op->all) &&
                equal(op->colTypes, top_union->colTypes))
            {
                /* Same UNION, so fold children into parent */
                pending_rels = lcons(op->rarg, pending_rels);
                pending_rels = lcons(op->larg, pending_rels);
                continue;
            }
        }

        /*
         * Not same, so plan this child separately.
         *
         * Note we disallow any resjunk columns in child results.  This is
         * necessary since the Append node that implements the union won't do
         * any projection, and upper levels will get confused if some of our
         * output tuples have junk and some don't.  This case only arises when
         * we have an EXCEPT or INTERSECT as child, else there won't be
         * resjunk anyway.
         */
        result = lappend(result, recurse_set_operations(setOp, root,
                                                        top_union->colTypes,
                                                        top_union->colCollations,
                                                        false, -1,
                                                        refnames_tlist,
                                                        &child_tlist,
                                                        NULL));
        *tlist_list = lappend(*tlist_list, child_tlist);
    }

    return result;
}

/*
 * Add nodes to the given path tree to unique-ify the result of a UNION.
 */
static Path *
make_union_unique(SetOperationStmt *op, Path *path, List *tlist,
                  PlannerInfo *root)
{
    RelOptInfo *result_rel = fetch_upper_rel(root, UPPERREL_SETOP, NULL);
    List       *groupList;
    double      dNumGroups;

    /* Identify the grouping semantics */
    groupList = generate_setop_grouplist(op, tlist);

    /*
     * XXX for the moment, take the number of distinct groups as equal to the
     * total input size, ie, the worst case.  This is too conservative, but we
     * don't want to risk having the hashtable overrun memory; also, it's not
     * clear how to get a decent estimate of the true size.  One should note
     * as well the propensity of novices to write UNION rather than UNION ALL
     * even when they don't expect any duplicates...
     */
    dNumGroups = path->rows;

    /* Decide whether to hash or sort */
    if (choose_hashed_setop(root, groupList, path,
                            dNumGroups, dNumGroups,
                            "UNION"))
    {
        /* Hashed aggregate plan --- no sort needed */
        path = (Path *) create_agg_path(root,
                                        result_rel,
                                        path,
                                        create_pathtarget(root, tlist),
                                        AGG_HASHED,
                                        AGGSPLIT_SIMPLE,
                                        groupList,
                                        NIL,
                                        NULL,
                                        dNumGroups);
    }
    else
    {
        /* Sort and Unique */
        if (groupList)
            path = (Path *)
                create_sort_path(root,
                                 result_rel,
                                 path,
                                 make_pathkeys_for_sortclauses(root,
                                                               groupList,
                                                               tlist),
                                 -1.0);
        path = (Path *) create_upper_unique_path(root,
                                                 result_rel,
                                                 path,
                                                 list_length(path->pathkeys),
                                                 dNumGroups);
    }

    return path;
}

/*
 * postprocess_setop_rel - perform steps required after adding paths
 */
static void
postprocess_setop_rel(PlannerInfo *root, RelOptInfo *rel)
{
    /*
     * We don't currently worry about allowing FDWs to contribute paths to
     * this relation, but give extensions a chance.
     */
    if (create_upper_paths_hook)
        (*create_upper_paths_hook) (root, UPPERREL_SETOP,
                                    NULL, rel, NULL);

    /* Select cheapest path */
    set_cheapest(rel);
}

/*
 * choose_hashed_setop - should we use hashing for a set operation?
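 *
 * The decision considers whether the grouping operators support sorting
 * and/or hashing, the enable_hashagg setting, whether the estimated
 * hashtable would fit in work_mem, and finally a comparison of estimated
 * costs for the two strategies.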
 */
static bool
choose_hashed_setop(PlannerInfo *root, List *groupClauses,
                    Path *input_path,
                    double dNumGroups, double dNumOutputRows,
                    const char *construct)
{
    int         numGroupCols = list_length(groupClauses);
    bool        can_sort;
    bool        can_hash;
    Size        hashentrysize;
    Path        hashed_p;
    Path        sorted_p;
    double      tuple_fraction;

    /* Check whether the operators support sorting or hashing */
    can_sort = grouping_is_sortable(groupClauses);
    can_hash = grouping_is_hashable(groupClauses);
    if (can_hash && can_sort)
    {
        /* we have a meaningful choice to make, continue ... */
    }
    else if (can_hash)
        return true;
    else if (can_sort)
        return false;
    else
        ereport(ERROR,
                (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
                 /* translator: %s is UNION, INTERSECT, or EXCEPT */
                 errmsg("could not implement %s", construct),
                 errdetail("Some of the datatypes only support hashing, while others only support sorting.")));

    /* Prefer sorting when enable_hashagg is off */
    if (!enable_hashagg)
        return false;

    /*
     * Don't do it if it doesn't look like the hashtable will fit into
     * work_mem.
     */
    hashentrysize = MAXALIGN(input_path->pathtarget->width) + MAXALIGN(SizeofMinimalTupleHeader);

    if (hashentrysize * dNumGroups > work_mem * 1024L)
        return false;

    /*
     * See if the estimated cost is no more than doing it the other way.
     *
     * We need to consider input_plan + hashagg versus input_plan + sort +
     * group.  Note that the actual result plan might involve a SetOp or
     * Unique node, not Agg or Group, but the cost estimates for Agg and Group
     * should be close enough for our purposes here.
     *
     * These path variables are dummies that just hold cost fields; we don't
     * make actual Paths for these steps.
     */
    cost_agg(&hashed_p, root, AGG_HASHED, NULL,
             numGroupCols, dNumGroups,
             NIL,
             input_path->startup_cost, input_path->total_cost,
             input_path->rows);

    /*
     * Now for the sorted case.  Note that the input is *always* unsorted,
     * since it was made by appending unrelated sub-relations together.
     */
    sorted_p.startup_cost = input_path->startup_cost;
    sorted_p.total_cost = input_path->total_cost;
    /* XXX cost_sort doesn't actually look at pathkeys, so just pass NIL */
    cost_sort(&sorted_p, root, NIL, sorted_p.total_cost,
              input_path->rows, input_path->pathtarget->width,
              0.0, work_mem, -1.0);
    cost_group(&sorted_p, root, numGroupCols, dNumGroups,
               NIL,
               sorted_p.startup_cost, sorted_p.total_cost,
               input_path->rows);

    /*
     * Now make the decision using the top-level tuple fraction.  First we
     * have to convert an absolute count (LIMIT) into fractional form.
     */
    tuple_fraction = root->tuple_fraction;
    if (tuple_fraction >= 1.0)
        tuple_fraction /= dNumOutputRows;

    if (compare_fractional_path_costs(&hashed_p, &sorted_p,
                                      tuple_fraction) < 0)
    {
        /* Hashed is cheaper, so use it */
        return true;
    }
    return false;
}

/*
 * Generate targetlist for a set-operation plan node
 *
 * colTypes: OID list of set-op's result column datatypes
 * colCollations: OID list of set-op's result column collations
 * flag: -1 if no flag column needed, 0 or 1 to create a const flag column
 * varno: varno to use in generated Vars
 * hack_constants: true to copy up constants (see comments in code)
 * input_tlist: targetlist of this node's input node
 * refnames_tlist: targetlist to take column names from
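 *
 * For example, in "SELECT 1 UNION SELECT 1.5", parse analysis resolves the
 * result column type as numeric, and the integer arm's output is coerced
 * to numeric here.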
 */
static List *
generate_setop_tlist(List *colTypes, List *colCollations,
                     int flag,
                     Index varno,
                     bool hack_constants,
                     List *input_tlist,
                     List *refnames_tlist)
{
    List       *tlist = NIL;
    int         resno = 1;
    ListCell   *ctlc,
               *cclc,
               *itlc,
               *rtlc;
    TargetEntry *tle;
    Node       *expr;

    forfour(ctlc, colTypes, cclc, colCollations,
            itlc, input_tlist, rtlc, refnames_tlist)
    {
        Oid         colType = lfirst_oid(ctlc);
        Oid         colColl = lfirst_oid(cclc);
        TargetEntry *inputtle = (TargetEntry *) lfirst(itlc);
        TargetEntry *reftle = (TargetEntry *) lfirst(rtlc);

        Assert(inputtle->resno == resno);
        Assert(reftle->resno == resno);
        Assert(!inputtle->resjunk);
        Assert(!reftle->resjunk);

        /*
         * Generate columns referencing input columns and having appropriate
         * data types and column names.  Insert datatype coercions where
         * necessary.
         *
         * HACK: constants in the input's targetlist are copied up as-is
         * rather than being referenced as subquery outputs.  This is mainly
         * to ensure that when we try to coerce them to the output column's
         * datatype, the right things happen for UNKNOWN constants.  But do
         * this only at the first level of subquery-scan plans; we don't want
         * phony constants appearing in the output tlists of upper-level
         * nodes!
         */
        if (hack_constants && inputtle->expr && IsA(inputtle->expr, Const))
            expr = (Node *) inputtle->expr;
        else
            expr = (Node *) makeVar(varno,
                                    inputtle->resno,
                                    exprType((Node *) inputtle->expr),
                                    exprTypmod((Node *) inputtle->expr),
                                    exprCollation((Node *) inputtle->expr),
                                    0);

        if (exprType(expr) != colType)
        {
            /*
             * Note: it's not really cool to be applying coerce_to_common_type
             * here; one notable point is that assign_expr_collations never
             * gets run on any generated nodes.  For the moment that's not a
             * problem because we force the correct exposed collation below.
             * It would likely be best to make the parser generate the correct
             * output tlist for every set-op to begin with, though.
             */
            expr = coerce_to_common_type(NULL,  /* no UNKNOWNs here */
                                         expr,
                                         colType,
                                         "UNION/INTERSECT/EXCEPT");
        }

        /*
         * Ensure the tlist entry's exposed collation matches the set-op.  This
         * is necessary because plan_set_operations() reports the result
         * ordering as a list of SortGroupClauses, which don't carry collation
         * themselves but just refer to tlist entries.  If we don't show the
         * right collation then planner.c might do the wrong thing in
         * higher-level queries.
         *
         * Note we use RelabelType, not CollateExpr, since this expression
         * will reach the executor without any further processing.
         */
        if (exprCollation(expr) != colColl)
        {
            expr = (Node *) makeRelabelType((Expr *) expr,
                                            exprType(expr),
                                            exprTypmod(expr),
                                            colColl,
                                            COERCE_IMPLICIT_CAST);
        }

        tle = makeTargetEntry((Expr *) expr,
                              (AttrNumber) resno++,
                              pstrdup(reftle->resname),
                              false);

        /*
         * By convention, all non-resjunk columns in a setop tree have
         * ressortgroupref equal to their resno.  In some cases the ref isn't
         * needed, but this is a cleaner way than modifying the tlist later.
         */
        tle->ressortgroupref = tle->resno;

        tlist = lappend(tlist, tle);
    }

    if (flag >= 0)
    {
        /* Add a resjunk flag column */
        /* flag value is the given constant */
        expr = (Node *) makeConst(INT4OID,
                                  -1,
                                  InvalidOid,
                                  sizeof(int32),
                                  Int32GetDatum(flag),
                                  false,
                                  true);
        tle = makeTargetEntry((Expr *) expr,
                              (AttrNumber) resno++,
                              pstrdup("flag"),
                              true);
        tlist = lappend(tlist, tle);
    }

    return tlist;
}

/*
 * Generate targetlist for a set-operation Append node
 *
 * colTypes: OID list of set-op's result column datatypes
 * colCollations: OID list of set-op's result column collations
 * flag: true to create a flag column copied up from subplans
 * input_tlists: list of tlists for sub-plans of the Append
 * refnames_tlist: targetlist to take column names from
 *
 * The entries in the Append's targetlist should always be simple Vars;
 * we just have to make sure they have the right datatypes/typmods/collations.
 * The Vars are always generated with varno 0.
 *
 * XXX a problem with the varno-zero approach is that set_pathtarget_cost_width
 * cannot figure out a realistic width for the tlist we make here.  But we
 * ought to refactor this code to produce a PathTarget directly, anyway.
 */
static List *
generate_append_tlist(List *colTypes, List *colCollations,
                      bool flag,
                      List *input_tlists,
                      List *refnames_tlist)
{
    List       *tlist = NIL;
    int         resno = 1;
    ListCell   *curColType;
    ListCell   *curColCollation;
    ListCell   *ref_tl_item;
    int         colindex;
    TargetEntry *tle;
    Node       *expr;
    ListCell   *tlistl;
    int32      *colTypmods;

    /*
     * First extract typmods to use.
     *
     * If the inputs all agree on type and typmod of a particular column, use
     * that typmod; else use -1.
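     *
     * For example, if every arm yields numeric(10,2), the Append column
     * keeps that typmod; if one arm yields numeric(12,4) instead, the
     * column's typmod is forced to -1.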
     */
    colTypmods = (int32 *) palloc(list_length(colTypes) * sizeof(int32));

    foreach(tlistl, input_tlists)
    {
        List       *subtlist = (List *) lfirst(tlistl);
        ListCell   *subtlistl;

        curColType = list_head(colTypes);
        colindex = 0;
        foreach(subtlistl, subtlist)
        {
            TargetEntry *subtle = (TargetEntry *) lfirst(subtlistl);

            if (subtle->resjunk)
                continue;
            Assert(curColType != NULL);
            if (exprType((Node *) subtle->expr) == lfirst_oid(curColType))
            {
                /* If first subplan, copy the typmod; else compare */
                int32       subtypmod = exprTypmod((Node *) subtle->expr);

                if (tlistl == list_head(input_tlists))
                    colTypmods[colindex] = subtypmod;
                else if (subtypmod != colTypmods[colindex])
                    colTypmods[colindex] = -1;
            }
            else
            {
                /* types disagree, so force typmod to -1 */
                colTypmods[colindex] = -1;
            }
            curColType = lnext(curColType);
            colindex++;
        }
        Assert(curColType == NULL);
    }

    /*
     * Now we can build the tlist for the Append.
     */
    colindex = 0;
    forthree(curColType, colTypes, curColCollation, colCollations,
             ref_tl_item, refnames_tlist)
    {
        Oid         colType = lfirst_oid(curColType);
        int32       colTypmod = colTypmods[colindex++];
        Oid         colColl = lfirst_oid(curColCollation);
        TargetEntry *reftle = (TargetEntry *) lfirst(ref_tl_item);

        Assert(reftle->resno == resno);
        Assert(!reftle->resjunk);
        expr = (Node *) makeVar(0,
                                resno,
                                colType,
                                colTypmod,
                                colColl,
                                0);
        tle = makeTargetEntry((Expr *) expr,
                              (AttrNumber) resno++,
                              pstrdup(reftle->resname),
                              false);

        /*
         * By convention, all non-resjunk columns in a setop tree have
         * ressortgroupref equal to their resno.  In some cases the ref isn't
         * needed, but this is a cleaner way than modifying the tlist later.
         */
        tle->ressortgroupref = tle->resno;

        tlist = lappend(tlist, tle);
    }

    if (flag)
    {
        /* Add a resjunk flag column */
        /* flag value is shown as copied up from subplan */
        expr = (Node *) makeVar(0,
                                resno,
                                INT4OID,
                                -1,
                                InvalidOid,
                                0);
        tle = makeTargetEntry((Expr *) expr,
                              (AttrNumber) resno++,
                              pstrdup("flag"),
                              true);
        tlist = lappend(tlist, tle);
    }

    pfree(colTypmods);

    return tlist;
}

/*
 * generate_setop_grouplist
 *    Build a SortGroupClause list defining the sort/grouping properties
 *    of the setop's output columns.
 *
 * Parse analysis already determined the properties and built a suitable
 * list, except that the entries do not have sortgrouprefs set because
 * the parser output representation doesn't include a tlist for each
 * setop.  So what we need to do here is copy that list and install
 * proper sortgrouprefs into it (copying those from the targetlist).
 */
static List *
generate_setop_grouplist(SetOperationStmt *op, List *targetlist)
{
    List       *grouplist = copyObject(op->groupClauses);
    ListCell   *lg;
    ListCell   *lt;

    lg = list_head(grouplist);
    foreach(lt, targetlist)
    {
        TargetEntry *tle = (TargetEntry *) lfirst(lt);
        SortGroupClause *sgc;

        if (tle->resjunk)
        {
            /* resjunk columns should not have sortgrouprefs */
            Assert(tle->ressortgroupref == 0);
            continue;           /* ignore resjunk columns */
        }

        /* non-resjunk columns should have sortgroupref = resno */
        Assert(tle->ressortgroupref == tle->resno);

        /* non-resjunk columns should have grouping clauses */
        Assert(lg != NULL);
        sgc = (SortGroupClause *) lfirst(lg);
        lg = lnext(lg);
        Assert(sgc->tleSortGroupRef == 0);

        sgc->tleSortGroupRef = tle->ressortgroupref;
    }
    Assert(lg == NULL);
    return grouplist;
}