1 | /*------------------------------------------------------------------------ |
2 | * |
3 | * geqo_eval.c |
4 | * Routines to evaluate query trees |
5 | * |
6 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
7 | * Portions Copyright (c) 1994, Regents of the University of California |
8 | * |
9 | * src/backend/optimizer/geqo/geqo_eval.c |
10 | * |
11 | *------------------------------------------------------------------------- |
12 | */ |
13 | |
14 | /* contributed by: |
15 | =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= |
16 | * Martin Utesch * Institute of Automatic Control * |
17 | = = University of Mining and Technology = |
18 | * utesch@aut.tu-freiberg.de * Freiberg, Germany * |
19 | =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= |
20 | */ |
21 | |
22 | #include "postgres.h" |
23 | |
24 | #include <float.h> |
25 | #include <limits.h> |
26 | #include <math.h> |
27 | |
28 | #include "optimizer/geqo.h" |
29 | #include "optimizer/joininfo.h" |
30 | #include "optimizer/pathnode.h" |
31 | #include "optimizer/paths.h" |
32 | #include "utils/memutils.h" |
33 | |
34 | |
35 | /* A "clump" of already-joined relations within gimme_tree */ |
36 | typedef struct |
37 | { |
38 | RelOptInfo *joinrel; /* joinrel for the set of relations */ |
39 | int size; /* number of input relations in clump */ |
40 | } Clump; |
41 | |
42 | static List *merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, |
43 | int num_gene, bool force); |
44 | static bool desirable_join(PlannerInfo *root, |
45 | RelOptInfo *outer_rel, RelOptInfo *inner_rel); |
46 | |
47 | |
48 | /* |
49 | * geqo_eval |
50 | * |
51 | * Returns cost of a query tree as an individual of the population. |
52 | * |
53 | * If no legal join order can be extracted from the proposed tour, |
54 | * returns DBL_MAX. |
55 | */ |
56 | Cost |
57 | geqo_eval(PlannerInfo *root, Gene *tour, int num_gene) |
58 | { |
59 | MemoryContext mycontext; |
60 | MemoryContext oldcxt; |
61 | RelOptInfo *joinrel; |
62 | Cost fitness; |
63 | int savelength; |
64 | struct HTAB *savehash; |
65 | |
66 | /* |
67 | * Create a private memory context that will hold all temp storage |
68 | * allocated inside gimme_tree(). |
69 | * |
70 | * Since geqo_eval() will be called many times, we can't afford to let all |
71 | * that memory go unreclaimed until end of statement. Note we make the |
72 | * temp context a child of the planner's normal context, so that it will |
73 | * be freed even if we abort via ereport(ERROR). |
74 | */ |
75 | mycontext = AllocSetContextCreate(CurrentMemoryContext, |
76 | "GEQO" , |
77 | ALLOCSET_DEFAULT_SIZES); |
78 | oldcxt = MemoryContextSwitchTo(mycontext); |
79 | |
80 | /* |
81 | * gimme_tree will add entries to root->join_rel_list, which may or may |
82 | * not already contain some entries. The newly added entries will be |
83 | * recycled by the MemoryContextDelete below, so we must ensure that the |
84 | * list is restored to its former state before exiting. We can do this by |
85 | * truncating the list to its original length. NOTE this assumes that any |
86 | * added entries are appended at the end! |
87 | * |
88 | * We also must take care not to mess up the outer join_rel_hash, if there |
89 | * is one. We can do this by just temporarily setting the link to NULL. |
90 | * (If we are dealing with enough join rels, which we very likely are, a |
91 | * new hash table will get built and used locally.) |
92 | * |
93 | * join_rel_level[] shouldn't be in use, so just Assert it isn't. |
94 | */ |
95 | savelength = list_length(root->join_rel_list); |
96 | savehash = root->join_rel_hash; |
97 | Assert(root->join_rel_level == NULL); |
98 | |
99 | root->join_rel_hash = NULL; |
100 | |
101 | /* construct the best path for the given combination of relations */ |
102 | joinrel = gimme_tree(root, tour, num_gene); |
103 | |
104 | /* |
105 | * compute fitness, if we found a valid join |
106 | * |
107 | * XXX geqo does not currently support optimization for partial result |
108 | * retrieval, nor do we take any cognizance of possible use of |
109 | * parameterized paths --- how to fix? |
110 | */ |
111 | if (joinrel) |
112 | { |
113 | Path *best_path = joinrel->cheapest_total_path; |
114 | |
115 | fitness = best_path->total_cost; |
116 | } |
117 | else |
118 | fitness = DBL_MAX; |
119 | |
120 | /* |
121 | * Restore join_rel_list to its former state, and put back original |
122 | * hashtable if any. |
123 | */ |
124 | root->join_rel_list = list_truncate(root->join_rel_list, |
125 | savelength); |
126 | root->join_rel_hash = savehash; |
127 | |
128 | /* release all the memory acquired within gimme_tree */ |
129 | MemoryContextSwitchTo(oldcxt); |
130 | MemoryContextDelete(mycontext); |
131 | |
132 | return fitness; |
133 | } |
134 | |
135 | /* |
136 | * gimme_tree |
137 | * Form planner estimates for a join tree constructed in the specified |
138 | * order. |
139 | * |
140 | * 'tour' is the proposed join order, of length 'num_gene' |
141 | * |
142 | * Returns a new join relation whose cheapest path is the best plan for |
143 | * this join order. NB: will return NULL if join order is invalid and |
144 | * we can't modify it into a valid order. |
145 | * |
146 | * The original implementation of this routine always joined in the specified |
147 | * order, and so could only build left-sided plans (and right-sided and |
148 | * mixtures, as a byproduct of the fact that make_join_rel() is symmetric). |
149 | * It could never produce a "bushy" plan. This had a couple of big problems, |
150 | * of which the worst was that there are situations involving join order |
151 | * restrictions where the only valid plans are bushy. |
152 | * |
153 | * The present implementation takes the given tour as a guideline, but |
154 | * postpones joins that are illegal or seem unsuitable according to some |
155 | * heuristic rules. This allows correct bushy plans to be generated at need, |
156 | * and as a nice side-effect it seems to materially improve the quality of the |
157 | * generated plans. Note however that since it's just a heuristic, it can |
158 | * still fail in some cases. (In particular, we might clump together |
159 | * relations that actually mustn't be joined yet due to LATERAL restrictions; |
160 | * since there's no provision for un-clumping, this must lead to failure.) |
161 | */ |
162 | RelOptInfo * |
163 | gimme_tree(PlannerInfo *root, Gene *tour, int num_gene) |
164 | { |
165 | GeqoPrivateData *private = (GeqoPrivateData *) root->join_search_private; |
166 | List *clumps; |
167 | int rel_count; |
168 | |
169 | /* |
170 | * Sometimes, a relation can't yet be joined to others due to heuristics |
171 | * or actual semantic restrictions. We maintain a list of "clumps" of |
172 | * successfully joined relations, with larger clumps at the front. Each |
173 | * new relation from the tour is added to the first clump it can be joined |
174 | * to; if there is none then it becomes a new clump of its own. When we |
175 | * enlarge an existing clump we check to see if it can now be merged with |
176 | * any other clumps. After the tour is all scanned, we forget about the |
177 | * heuristics and try to forcibly join any remaining clumps. If we are |
178 | * unable to merge all the clumps into one, fail. |
179 | */ |
180 | clumps = NIL; |
181 | |
182 | for (rel_count = 0; rel_count < num_gene; rel_count++) |
183 | { |
184 | int cur_rel_index; |
185 | RelOptInfo *cur_rel; |
186 | Clump *cur_clump; |
187 | |
188 | /* Get the next input relation */ |
189 | cur_rel_index = (int) tour[rel_count]; |
190 | cur_rel = (RelOptInfo *) list_nth(private->initial_rels, |
191 | cur_rel_index - 1); |
192 | |
193 | /* Make it into a single-rel clump */ |
194 | cur_clump = (Clump *) palloc(sizeof(Clump)); |
195 | cur_clump->joinrel = cur_rel; |
196 | cur_clump->size = 1; |
197 | |
198 | /* Merge it into the clumps list, using only desirable joins */ |
199 | clumps = merge_clump(root, clumps, cur_clump, num_gene, false); |
200 | } |
201 | |
202 | if (list_length(clumps) > 1) |
203 | { |
204 | /* Force-join the remaining clumps in some legal order */ |
205 | List *fclumps; |
206 | ListCell *lc; |
207 | |
208 | fclumps = NIL; |
209 | foreach(lc, clumps) |
210 | { |
211 | Clump *clump = (Clump *) lfirst(lc); |
212 | |
213 | fclumps = merge_clump(root, fclumps, clump, num_gene, true); |
214 | } |
215 | clumps = fclumps; |
216 | } |
217 | |
218 | /* Did we succeed in forming a single join relation? */ |
219 | if (list_length(clumps) != 1) |
220 | return NULL; |
221 | |
222 | return ((Clump *) linitial(clumps))->joinrel; |
223 | } |
224 | |
225 | /* |
226 | * Merge a "clump" into the list of existing clumps for gimme_tree. |
227 | * |
228 | * We try to merge the clump into some existing clump, and repeat if |
229 | * successful. When no more merging is possible, insert the clump |
230 | * into the list, preserving the list ordering rule (namely, that |
231 | * clumps of larger size appear earlier). |
232 | * |
233 | * If force is true, merge anywhere a join is legal, even if it causes |
234 | * a cartesian join to be performed. When force is false, do only |
235 | * "desirable" joins. |
236 | */ |
237 | static List * |
238 | merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, int num_gene, |
239 | bool force) |
240 | { |
241 | ListCell *prev; |
242 | ListCell *lc; |
243 | |
244 | /* Look for a clump that new_clump can join to */ |
245 | prev = NULL; |
246 | foreach(lc, clumps) |
247 | { |
248 | Clump *old_clump = (Clump *) lfirst(lc); |
249 | |
250 | if (force || |
251 | desirable_join(root, old_clump->joinrel, new_clump->joinrel)) |
252 | { |
253 | RelOptInfo *joinrel; |
254 | |
255 | /* |
256 | * Construct a RelOptInfo representing the join of these two input |
257 | * relations. Note that we expect the joinrel not to exist in |
258 | * root->join_rel_list yet, and so the paths constructed for it |
259 | * will only include the ones we want. |
260 | */ |
261 | joinrel = make_join_rel(root, |
262 | old_clump->joinrel, |
263 | new_clump->joinrel); |
264 | |
265 | /* Keep searching if join order is not valid */ |
266 | if (joinrel) |
267 | { |
268 | /* Create paths for partitionwise joins. */ |
269 | generate_partitionwise_join_paths(root, joinrel); |
270 | |
271 | /* |
272 | * Except for the topmost scan/join rel, consider gathering |
273 | * partial paths. We'll do the same for the topmost scan/join |
274 | * rel once we know the final targetlist (see |
275 | * grouping_planner). |
276 | */ |
277 | if (old_clump->size + new_clump->size < num_gene) |
278 | generate_gather_paths(root, joinrel, false); |
279 | |
280 | /* Find and save the cheapest paths for this joinrel */ |
281 | set_cheapest(joinrel); |
282 | |
283 | /* Absorb new clump into old */ |
284 | old_clump->joinrel = joinrel; |
285 | old_clump->size += new_clump->size; |
286 | pfree(new_clump); |
287 | |
288 | /* Remove old_clump from list */ |
289 | clumps = list_delete_cell(clumps, lc, prev); |
290 | |
291 | /* |
292 | * Recursively try to merge the enlarged old_clump with |
293 | * others. When no further merge is possible, we'll reinsert |
294 | * it into the list. |
295 | */ |
296 | return merge_clump(root, clumps, old_clump, num_gene, force); |
297 | } |
298 | } |
299 | prev = lc; |
300 | } |
301 | |
302 | /* |
303 | * No merging is possible, so add new_clump as an independent clump, in |
304 | * proper order according to size. We can be fast for the common case |
305 | * where it has size 1 --- it should always go at the end. |
306 | */ |
307 | if (clumps == NIL || new_clump->size == 1) |
308 | return lappend(clumps, new_clump); |
309 | |
310 | /* Check if it belongs at the front */ |
311 | lc = list_head(clumps); |
312 | if (new_clump->size > ((Clump *) lfirst(lc))->size) |
313 | return lcons(new_clump, clumps); |
314 | |
315 | /* Else search for the place to insert it */ |
316 | for (;;) |
317 | { |
318 | ListCell *nxt = lnext(lc); |
319 | |
320 | if (nxt == NULL || new_clump->size > ((Clump *) lfirst(nxt))->size) |
321 | break; /* it belongs after 'lc', before 'nxt' */ |
322 | lc = nxt; |
323 | } |
324 | lappend_cell(clumps, lc, new_clump); |
325 | |
326 | return clumps; |
327 | } |
328 | |
329 | /* |
330 | * Heuristics for gimme_tree: do we want to join these two relations? |
331 | */ |
332 | static bool |
333 | desirable_join(PlannerInfo *root, |
334 | RelOptInfo *outer_rel, RelOptInfo *inner_rel) |
335 | { |
336 | /* |
337 | * Join if there is an applicable join clause, or if there is a join order |
338 | * restriction forcing these rels to be joined. |
339 | */ |
340 | if (have_relevant_joinclause(root, outer_rel, inner_rel) || |
341 | have_join_order_restriction(root, outer_rel, inner_rel)) |
342 | return true; |
343 | |
344 | /* Otherwise postpone the join till later. */ |
345 | return false; |
346 | } |
347 | |