| 1 | /*------------------------------------------------------------------------ |
| 2 | * |
| 3 | * geqo_eval.c |
| 4 | * Routines to evaluate query trees |
| 5 | * |
| 6 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
| 7 | * Portions Copyright (c) 1994, Regents of the University of California |
| 8 | * |
| 9 | * src/backend/optimizer/geqo/geqo_eval.c |
| 10 | * |
| 11 | *------------------------------------------------------------------------- |
| 12 | */ |
| 13 | |
| 14 | /* contributed by: |
| 15 | =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= |
| 16 | * Martin Utesch * Institute of Automatic Control * |
| 17 | = = University of Mining and Technology = |
| 18 | * utesch@aut.tu-freiberg.de * Freiberg, Germany * |
| 19 | =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= |
| 20 | */ |
| 21 | |
| 22 | #include "postgres.h" |
| 23 | |
| 24 | #include <float.h> |
| 25 | #include <limits.h> |
| 26 | #include <math.h> |
| 27 | |
| 28 | #include "optimizer/geqo.h" |
| 29 | #include "optimizer/joininfo.h" |
| 30 | #include "optimizer/pathnode.h" |
| 31 | #include "optimizer/paths.h" |
| 32 | #include "utils/memutils.h" |
| 33 | |
| 34 | |
| 35 | /* A "clump" of already-joined relations within gimme_tree */ |
| 36 | typedef struct |
| 37 | { |
| 38 | RelOptInfo *joinrel; /* joinrel for the set of relations */ |
| 39 | int size; /* number of input relations in clump */ |
| 40 | } Clump; |
| 41 | |
| 42 | static List *merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, |
| 43 | int num_gene, bool force); |
| 44 | static bool desirable_join(PlannerInfo *root, |
| 45 | RelOptInfo *outer_rel, RelOptInfo *inner_rel); |
| 46 | |
| 47 | |
| 48 | /* |
| 49 | * geqo_eval |
| 50 | * |
| 51 | * Returns cost of a query tree as an individual of the population. |
| 52 | * |
| 53 | * If no legal join order can be extracted from the proposed tour, |
| 54 | * returns DBL_MAX. |
| 55 | */ |
| 56 | Cost |
| 57 | geqo_eval(PlannerInfo *root, Gene *tour, int num_gene) |
| 58 | { |
| 59 | MemoryContext mycontext; |
| 60 | MemoryContext oldcxt; |
| 61 | RelOptInfo *joinrel; |
| 62 | Cost fitness; |
| 63 | int savelength; |
| 64 | struct HTAB *savehash; |
| 65 | |
| 66 | /* |
| 67 | * Create a private memory context that will hold all temp storage |
| 68 | * allocated inside gimme_tree(). |
| 69 | * |
| 70 | * Since geqo_eval() will be called many times, we can't afford to let all |
| 71 | * that memory go unreclaimed until end of statement. Note we make the |
| 72 | * temp context a child of the planner's normal context, so that it will |
| 73 | * be freed even if we abort via ereport(ERROR). |
| 74 | */ |
| 75 | mycontext = AllocSetContextCreate(CurrentMemoryContext, |
| 76 | "GEQO" , |
| 77 | ALLOCSET_DEFAULT_SIZES); |
| 78 | oldcxt = MemoryContextSwitchTo(mycontext); |
| 79 | |
| 80 | /* |
| 81 | * gimme_tree will add entries to root->join_rel_list, which may or may |
| 82 | * not already contain some entries. The newly added entries will be |
| 83 | * recycled by the MemoryContextDelete below, so we must ensure that the |
| 84 | * list is restored to its former state before exiting. We can do this by |
| 85 | * truncating the list to its original length. NOTE this assumes that any |
| 86 | * added entries are appended at the end! |
| 87 | * |
| 88 | * We also must take care not to mess up the outer join_rel_hash, if there |
| 89 | * is one. We can do this by just temporarily setting the link to NULL. |
| 90 | * (If we are dealing with enough join rels, which we very likely are, a |
| 91 | * new hash table will get built and used locally.) |
| 92 | * |
| 93 | * join_rel_level[] shouldn't be in use, so just Assert it isn't. |
| 94 | */ |
| 95 | savelength = list_length(root->join_rel_list); |
| 96 | savehash = root->join_rel_hash; |
| 97 | Assert(root->join_rel_level == NULL); |
| 98 | |
| 99 | root->join_rel_hash = NULL; |
| 100 | |
| 101 | /* construct the best path for the given combination of relations */ |
| 102 | joinrel = gimme_tree(root, tour, num_gene); |
| 103 | |
| 104 | /* |
| 105 | * compute fitness, if we found a valid join |
| 106 | * |
| 107 | * XXX geqo does not currently support optimization for partial result |
| 108 | * retrieval, nor do we take any cognizance of possible use of |
| 109 | * parameterized paths --- how to fix? |
| 110 | */ |
| 111 | if (joinrel) |
| 112 | { |
| 113 | Path *best_path = joinrel->cheapest_total_path; |
| 114 | |
| 115 | fitness = best_path->total_cost; |
| 116 | } |
| 117 | else |
| 118 | fitness = DBL_MAX; |
| 119 | |
| 120 | /* |
| 121 | * Restore join_rel_list to its former state, and put back original |
| 122 | * hashtable if any. |
| 123 | */ |
| 124 | root->join_rel_list = list_truncate(root->join_rel_list, |
| 125 | savelength); |
| 126 | root->join_rel_hash = savehash; |
| 127 | |
| 128 | /* release all the memory acquired within gimme_tree */ |
| 129 | MemoryContextSwitchTo(oldcxt); |
| 130 | MemoryContextDelete(mycontext); |
| 131 | |
| 132 | return fitness; |
| 133 | } |
| 134 | |
| 135 | /* |
| 136 | * gimme_tree |
| 137 | * Form planner estimates for a join tree constructed in the specified |
| 138 | * order. |
| 139 | * |
| 140 | * 'tour' is the proposed join order, of length 'num_gene' |
| 141 | * |
| 142 | * Returns a new join relation whose cheapest path is the best plan for |
| 143 | * this join order. NB: will return NULL if join order is invalid and |
| 144 | * we can't modify it into a valid order. |
| 145 | * |
| 146 | * The original implementation of this routine always joined in the specified |
| 147 | * order, and so could only build left-sided plans (and right-sided and |
| 148 | * mixtures, as a byproduct of the fact that make_join_rel() is symmetric). |
| 149 | * It could never produce a "bushy" plan. This had a couple of big problems, |
| 150 | * of which the worst was that there are situations involving join order |
| 151 | * restrictions where the only valid plans are bushy. |
| 152 | * |
| 153 | * The present implementation takes the given tour as a guideline, but |
| 154 | * postpones joins that are illegal or seem unsuitable according to some |
| 155 | * heuristic rules. This allows correct bushy plans to be generated at need, |
| 156 | * and as a nice side-effect it seems to materially improve the quality of the |
| 157 | * generated plans. Note however that since it's just a heuristic, it can |
| 158 | * still fail in some cases. (In particular, we might clump together |
| 159 | * relations that actually mustn't be joined yet due to LATERAL restrictions; |
| 160 | * since there's no provision for un-clumping, this must lead to failure.) |
| 161 | */ |
| 162 | RelOptInfo * |
| 163 | gimme_tree(PlannerInfo *root, Gene *tour, int num_gene) |
| 164 | { |
| 165 | GeqoPrivateData *private = (GeqoPrivateData *) root->join_search_private; |
| 166 | List *clumps; |
| 167 | int rel_count; |
| 168 | |
| 169 | /* |
| 170 | * Sometimes, a relation can't yet be joined to others due to heuristics |
| 171 | * or actual semantic restrictions. We maintain a list of "clumps" of |
| 172 | * successfully joined relations, with larger clumps at the front. Each |
| 173 | * new relation from the tour is added to the first clump it can be joined |
| 174 | * to; if there is none then it becomes a new clump of its own. When we |
| 175 | * enlarge an existing clump we check to see if it can now be merged with |
| 176 | * any other clumps. After the tour is all scanned, we forget about the |
| 177 | * heuristics and try to forcibly join any remaining clumps. If we are |
| 178 | * unable to merge all the clumps into one, fail. |
| 179 | */ |
| 180 | clumps = NIL; |
| 181 | |
| 182 | for (rel_count = 0; rel_count < num_gene; rel_count++) |
| 183 | { |
| 184 | int cur_rel_index; |
| 185 | RelOptInfo *cur_rel; |
| 186 | Clump *cur_clump; |
| 187 | |
| 188 | /* Get the next input relation */ |
| 189 | cur_rel_index = (int) tour[rel_count]; |
| 190 | cur_rel = (RelOptInfo *) list_nth(private->initial_rels, |
| 191 | cur_rel_index - 1); |
| 192 | |
| 193 | /* Make it into a single-rel clump */ |
| 194 | cur_clump = (Clump *) palloc(sizeof(Clump)); |
| 195 | cur_clump->joinrel = cur_rel; |
| 196 | cur_clump->size = 1; |
| 197 | |
| 198 | /* Merge it into the clumps list, using only desirable joins */ |
| 199 | clumps = merge_clump(root, clumps, cur_clump, num_gene, false); |
| 200 | } |
| 201 | |
| 202 | if (list_length(clumps) > 1) |
| 203 | { |
| 204 | /* Force-join the remaining clumps in some legal order */ |
| 205 | List *fclumps; |
| 206 | ListCell *lc; |
| 207 | |
| 208 | fclumps = NIL; |
| 209 | foreach(lc, clumps) |
| 210 | { |
| 211 | Clump *clump = (Clump *) lfirst(lc); |
| 212 | |
| 213 | fclumps = merge_clump(root, fclumps, clump, num_gene, true); |
| 214 | } |
| 215 | clumps = fclumps; |
| 216 | } |
| 217 | |
| 218 | /* Did we succeed in forming a single join relation? */ |
| 219 | if (list_length(clumps) != 1) |
| 220 | return NULL; |
| 221 | |
| 222 | return ((Clump *) linitial(clumps))->joinrel; |
| 223 | } |
| 224 | |
| 225 | /* |
| 226 | * Merge a "clump" into the list of existing clumps for gimme_tree. |
| 227 | * |
| 228 | * We try to merge the clump into some existing clump, and repeat if |
| 229 | * successful. When no more merging is possible, insert the clump |
| 230 | * into the list, preserving the list ordering rule (namely, that |
| 231 | * clumps of larger size appear earlier). |
| 232 | * |
| 233 | * If force is true, merge anywhere a join is legal, even if it causes |
| 234 | * a cartesian join to be performed. When force is false, do only |
| 235 | * "desirable" joins. |
| 236 | */ |
| 237 | static List * |
| 238 | merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, int num_gene, |
| 239 | bool force) |
| 240 | { |
| 241 | ListCell *prev; |
| 242 | ListCell *lc; |
| 243 | |
| 244 | /* Look for a clump that new_clump can join to */ |
| 245 | prev = NULL; |
| 246 | foreach(lc, clumps) |
| 247 | { |
| 248 | Clump *old_clump = (Clump *) lfirst(lc); |
| 249 | |
| 250 | if (force || |
| 251 | desirable_join(root, old_clump->joinrel, new_clump->joinrel)) |
| 252 | { |
| 253 | RelOptInfo *joinrel; |
| 254 | |
| 255 | /* |
| 256 | * Construct a RelOptInfo representing the join of these two input |
| 257 | * relations. Note that we expect the joinrel not to exist in |
| 258 | * root->join_rel_list yet, and so the paths constructed for it |
| 259 | * will only include the ones we want. |
| 260 | */ |
| 261 | joinrel = make_join_rel(root, |
| 262 | old_clump->joinrel, |
| 263 | new_clump->joinrel); |
| 264 | |
| 265 | /* Keep searching if join order is not valid */ |
| 266 | if (joinrel) |
| 267 | { |
| 268 | /* Create paths for partitionwise joins. */ |
| 269 | generate_partitionwise_join_paths(root, joinrel); |
| 270 | |
| 271 | /* |
| 272 | * Except for the topmost scan/join rel, consider gathering |
| 273 | * partial paths. We'll do the same for the topmost scan/join |
| 274 | * rel once we know the final targetlist (see |
| 275 | * grouping_planner). |
| 276 | */ |
| 277 | if (old_clump->size + new_clump->size < num_gene) |
| 278 | generate_gather_paths(root, joinrel, false); |
| 279 | |
| 280 | /* Find and save the cheapest paths for this joinrel */ |
| 281 | set_cheapest(joinrel); |
| 282 | |
| 283 | /* Absorb new clump into old */ |
| 284 | old_clump->joinrel = joinrel; |
| 285 | old_clump->size += new_clump->size; |
| 286 | pfree(new_clump); |
| 287 | |
| 288 | /* Remove old_clump from list */ |
| 289 | clumps = list_delete_cell(clumps, lc, prev); |
| 290 | |
| 291 | /* |
| 292 | * Recursively try to merge the enlarged old_clump with |
| 293 | * others. When no further merge is possible, we'll reinsert |
| 294 | * it into the list. |
| 295 | */ |
| 296 | return merge_clump(root, clumps, old_clump, num_gene, force); |
| 297 | } |
| 298 | } |
| 299 | prev = lc; |
| 300 | } |
| 301 | |
| 302 | /* |
| 303 | * No merging is possible, so add new_clump as an independent clump, in |
| 304 | * proper order according to size. We can be fast for the common case |
| 305 | * where it has size 1 --- it should always go at the end. |
| 306 | */ |
| 307 | if (clumps == NIL || new_clump->size == 1) |
| 308 | return lappend(clumps, new_clump); |
| 309 | |
| 310 | /* Check if it belongs at the front */ |
| 311 | lc = list_head(clumps); |
| 312 | if (new_clump->size > ((Clump *) lfirst(lc))->size) |
| 313 | return lcons(new_clump, clumps); |
| 314 | |
| 315 | /* Else search for the place to insert it */ |
| 316 | for (;;) |
| 317 | { |
| 318 | ListCell *nxt = lnext(lc); |
| 319 | |
| 320 | if (nxt == NULL || new_clump->size > ((Clump *) lfirst(nxt))->size) |
| 321 | break; /* it belongs after 'lc', before 'nxt' */ |
| 322 | lc = nxt; |
| 323 | } |
| 324 | lappend_cell(clumps, lc, new_clump); |
| 325 | |
| 326 | return clumps; |
| 327 | } |
| 328 | |
| 329 | /* |
| 330 | * Heuristics for gimme_tree: do we want to join these two relations? |
| 331 | */ |
| 332 | static bool |
| 333 | desirable_join(PlannerInfo *root, |
| 334 | RelOptInfo *outer_rel, RelOptInfo *inner_rel) |
| 335 | { |
| 336 | /* |
| 337 | * Join if there is an applicable join clause, or if there is a join order |
| 338 | * restriction forcing these rels to be joined. |
| 339 | */ |
| 340 | if (have_relevant_joinclause(root, outer_rel, inner_rel) || |
| 341 | have_join_order_restriction(root, outer_rel, inner_rel)) |
| 342 | return true; |
| 343 | |
| 344 | /* Otherwise postpone the join till later. */ |
| 345 | return false; |
| 346 | } |
| 347 | |