| 1 | /*------------------------------------------------------------------------ | 
|---|
| 2 | * | 
|---|
| 3 | * geqo_eval.c | 
|---|
| 4 | *	  Routines to evaluate query trees | 
|---|
| 5 | * | 
|---|
| 6 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group | 
|---|
| 7 | * Portions Copyright (c) 1994, Regents of the University of California | 
|---|
| 8 | * | 
|---|
| 9 | * src/backend/optimizer/geqo/geqo_eval.c | 
|---|
| 10 | * | 
|---|
| 11 | *------------------------------------------------------------------------- | 
|---|
| 12 | */ | 
|---|
| 13 |  | 
|---|
| 14 | /* contributed by: | 
|---|
| 15 | =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= | 
|---|
| 16 | *  Martin Utesch				 * Institute of Automatic Control	   * | 
|---|
| 17 | =							 = University of Mining and Technology = | 
|---|
| 18 | *  utesch@aut.tu-freiberg.de  * Freiberg, Germany				   * | 
|---|
| 19 | =*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*=*= | 
|---|
| 20 | */ | 
|---|
| 21 |  | 
|---|
| 22 | #include "postgres.h" | 
|---|
| 23 |  | 
|---|
| 24 | #include <float.h> | 
|---|
| 25 | #include <limits.h> | 
|---|
| 26 | #include <math.h> | 
|---|
| 27 |  | 
|---|
| 28 | #include "optimizer/geqo.h" | 
|---|
| 29 | #include "optimizer/joininfo.h" | 
|---|
| 30 | #include "optimizer/pathnode.h" | 
|---|
| 31 | #include "optimizer/paths.h" | 
|---|
| 32 | #include "utils/memutils.h" | 
|---|
| 33 |  | 
|---|
| 34 |  | 
|---|
| 35 | /* A "clump" of already-joined relations within gimme_tree */ | 
|---|
| 36 | typedef struct | 
|---|
| 37 | { | 
|---|
| 38 | RelOptInfo *joinrel;		/* joinrel for the set of relations */ | 
|---|
| 39 | int			size;			/* number of input relations in clump */ | 
|---|
| 40 | } Clump; | 
|---|
| 41 |  | 
|---|
| 42 | static List *merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, | 
|---|
| 43 | int num_gene, bool force); | 
|---|
| 44 | static bool desirable_join(PlannerInfo *root, | 
|---|
| 45 | RelOptInfo *outer_rel, RelOptInfo *inner_rel); | 
|---|
| 46 |  | 
|---|
| 47 |  | 
|---|
| 48 | /* | 
|---|
| 49 | * geqo_eval | 
|---|
| 50 | * | 
|---|
| 51 | * Returns cost of a query tree as an individual of the population. | 
|---|
| 52 | * | 
|---|
| 53 | * If no legal join order can be extracted from the proposed tour, | 
|---|
| 54 | * returns DBL_MAX. | 
|---|
| 55 | */ | 
|---|
| 56 | Cost | 
|---|
| 57 | geqo_eval(PlannerInfo *root, Gene *tour, int num_gene) | 
|---|
| 58 | { | 
|---|
| 59 | MemoryContext mycontext; | 
|---|
| 60 | MemoryContext oldcxt; | 
|---|
| 61 | RelOptInfo *joinrel; | 
|---|
| 62 | Cost		fitness; | 
|---|
| 63 | int			savelength; | 
|---|
| 64 | struct HTAB *savehash; | 
|---|
| 65 |  | 
|---|
| 66 | /* | 
|---|
| 67 | * Create a private memory context that will hold all temp storage | 
|---|
| 68 | * allocated inside gimme_tree(). | 
|---|
| 69 | * | 
|---|
| 70 | * Since geqo_eval() will be called many times, we can't afford to let all | 
|---|
| 71 | * that memory go unreclaimed until end of statement.  Note we make the | 
|---|
| 72 | * temp context a child of the planner's normal context, so that it will | 
|---|
| 73 | * be freed even if we abort via ereport(ERROR). | 
|---|
| 74 | */ | 
|---|
| 75 | mycontext = AllocSetContextCreate(CurrentMemoryContext, | 
|---|
| 76 | "GEQO", | 
|---|
| 77 | ALLOCSET_DEFAULT_SIZES); | 
|---|
| 78 | oldcxt = MemoryContextSwitchTo(mycontext); | 
|---|
| 79 |  | 
|---|
| 80 | /* | 
|---|
| 81 | * gimme_tree will add entries to root->join_rel_list, which may or may | 
|---|
| 82 | * not already contain some entries.  The newly added entries will be | 
|---|
| 83 | * recycled by the MemoryContextDelete below, so we must ensure that the | 
|---|
| 84 | * list is restored to its former state before exiting.  We can do this by | 
|---|
| 85 | * truncating the list to its original length.  NOTE this assumes that any | 
|---|
| 86 | * added entries are appended at the end! | 
|---|
| 87 | * | 
|---|
| 88 | * We also must take care not to mess up the outer join_rel_hash, if there | 
|---|
| 89 | * is one.  We can do this by just temporarily setting the link to NULL. | 
|---|
| 90 | * (If we are dealing with enough join rels, which we very likely are, a | 
|---|
| 91 | * new hash table will get built and used locally.) | 
|---|
| 92 | * | 
|---|
| 93 | * join_rel_level[] shouldn't be in use, so just Assert it isn't. | 
|---|
| 94 | */ | 
|---|
| 95 | savelength = list_length(root->join_rel_list); | 
|---|
| 96 | savehash = root->join_rel_hash; | 
|---|
| 97 | Assert(root->join_rel_level == NULL); | 
|---|
| 98 |  | 
|---|
| 99 | root->join_rel_hash = NULL; | 
|---|
| 100 |  | 
|---|
| 101 | /* construct the best path for the given combination of relations */ | 
|---|
| 102 | joinrel = gimme_tree(root, tour, num_gene); | 
|---|
| 103 |  | 
|---|
| 104 | /* | 
|---|
| 105 | * compute fitness, if we found a valid join | 
|---|
| 106 | * | 
|---|
| 107 | * XXX geqo does not currently support optimization for partial result | 
|---|
| 108 | * retrieval, nor do we take any cognizance of possible use of | 
|---|
| 109 | * parameterized paths --- how to fix? | 
|---|
| 110 | */ | 
|---|
| 111 | if (joinrel) | 
|---|
| 112 | { | 
|---|
| 113 | Path	   *best_path = joinrel->cheapest_total_path; | 
|---|
| 114 |  | 
|---|
| 115 | fitness = best_path->total_cost; | 
|---|
| 116 | } | 
|---|
| 117 | else | 
|---|
| 118 | fitness = DBL_MAX; | 
|---|
| 119 |  | 
|---|
| 120 | /* | 
|---|
| 121 | * Restore join_rel_list to its former state, and put back original | 
|---|
| 122 | * hashtable if any. | 
|---|
| 123 | */ | 
|---|
| 124 | root->join_rel_list = list_truncate(root->join_rel_list, | 
|---|
| 125 | savelength); | 
|---|
| 126 | root->join_rel_hash = savehash; | 
|---|
| 127 |  | 
|---|
| 128 | /* release all the memory acquired within gimme_tree */ | 
|---|
| 129 | MemoryContextSwitchTo(oldcxt); | 
|---|
| 130 | MemoryContextDelete(mycontext); | 
|---|
| 131 |  | 
|---|
| 132 | return fitness; | 
|---|
| 133 | } | 
|---|
| 134 |  | 
|---|
| 135 | /* | 
|---|
| 136 | * gimme_tree | 
|---|
| 137 | *	  Form planner estimates for a join tree constructed in the specified | 
|---|
| 138 | *	  order. | 
|---|
| 139 | * | 
|---|
| 140 | *	 'tour' is the proposed join order, of length 'num_gene' | 
|---|
| 141 | * | 
|---|
| 142 | * Returns a new join relation whose cheapest path is the best plan for | 
|---|
| 143 | * this join order.  NB: will return NULL if join order is invalid and | 
|---|
| 144 | * we can't modify it into a valid order. | 
|---|
| 145 | * | 
|---|
| 146 | * The original implementation of this routine always joined in the specified | 
|---|
| 147 | * order, and so could only build left-sided plans (and right-sided and | 
|---|
| 148 | * mixtures, as a byproduct of the fact that make_join_rel() is symmetric). | 
|---|
| 149 | * It could never produce a "bushy" plan.  This had a couple of big problems, | 
|---|
| 150 | * of which the worst was that there are situations involving join order | 
|---|
| 151 | * restrictions where the only valid plans are bushy. | 
|---|
| 152 | * | 
|---|
| 153 | * The present implementation takes the given tour as a guideline, but | 
|---|
| 154 | * postpones joins that are illegal or seem unsuitable according to some | 
|---|
| 155 | * heuristic rules.  This allows correct bushy plans to be generated at need, | 
|---|
| 156 | * and as a nice side-effect it seems to materially improve the quality of the | 
|---|
| 157 | * generated plans.  Note however that since it's just a heuristic, it can | 
|---|
| 158 | * still fail in some cases.  (In particular, we might clump together | 
|---|
| 159 | * relations that actually mustn't be joined yet due to LATERAL restrictions; | 
|---|
| 160 | * since there's no provision for un-clumping, this must lead to failure.) | 
|---|
| 161 | */ | 
|---|
| 162 | RelOptInfo * | 
|---|
| 163 | gimme_tree(PlannerInfo *root, Gene *tour, int num_gene) | 
|---|
| 164 | { | 
|---|
| 165 | GeqoPrivateData *private = (GeqoPrivateData *) root->join_search_private; | 
|---|
| 166 | List	   *clumps; | 
|---|
| 167 | int			rel_count; | 
|---|
| 168 |  | 
|---|
| 169 | /* | 
|---|
| 170 | * Sometimes, a relation can't yet be joined to others due to heuristics | 
|---|
| 171 | * or actual semantic restrictions.  We maintain a list of "clumps" of | 
|---|
| 172 | * successfully joined relations, with larger clumps at the front. Each | 
|---|
| 173 | * new relation from the tour is added to the first clump it can be joined | 
|---|
| 174 | * to; if there is none then it becomes a new clump of its own. When we | 
|---|
| 175 | * enlarge an existing clump we check to see if it can now be merged with | 
|---|
| 176 | * any other clumps.  After the tour is all scanned, we forget about the | 
|---|
| 177 | * heuristics and try to forcibly join any remaining clumps.  If we are | 
|---|
| 178 | * unable to merge all the clumps into one, fail. | 
|---|
| 179 | */ | 
|---|
| 180 | clumps = NIL; | 
|---|
| 181 |  | 
|---|
| 182 | for (rel_count = 0; rel_count < num_gene; rel_count++) | 
|---|
| 183 | { | 
|---|
| 184 | int			cur_rel_index; | 
|---|
| 185 | RelOptInfo *cur_rel; | 
|---|
| 186 | Clump	   *cur_clump; | 
|---|
| 187 |  | 
|---|
| 188 | /* Get the next input relation */ | 
|---|
| 189 | cur_rel_index = (int) tour[rel_count]; | 
|---|
| 190 | cur_rel = (RelOptInfo *) list_nth(private->initial_rels, | 
|---|
| 191 | cur_rel_index - 1); | 
|---|
| 192 |  | 
|---|
| 193 | /* Make it into a single-rel clump */ | 
|---|
| 194 | cur_clump = (Clump *) palloc(sizeof(Clump)); | 
|---|
| 195 | cur_clump->joinrel = cur_rel; | 
|---|
| 196 | cur_clump->size = 1; | 
|---|
| 197 |  | 
|---|
| 198 | /* Merge it into the clumps list, using only desirable joins */ | 
|---|
| 199 | clumps = merge_clump(root, clumps, cur_clump, num_gene, false); | 
|---|
| 200 | } | 
|---|
| 201 |  | 
|---|
| 202 | if (list_length(clumps) > 1) | 
|---|
| 203 | { | 
|---|
| 204 | /* Force-join the remaining clumps in some legal order */ | 
|---|
| 205 | List	   *fclumps; | 
|---|
| 206 | ListCell   *lc; | 
|---|
| 207 |  | 
|---|
| 208 | fclumps = NIL; | 
|---|
| 209 | foreach(lc, clumps) | 
|---|
| 210 | { | 
|---|
| 211 | Clump	   *clump = (Clump *) lfirst(lc); | 
|---|
| 212 |  | 
|---|
| 213 | fclumps = merge_clump(root, fclumps, clump, num_gene, true); | 
|---|
| 214 | } | 
|---|
| 215 | clumps = fclumps; | 
|---|
| 216 | } | 
|---|
| 217 |  | 
|---|
| 218 | /* Did we succeed in forming a single join relation? */ | 
|---|
| 219 | if (list_length(clumps) != 1) | 
|---|
| 220 | return NULL; | 
|---|
| 221 |  | 
|---|
| 222 | return ((Clump *) linitial(clumps))->joinrel; | 
|---|
| 223 | } | 
|---|
| 224 |  | 
|---|
| 225 | /* | 
|---|
| 226 | * Merge a "clump" into the list of existing clumps for gimme_tree. | 
|---|
| 227 | * | 
|---|
| 228 | * We try to merge the clump into some existing clump, and repeat if | 
|---|
| 229 | * successful.  When no more merging is possible, insert the clump | 
|---|
| 230 | * into the list, preserving the list ordering rule (namely, that | 
|---|
| 231 | * clumps of larger size appear earlier). | 
|---|
| 232 | * | 
|---|
| 233 | * If force is true, merge anywhere a join is legal, even if it causes | 
|---|
| 234 | * a cartesian join to be performed.  When force is false, do only | 
|---|
| 235 | * "desirable" joins. | 
|---|
| 236 | */ | 
|---|
| 237 | static List * | 
|---|
| 238 | merge_clump(PlannerInfo *root, List *clumps, Clump *new_clump, int num_gene, | 
|---|
| 239 | bool force) | 
|---|
| 240 | { | 
|---|
| 241 | ListCell   *prev; | 
|---|
| 242 | ListCell   *lc; | 
|---|
| 243 |  | 
|---|
| 244 | /* Look for a clump that new_clump can join to */ | 
|---|
| 245 | prev = NULL; | 
|---|
| 246 | foreach(lc, clumps) | 
|---|
| 247 | { | 
|---|
| 248 | Clump	   *old_clump = (Clump *) lfirst(lc); | 
|---|
| 249 |  | 
|---|
| 250 | if (force || | 
|---|
| 251 | desirable_join(root, old_clump->joinrel, new_clump->joinrel)) | 
|---|
| 252 | { | 
|---|
| 253 | RelOptInfo *joinrel; | 
|---|
| 254 |  | 
|---|
| 255 | /* | 
|---|
| 256 | * Construct a RelOptInfo representing the join of these two input | 
|---|
| 257 | * relations.  Note that we expect the joinrel not to exist in | 
|---|
| 258 | * root->join_rel_list yet, and so the paths constructed for it | 
|---|
| 259 | * will only include the ones we want. | 
|---|
| 260 | */ | 
|---|
| 261 | joinrel = make_join_rel(root, | 
|---|
| 262 | old_clump->joinrel, | 
|---|
| 263 | new_clump->joinrel); | 
|---|
| 264 |  | 
|---|
| 265 | /* Keep searching if join order is not valid */ | 
|---|
| 266 | if (joinrel) | 
|---|
| 267 | { | 
|---|
| 268 | /* Create paths for partitionwise joins. */ | 
|---|
| 269 | generate_partitionwise_join_paths(root, joinrel); | 
|---|
| 270 |  | 
|---|
| 271 | /* | 
|---|
| 272 | * Except for the topmost scan/join rel, consider gathering | 
|---|
| 273 | * partial paths.  We'll do the same for the topmost scan/join | 
|---|
| 274 | * rel once we know the final targetlist (see | 
|---|
| 275 | * grouping_planner). | 
|---|
| 276 | */ | 
|---|
| 277 | if (old_clump->size + new_clump->size < num_gene) | 
|---|
| 278 | generate_gather_paths(root, joinrel, false); | 
|---|
| 279 |  | 
|---|
| 280 | /* Find and save the cheapest paths for this joinrel */ | 
|---|
| 281 | set_cheapest(joinrel); | 
|---|
| 282 |  | 
|---|
| 283 | /* Absorb new clump into old */ | 
|---|
| 284 | old_clump->joinrel = joinrel; | 
|---|
| 285 | old_clump->size += new_clump->size; | 
|---|
| 286 | pfree(new_clump); | 
|---|
| 287 |  | 
|---|
| 288 | /* Remove old_clump from list */ | 
|---|
| 289 | clumps = list_delete_cell(clumps, lc, prev); | 
|---|
| 290 |  | 
|---|
| 291 | /* | 
|---|
| 292 | * Recursively try to merge the enlarged old_clump with | 
|---|
| 293 | * others.  When no further merge is possible, we'll reinsert | 
|---|
| 294 | * it into the list. | 
|---|
| 295 | */ | 
|---|
| 296 | return merge_clump(root, clumps, old_clump, num_gene, force); | 
|---|
| 297 | } | 
|---|
| 298 | } | 
|---|
| 299 | prev = lc; | 
|---|
| 300 | } | 
|---|
| 301 |  | 
|---|
| 302 | /* | 
|---|
| 303 | * No merging is possible, so add new_clump as an independent clump, in | 
|---|
| 304 | * proper order according to size.  We can be fast for the common case | 
|---|
| 305 | * where it has size 1 --- it should always go at the end. | 
|---|
| 306 | */ | 
|---|
| 307 | if (clumps == NIL || new_clump->size == 1) | 
|---|
| 308 | return lappend(clumps, new_clump); | 
|---|
| 309 |  | 
|---|
| 310 | /* Check if it belongs at the front */ | 
|---|
| 311 | lc = list_head(clumps); | 
|---|
| 312 | if (new_clump->size > ((Clump *) lfirst(lc))->size) | 
|---|
| 313 | return lcons(new_clump, clumps); | 
|---|
| 314 |  | 
|---|
| 315 | /* Else search for the place to insert it */ | 
|---|
| 316 | for (;;) | 
|---|
| 317 | { | 
|---|
| 318 | ListCell   *nxt = lnext(lc); | 
|---|
| 319 |  | 
|---|
| 320 | if (nxt == NULL || new_clump->size > ((Clump *) lfirst(nxt))->size) | 
|---|
| 321 | break;				/* it belongs after 'lc', before 'nxt' */ | 
|---|
| 322 | lc = nxt; | 
|---|
| 323 | } | 
|---|
| 324 | lappend_cell(clumps, lc, new_clump); | 
|---|
| 325 |  | 
|---|
| 326 | return clumps; | 
|---|
| 327 | } | 
|---|
| 328 |  | 
|---|
| 329 | /* | 
|---|
| 330 | * Heuristics for gimme_tree: do we want to join these two relations? | 
|---|
| 331 | */ | 
|---|
| 332 | static bool | 
|---|
| 333 | desirable_join(PlannerInfo *root, | 
|---|
| 334 | RelOptInfo *outer_rel, RelOptInfo *inner_rel) | 
|---|
| 335 | { | 
|---|
| 336 | /* | 
|---|
| 337 | * Join if there is an applicable join clause, or if there is a join order | 
|---|
| 338 | * restriction forcing these rels to be joined. | 
|---|
| 339 | */ | 
|---|
| 340 | if (have_relevant_joinclause(root, outer_rel, inner_rel) || | 
|---|
| 341 | have_join_order_restriction(root, outer_rel, inner_rel)) | 
|---|
| 342 | return true; | 
|---|
| 343 |  | 
|---|
| 344 | /* Otherwise postpone the join till later. */ | 
|---|
| 345 | return false; | 
|---|
| 346 | } | 
|---|
| 347 |  | 
|---|