| 1 | /*------------------------------------------------------------------------- | 
|---|
| 2 | * | 
|---|
| 3 | * orclauses.c | 
|---|
| 4 | *	  Routines to extract restriction OR clauses from join OR clauses | 
|---|
| 5 | * | 
|---|
| 6 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group | 
|---|
| 7 | * Portions Copyright (c) 1994, Regents of the University of California | 
|---|
| 8 | * | 
|---|
| 9 | * | 
|---|
| 10 | * IDENTIFICATION | 
|---|
| 11 | *	  src/backend/optimizer/util/orclauses.c | 
|---|
| 12 | * | 
|---|
| 13 | *------------------------------------------------------------------------- | 
|---|
| 14 | */ | 
|---|
| 15 |  | 
|---|
| 16 | #include "postgres.h" | 
|---|
| 17 |  | 
|---|
| 18 | #include "nodes/makefuncs.h" | 
|---|
| 19 | #include "nodes/nodeFuncs.h" | 
|---|
| 20 | #include "optimizer/clauses.h" | 
|---|
| 21 | #include "optimizer/cost.h" | 
|---|
| 22 | #include "optimizer/optimizer.h" | 
|---|
| 23 | #include "optimizer/orclauses.h" | 
|---|
| 24 | #include "optimizer/restrictinfo.h" | 
|---|
| 25 |  | 
|---|
| 26 |  | 
|---|
| 27 | static bool is_safe_restriction_clause_for(RestrictInfo *rinfo, RelOptInfo *rel); | 
|---|
| 28 | static Expr *extract_or_clause(RestrictInfo *or_rinfo, RelOptInfo *rel); | 
|---|
| 29 | static void consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel, | 
|---|
| 30 | Expr *orclause, RestrictInfo *join_or_rinfo); | 
|---|
| 31 |  | 
|---|
| 32 |  | 
|---|
| 33 | /* | 
|---|
| 34 | * extract_restriction_or_clauses | 
|---|
| 35 | *	  Examine join OR-of-AND clauses to see if any useful restriction OR | 
|---|
| 36 | *	  clauses can be extracted.  If so, add them to the query. | 
|---|
| 37 | * | 
|---|
| 38 | * Although a join clause must reference multiple relations overall, | 
|---|
| 39 | * an OR of ANDs clause might contain sub-clauses that reference just one | 
|---|
| 40 | * relation and can be used to build a restriction clause for that rel. | 
|---|
| 41 | * For example consider | 
|---|
| 42 | *		WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45)); | 
|---|
| 43 | * We can transform this into | 
|---|
| 44 | *		WHERE ((a.x = 42 AND b.y = 43) OR (a.x = 44 AND b.z = 45)) | 
|---|
| 45 | *			AND (a.x = 42 OR a.x = 44) | 
|---|
| 46 | *			AND (b.y = 43 OR b.z = 45); | 
|---|
| 47 | * which allows the latter clauses to be applied during the scans of a and b, | 
|---|
| 48 | * perhaps as index qualifications, and in any case reducing the number of | 
|---|
| 49 | * rows arriving at the join.  In essence this is a partial transformation to | 
|---|
| 50 | * CNF (AND of ORs format).  It is not complete, however, because we do not | 
|---|
| 51 | * unravel the original OR --- doing so would usually bloat the qualification | 
|---|
| 52 | * expression to little gain. | 
|---|
| 53 | * | 
|---|
| 54 | * The added quals are partially redundant with the original OR, and therefore | 
|---|
| 55 | * would cause the size of the joinrel to be underestimated when it is finally | 
|---|
| 56 | * formed.  (This would be true of a full transformation to CNF as well; the | 
|---|
| 57 | * fault is not really in the transformation, but in clauselist_selectivity's | 
|---|
| 58 | * inability to recognize redundant conditions.)  We can compensate for this | 
|---|
| 59 | * redundancy by changing the cached selectivity of the original OR clause, | 
|---|
| 60 | * canceling out the (valid) reduction in the estimated sizes of the base | 
|---|
| 61 | * relations so that the estimated joinrel size remains the same.  This is | 
|---|
| 62 | * a MAJOR HACK: it depends on the fact that clause selectivities are cached | 
|---|
| 63 | * and on the fact that the same RestrictInfo node will appear in every | 
|---|
| 64 | * joininfo list that might be used when the joinrel is formed. | 
|---|
| 65 | * And it doesn't work in cases where the size estimation is nonlinear | 
|---|
| 66 | * (i.e., outer and IN joins).  But it beats not doing anything. | 
|---|
| 67 | * | 
|---|
| 68 | * We examine each base relation to see if join clauses associated with it | 
|---|
| 69 | * contain extractable restriction conditions.  If so, add those conditions | 
|---|
| 70 | * to the rel's baserestrictinfo and update the cached selectivities of the | 
|---|
| 71 | * join clauses.  Note that the same join clause will be examined afresh | 
|---|
| 72 | * from the point of view of each baserel that participates in it, so its | 
|---|
| 73 | * cached selectivity may get updated multiple times. | 
|---|
| 74 | */ | 
|---|
| 75 | void | 
|---|
| 76 | (PlannerInfo *root) | 
|---|
| 77 | { | 
|---|
| 78 | Index		rti; | 
|---|
| 79 |  | 
|---|
| 80 | /* Examine each baserel for potential join OR clauses */ | 
|---|
| 81 | for (rti = 1; rti < root->simple_rel_array_size; rti++) | 
|---|
| 82 | { | 
|---|
| 83 | RelOptInfo *rel = root->simple_rel_array[rti]; | 
|---|
| 84 | ListCell   *lc; | 
|---|
| 85 |  | 
|---|
| 86 | /* there may be empty slots corresponding to non-baserel RTEs */ | 
|---|
| 87 | if (rel == NULL) | 
|---|
| 88 | continue; | 
|---|
| 89 |  | 
|---|
| 90 | Assert(rel->relid == rti);	/* sanity check on array */ | 
|---|
| 91 |  | 
|---|
| 92 | /* ignore RTEs that are "other rels" */ | 
|---|
| 93 | if (rel->reloptkind != RELOPT_BASEREL) | 
|---|
| 94 | continue; | 
|---|
| 95 |  | 
|---|
| 96 | /* | 
|---|
| 97 | * Find potentially interesting OR joinclauses.  We can use any | 
|---|
| 98 | * joinclause that is considered safe to move to this rel by the | 
|---|
| 99 | * parameterized-path machinery, even though what we are going to do | 
|---|
| 100 | * with it is not exactly a parameterized path. | 
|---|
| 101 | * | 
|---|
| 102 | * However, it seems best to ignore clauses that have been marked | 
|---|
| 103 | * redundant (by setting norm_selec > 1).  That likely can't happen | 
|---|
| 104 | * for OR clauses, but let's be safe. | 
|---|
| 105 | */ | 
|---|
| 106 | foreach(lc, rel->joininfo) | 
|---|
| 107 | { | 
|---|
| 108 | RestrictInfo *rinfo = (RestrictInfo *) lfirst(lc); | 
|---|
| 109 |  | 
|---|
| 110 | if (restriction_is_or_clause(rinfo) && | 
|---|
| 111 | join_clause_is_movable_to(rinfo, rel) && | 
|---|
| 112 | rinfo->norm_selec <= 1) | 
|---|
| 113 | { | 
|---|
| 114 | /* Try to extract a qual for this rel only */ | 
|---|
| 115 | Expr	   *orclause = extract_or_clause(rinfo, rel); | 
|---|
| 116 |  | 
|---|
| 117 | /* | 
|---|
| 118 | * If successful, decide whether we want to use the clause, | 
|---|
| 119 | * and insert it into the rel's restrictinfo list if so. | 
|---|
| 120 | */ | 
|---|
| 121 | if (orclause) | 
|---|
| 122 | consider_new_or_clause(root, rel, orclause, rinfo); | 
|---|
| 123 | } | 
|---|
| 124 | } | 
|---|
| 125 | } | 
|---|
| 126 | } | 
|---|
| 127 |  | 
|---|
| 128 | /* | 
|---|
| 129 | * Is the given primitive (non-OR) RestrictInfo safe to move to the rel? | 
|---|
| 130 | */ | 
|---|
| 131 | static bool | 
|---|
| 132 | is_safe_restriction_clause_for(RestrictInfo *rinfo, RelOptInfo *rel) | 
|---|
| 133 | { | 
|---|
| 134 | /* | 
|---|
| 135 | * We want clauses that mention the rel, and only the rel.  So in | 
|---|
| 136 | * particular pseudoconstant clauses can be rejected quickly.  Then check | 
|---|
| 137 | * the clause's Var membership. | 
|---|
| 138 | */ | 
|---|
| 139 | if (rinfo->pseudoconstant) | 
|---|
| 140 | return false; | 
|---|
| 141 | if (!bms_equal(rinfo->clause_relids, rel->relids)) | 
|---|
| 142 | return false; | 
|---|
| 143 |  | 
|---|
| 144 | /* We don't want extra evaluations of any volatile functions */ | 
|---|
| 145 | if (contain_volatile_functions((Node *) rinfo->clause)) | 
|---|
| 146 | return false; | 
|---|
| 147 |  | 
|---|
| 148 | return true; | 
|---|
| 149 | } | 
|---|
| 150 |  | 
|---|
| 151 | /* | 
|---|
| 152 | * Try to extract a restriction clause mentioning only "rel" from the given | 
|---|
| 153 | * join OR-clause. | 
|---|
| 154 | * | 
|---|
| 155 | * We must be able to extract at least one qual for this rel from each of | 
|---|
| 156 | * the arms of the OR, else we can't use it. | 
|---|
| 157 | * | 
|---|
| 158 | * Returns an OR clause (not a RestrictInfo!) pertaining to rel, or NULL | 
|---|
| 159 | * if no OR clause could be extracted. | 
|---|
| 160 | */ | 
|---|
| 161 | static Expr * | 
|---|
| 162 | (RestrictInfo *or_rinfo, RelOptInfo *rel) | 
|---|
| 163 | { | 
|---|
| 164 | List	   *clauselist = NIL; | 
|---|
| 165 | ListCell   *lc; | 
|---|
| 166 |  | 
|---|
| 167 | /* | 
|---|
| 168 | * Scan each arm of the input OR clause.  Notice we descend into | 
|---|
| 169 | * or_rinfo->orclause, which has RestrictInfo nodes embedded below the | 
|---|
| 170 | * toplevel OR/AND structure.  This is useful because we can use the info | 
|---|
| 171 | * in those nodes to make is_safe_restriction_clause_for()'s checks | 
|---|
| 172 | * cheaper.  We'll strip those nodes from the returned tree, though, | 
|---|
| 173 | * meaning that fresh ones will be built if the clause is accepted as a | 
|---|
| 174 | * restriction clause.  This might seem wasteful --- couldn't we re-use | 
|---|
| 175 | * the existing RestrictInfos?	But that'd require assuming that | 
|---|
| 176 | * selectivity and other cached data is computed exactly the same way for | 
|---|
| 177 | * a restriction clause as for a join clause, which seems undesirable. | 
|---|
| 178 | */ | 
|---|
| 179 | Assert(is_orclause(or_rinfo->orclause)); | 
|---|
| 180 | foreach(lc, ((BoolExpr *) or_rinfo->orclause)->args) | 
|---|
| 181 | { | 
|---|
| 182 | Node	   *orarg = (Node *) lfirst(lc); | 
|---|
| 183 | List	   *subclauses = NIL; | 
|---|
| 184 | Node	   *subclause; | 
|---|
| 185 |  | 
|---|
| 186 | /* OR arguments should be ANDs or sub-RestrictInfos */ | 
|---|
| 187 | if (is_andclause(orarg)) | 
|---|
| 188 | { | 
|---|
| 189 | List	   *andargs = ((BoolExpr *) orarg)->args; | 
|---|
| 190 | ListCell   *lc2; | 
|---|
| 191 |  | 
|---|
| 192 | foreach(lc2, andargs) | 
|---|
| 193 | { | 
|---|
| 194 | RestrictInfo *rinfo = lfirst_node(RestrictInfo, lc2); | 
|---|
| 195 |  | 
|---|
| 196 | if (restriction_is_or_clause(rinfo)) | 
|---|
| 197 | { | 
|---|
| 198 | /* | 
|---|
| 199 | * Recurse to deal with nested OR.  Note we *must* recurse | 
|---|
| 200 | * here, this isn't just overly-tense optimization: we | 
|---|
| 201 | * have to descend far enough to find and strip all | 
|---|
| 202 | * RestrictInfos in the expression. | 
|---|
| 203 | */ | 
|---|
| 204 | Expr	   *suborclause; | 
|---|
| 205 |  | 
|---|
| 206 | suborclause = extract_or_clause(rinfo, rel); | 
|---|
| 207 | if (suborclause) | 
|---|
| 208 | subclauses = lappend(subclauses, suborclause); | 
|---|
| 209 | } | 
|---|
| 210 | else if (is_safe_restriction_clause_for(rinfo, rel)) | 
|---|
| 211 | subclauses = lappend(subclauses, rinfo->clause); | 
|---|
| 212 | } | 
|---|
| 213 | } | 
|---|
| 214 | else | 
|---|
| 215 | { | 
|---|
| 216 | RestrictInfo *rinfo = castNode(RestrictInfo, orarg); | 
|---|
| 217 |  | 
|---|
| 218 | Assert(!restriction_is_or_clause(rinfo)); | 
|---|
| 219 | if (is_safe_restriction_clause_for(rinfo, rel)) | 
|---|
| 220 | subclauses = lappend(subclauses, rinfo->clause); | 
|---|
| 221 | } | 
|---|
| 222 |  | 
|---|
| 223 | /* | 
|---|
| 224 | * If nothing could be extracted from this arm, we can't do anything | 
|---|
| 225 | * with this OR clause. | 
|---|
| 226 | */ | 
|---|
| 227 | if (subclauses == NIL) | 
|---|
| 228 | return NULL; | 
|---|
| 229 |  | 
|---|
| 230 | /* | 
|---|
| 231 | * OK, add subclause(s) to the result OR.  If we found more than one, | 
|---|
| 232 | * we need an AND node.  But if we found only one, and it is itself an | 
|---|
| 233 | * OR node, add its subclauses to the result instead; this is needed | 
|---|
| 234 | * to preserve AND/OR flatness (ie, no OR directly underneath OR). | 
|---|
| 235 | */ | 
|---|
| 236 | subclause = (Node *) make_ands_explicit(subclauses); | 
|---|
| 237 | if (is_orclause(subclause)) | 
|---|
| 238 | clauselist = list_concat(clauselist, | 
|---|
| 239 | list_copy(((BoolExpr *) subclause)->args)); | 
|---|
| 240 | else | 
|---|
| 241 | clauselist = lappend(clauselist, subclause); | 
|---|
| 242 | } | 
|---|
| 243 |  | 
|---|
| 244 | /* | 
|---|
| 245 | * If we got a restriction clause from every arm, wrap them up in an OR | 
|---|
| 246 | * node.  (In theory the OR node might be unnecessary, if there was only | 
|---|
| 247 | * one arm --- but then the input OR node was also redundant.) | 
|---|
| 248 | */ | 
|---|
| 249 | if (clauselist != NIL) | 
|---|
| 250 | return make_orclause(clauselist); | 
|---|
| 251 | return NULL; | 
|---|
| 252 | } | 
|---|
| 253 |  | 
|---|
| 254 | /* | 
|---|
| 255 | * Consider whether a successfully-extracted restriction OR clause is | 
|---|
| 256 | * actually worth using.  If so, add it to the planner's data structures, | 
|---|
| 257 | * and adjust the original join clause (join_or_rinfo) to compensate. | 
|---|
| 258 | */ | 
|---|
| 259 | static void | 
|---|
| 260 | consider_new_or_clause(PlannerInfo *root, RelOptInfo *rel, | 
|---|
| 261 | Expr *orclause, RestrictInfo *join_or_rinfo) | 
|---|
| 262 | { | 
|---|
| 263 | RestrictInfo *or_rinfo; | 
|---|
| 264 | Selectivity or_selec, | 
|---|
| 265 | orig_selec; | 
|---|
| 266 |  | 
|---|
| 267 | /* | 
|---|
| 268 | * Build a RestrictInfo from the new OR clause.  We can assume it's valid | 
|---|
| 269 | * as a base restriction clause. | 
|---|
| 270 | */ | 
|---|
| 271 | or_rinfo = make_restrictinfo(orclause, | 
|---|
| 272 | true, | 
|---|
| 273 | false, | 
|---|
| 274 | false, | 
|---|
| 275 | join_or_rinfo->security_level, | 
|---|
| 276 | NULL, | 
|---|
| 277 | NULL, | 
|---|
| 278 | NULL); | 
|---|
| 279 |  | 
|---|
| 280 | /* | 
|---|
| 281 | * Estimate its selectivity.  (We could have done this earlier, but doing | 
|---|
| 282 | * it on the RestrictInfo representation allows the result to get cached, | 
|---|
| 283 | * saving work later.) | 
|---|
| 284 | */ | 
|---|
| 285 | or_selec = clause_selectivity(root, (Node *) or_rinfo, | 
|---|
| 286 | 0, JOIN_INNER, NULL); | 
|---|
| 287 |  | 
|---|
| 288 | /* | 
|---|
| 289 | * The clause is only worth adding to the query if it rejects a useful | 
|---|
| 290 | * fraction of the base relation's rows; otherwise, it's just going to | 
|---|
| 291 | * cause duplicate computation (since we will still have to check the | 
|---|
| 292 | * original OR clause when the join is formed).  Somewhat arbitrarily, we | 
|---|
| 293 | * set the selectivity threshold at 0.9. | 
|---|
| 294 | */ | 
|---|
| 295 | if (or_selec > 0.9) | 
|---|
| 296 | return;					/* forget it */ | 
|---|
| 297 |  | 
|---|
| 298 | /* | 
|---|
| 299 | * OK, add it to the rel's restriction-clause list. | 
|---|
| 300 | */ | 
|---|
| 301 | rel->baserestrictinfo = lappend(rel->baserestrictinfo, or_rinfo); | 
|---|
| 302 | rel->baserestrict_min_security = Min(rel->baserestrict_min_security, | 
|---|
| 303 | or_rinfo->security_level); | 
|---|
| 304 |  | 
|---|
| 305 | /* | 
|---|
| 306 | * Adjust the original join OR clause's cached selectivity to compensate | 
|---|
| 307 | * for the selectivity of the added (but redundant) lower-level qual. This | 
|---|
| 308 | * should result in the join rel getting approximately the same rows | 
|---|
| 309 | * estimate as it would have gotten without all these shenanigans. | 
|---|
| 310 | * | 
|---|
| 311 | * XXX major hack alert: this depends on the assumption that the | 
|---|
| 312 | * selectivity will stay cached. | 
|---|
| 313 | * | 
|---|
| 314 | * XXX another major hack: we adjust only norm_selec, the cached | 
|---|
| 315 | * selectivity for JOIN_INNER semantics, even though the join clause | 
|---|
| 316 | * might've been an outer-join clause.  This is partly because we can't | 
|---|
| 317 | * easily identify the relevant SpecialJoinInfo here, and partly because | 
|---|
| 318 | * the linearity assumption we're making would fail anyway.  (If it is an | 
|---|
| 319 | * outer-join clause, "rel" must be on the nullable side, else we'd not | 
|---|
| 320 | * have gotten here.  So the computation of the join size is going to be | 
|---|
| 321 | * quite nonlinear with respect to the size of "rel", so it's not clear | 
|---|
| 322 | * how we ought to adjust outer_selec even if we could compute its | 
|---|
| 323 | * original value correctly.) | 
|---|
| 324 | */ | 
|---|
| 325 | if (or_selec > 0) | 
|---|
| 326 | { | 
|---|
| 327 | SpecialJoinInfo sjinfo; | 
|---|
| 328 |  | 
|---|
| 329 | /* | 
|---|
| 330 | * Make up a SpecialJoinInfo for JOIN_INNER semantics.  (Compare | 
|---|
| 331 | * approx_tuple_count() in costsize.c.) | 
|---|
| 332 | */ | 
|---|
| 333 | sjinfo.type = T_SpecialJoinInfo; | 
|---|
| 334 | sjinfo.min_lefthand = bms_difference(join_or_rinfo->clause_relids, | 
|---|
| 335 | rel->relids); | 
|---|
| 336 | sjinfo.min_righthand = rel->relids; | 
|---|
| 337 | sjinfo.syn_lefthand = sjinfo.min_lefthand; | 
|---|
| 338 | sjinfo.syn_righthand = sjinfo.min_righthand; | 
|---|
| 339 | sjinfo.jointype = JOIN_INNER; | 
|---|
| 340 | /* we don't bother trying to make the remaining fields valid */ | 
|---|
| 341 | sjinfo.lhs_strict = false; | 
|---|
| 342 | sjinfo.delay_upper_joins = false; | 
|---|
| 343 | sjinfo.semi_can_btree = false; | 
|---|
| 344 | sjinfo.semi_can_hash = false; | 
|---|
| 345 | sjinfo.semi_operators = NIL; | 
|---|
| 346 | sjinfo.semi_rhs_exprs = NIL; | 
|---|
| 347 |  | 
|---|
| 348 | /* Compute inner-join size */ | 
|---|
| 349 | orig_selec = clause_selectivity(root, (Node *) join_or_rinfo, | 
|---|
| 350 | 0, JOIN_INNER, &sjinfo); | 
|---|
| 351 |  | 
|---|
| 352 | /* And hack cached selectivity so join size remains the same */ | 
|---|
| 353 | join_or_rinfo->norm_selec = orig_selec / or_selec; | 
|---|
| 354 | /* ensure result stays in sane range, in particular not "redundant" */ | 
|---|
| 355 | if (join_or_rinfo->norm_selec > 1) | 
|---|
| 356 | join_or_rinfo->norm_selec = 1; | 
|---|
| 357 | /* as explained above, we don't touch outer_selec */ | 
|---|
| 358 | } | 
|---|
| 359 | } | 
|---|
| 360 |  | 
|---|