1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * clausesel.c |
4 | * Routines to compute clause selectivities |
5 | * |
6 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
7 | * Portions Copyright (c) 1994, Regents of the University of California |
8 | * |
9 | * |
10 | * IDENTIFICATION |
11 | * src/backend/optimizer/path/clausesel.c |
12 | * |
13 | *------------------------------------------------------------------------- |
14 | */ |
15 | #include "postgres.h" |
16 | |
17 | #include "nodes/makefuncs.h" |
18 | #include "nodes/nodeFuncs.h" |
19 | #include "optimizer/clauses.h" |
20 | #include "optimizer/cost.h" |
21 | #include "optimizer/optimizer.h" |
22 | #include "optimizer/pathnode.h" |
23 | #include "optimizer/plancat.h" |
24 | #include "utils/fmgroids.h" |
25 | #include "utils/lsyscache.h" |
26 | #include "utils/selfuncs.h" |
27 | #include "statistics/statistics.h" |
28 | |
29 | |
30 | /* |
31 | * Data structure for accumulating info about possible range-query |
32 | * clause pairs in clauselist_selectivity. |
33 | */ |
34 | typedef struct RangeQueryClause |
35 | { |
36 | struct RangeQueryClause *next; /* next in linked list */ |
37 | Node *var; /* The common variable of the clauses */ |
38 | bool have_lobound; /* found a low-bound clause yet? */ |
39 | bool have_hibound; /* found a high-bound clause yet? */ |
40 | Selectivity lobound; /* Selectivity of a var > something clause */ |
41 | Selectivity hibound; /* Selectivity of a var < something clause */ |
42 | } RangeQueryClause; |
43 | |
44 | static void addRangeClause(RangeQueryClause **rqlist, Node *clause, |
45 | bool varonleft, bool isLTsel, Selectivity s2); |
46 | static RelOptInfo *find_single_rel_for_clauses(PlannerInfo *root, |
47 | List *clauses); |
48 | |
49 | /**************************************************************************** |
50 | * ROUTINES TO COMPUTE SELECTIVITIES |
51 | ****************************************************************************/ |
52 | |
53 | /* |
54 | * clauselist_selectivity - |
55 | * Compute the selectivity of an implicitly-ANDed list of boolean |
56 | * expression clauses. The list can be empty, in which case 1.0 |
57 | * must be returned. List elements may be either RestrictInfos |
58 | * or bare expression clauses --- the former is preferred since |
59 | * it allows caching of results. |
60 | * |
61 | * See clause_selectivity() for the meaning of the additional parameters. |
62 | * |
63 | * The basic approach is to apply extended statistics first, on as many |
64 | * clauses as possible, in order to capture cross-column dependencies etc. |
65 | * The remaining clauses are then estimated using regular statistics tracked |
66 | * for individual columns. This is done by simply passing the clauses to |
67 | * clauselist_selectivity_simple. |
68 | */ |
69 | Selectivity |
70 | clauselist_selectivity(PlannerInfo *root, |
71 | List *clauses, |
72 | int varRelid, |
73 | JoinType jointype, |
74 | SpecialJoinInfo *sjinfo) |
75 | { |
76 | Selectivity s1 = 1.0; |
77 | RelOptInfo *rel; |
78 | Bitmapset *estimatedclauses = NULL; |
79 | |
80 | /* |
81 | * Determine if these clauses reference a single relation. If so, and if |
82 | * it has extended statistics, try to apply those. |
83 | */ |
84 | rel = find_single_rel_for_clauses(root, clauses); |
85 | if (rel && rel->rtekind == RTE_RELATION && rel->statlist != NIL) |
86 | { |
87 | /* |
88 | * Estimate as many clauses as possible using extended statistics. |
89 | * |
90 | * 'estimatedclauses' tracks the 0-based list position index of |
91 | * clauses that we've estimated using extended statistics, and that |
92 | * should be ignored. |
93 | */ |
94 | s1 *= statext_clauselist_selectivity(root, clauses, varRelid, |
95 | jointype, sjinfo, rel, |
96 | &estimatedclauses); |
97 | } |
98 | |
99 | /* |
100 | * Apply normal selectivity estimates for the remaining clauses, passing |
101 | * 'estimatedclauses' so that it skips already estimated ones. |
102 | */ |
103 | return s1 * clauselist_selectivity_simple(root, clauses, varRelid, |
104 | jointype, sjinfo, |
105 | estimatedclauses); |
106 | } |
107 | |
108 | /* |
109 | * clauselist_selectivity_simple - |
110 | * Compute the selectivity of an implicitly-ANDed list of boolean |
111 | * expression clauses. The list can be empty, in which case 1.0 |
112 | * must be returned. List elements may be either RestrictInfos |
113 | * or bare expression clauses --- the former is preferred since |
114 | * it allows caching of results. The estimatedclauses bitmap tracks |
115 | * clauses that have already been estimated by other means. |
116 | * |
117 | * See clause_selectivity() for the meaning of the additional parameters. |
118 | * |
119 | * Our basic approach is to take the product of the selectivities of the |
120 | * subclauses. However, that's only right if the subclauses have independent |
121 | * probabilities, and in reality they are often NOT independent. So, |
122 | * we want to be smarter where we can. |
123 | * |
124 | * We also recognize "range queries", such as "x > 34 AND x < 42". Clauses |
125 | * are recognized as possible range query components if they are restriction |
126 | * opclauses whose operators have scalarltsel or a related function as their |
127 | * restriction selectivity estimator. We pair up clauses of this form that |
128 | * refer to the same variable. An unpairable clause of this kind is simply |
129 | * multiplied into the selectivity product in the normal way. But when we |
130 | * find a pair, we know that the selectivities represent the relative |
131 | * positions of the low and high bounds within the column's range, so instead |
132 | * of figuring the selectivity as hisel * losel, we can figure it as hisel + |
133 | * losel - 1. (To visualize this, see that hisel is the fraction of the range |
134 | * below the high bound, while losel is the fraction above the low bound; so |
135 | * hisel can be interpreted directly as a 0..1 value but we need to convert |
136 | * losel to 1-losel before interpreting it as a value. Then the available |
137 | * range is 1-losel to hisel. However, this calculation double-excludes |
138 | * nulls, so really we need hisel + losel + null_frac - 1.) |
139 | * |
140 | * If either selectivity is exactly DEFAULT_INEQ_SEL, we forget this equation |
141 | * and instead use DEFAULT_RANGE_INEQ_SEL. The same applies if the equation |
142 | * yields an impossible (negative) result. |
143 | * |
144 | * A free side-effect is that we can recognize redundant inequalities such |
145 | * as "x < 4 AND x < 5"; only the tighter constraint will be counted. |
146 | * |
147 | * Of course this is all very dependent on the behavior of the inequality |
148 | * selectivity functions; perhaps some day we can generalize the approach. |
149 | */ |
150 | Selectivity |
151 | clauselist_selectivity_simple(PlannerInfo *root, |
152 | List *clauses, |
153 | int varRelid, |
154 | JoinType jointype, |
155 | SpecialJoinInfo *sjinfo, |
156 | Bitmapset *estimatedclauses) |
157 | { |
158 | Selectivity s1 = 1.0; |
159 | RangeQueryClause *rqlist = NULL; |
160 | ListCell *l; |
161 | int listidx; |
162 | |
163 | /* |
164 | * If there's exactly one clause (and it was not estimated yet), just go |
165 | * directly to clause_selectivity(). None of what we might do below is |
166 | * relevant. |
167 | */ |
168 | if ((list_length(clauses) == 1) && |
169 | bms_num_members(estimatedclauses) == 0) |
170 | return clause_selectivity(root, (Node *) linitial(clauses), |
171 | varRelid, jointype, sjinfo); |
172 | |
173 | /* |
174 | * Anything that doesn't look like a potential rangequery clause gets |
175 | * multiplied into s1 and forgotten. Anything that does gets inserted into |
176 | * an rqlist entry. |
177 | */ |
178 | listidx = -1; |
179 | foreach(l, clauses) |
180 | { |
181 | Node *clause = (Node *) lfirst(l); |
182 | RestrictInfo *rinfo; |
183 | Selectivity s2; |
184 | |
185 | listidx++; |
186 | |
187 | /* |
188 | * Skip this clause if it's already been estimated by some other |
189 | * statistics above. |
190 | */ |
191 | if (bms_is_member(listidx, estimatedclauses)) |
192 | continue; |
193 | |
194 | /* Always compute the selectivity using clause_selectivity */ |
195 | s2 = clause_selectivity(root, clause, varRelid, jointype, sjinfo); |
196 | |
197 | /* |
198 | * Check for being passed a RestrictInfo. |
199 | * |
200 | * If it's a pseudoconstant RestrictInfo, then s2 is either 1.0 or |
201 | * 0.0; just use that rather than looking for range pairs. |
202 | */ |
203 | if (IsA(clause, RestrictInfo)) |
204 | { |
205 | rinfo = (RestrictInfo *) clause; |
206 | if (rinfo->pseudoconstant) |
207 | { |
208 | s1 = s1 * s2; |
209 | continue; |
210 | } |
211 | clause = (Node *) rinfo->clause; |
212 | } |
213 | else |
214 | rinfo = NULL; |
215 | |
216 | /* |
217 | * See if it looks like a restriction clause with a pseudoconstant on |
218 | * one side. (Anything more complicated than that might not behave in |
219 | * the simple way we are expecting.) Most of the tests here can be |
220 | * done more efficiently with rinfo than without. |
221 | */ |
222 | if (is_opclause(clause) && list_length(((OpExpr *) clause)->args) == 2) |
223 | { |
224 | OpExpr *expr = (OpExpr *) clause; |
225 | bool varonleft = true; |
226 | bool ok; |
227 | |
228 | if (rinfo) |
229 | { |
230 | ok = (bms_membership(rinfo->clause_relids) == BMS_SINGLETON) && |
231 | (is_pseudo_constant_clause_relids(lsecond(expr->args), |
232 | rinfo->right_relids) || |
233 | (varonleft = false, |
234 | is_pseudo_constant_clause_relids(linitial(expr->args), |
235 | rinfo->left_relids))); |
236 | } |
237 | else |
238 | { |
239 | ok = (NumRelids(clause) == 1) && |
240 | (is_pseudo_constant_clause(lsecond(expr->args)) || |
241 | (varonleft = false, |
242 | is_pseudo_constant_clause(linitial(expr->args)))); |
243 | } |
244 | |
245 | if (ok) |
246 | { |
247 | /* |
248 | * If it's not a "<"/"<="/">"/">=" operator, just merge the |
249 | * selectivity in generically. But if it's the right oprrest, |
250 | * add the clause to rqlist for later processing. |
251 | */ |
252 | switch (get_oprrest(expr->opno)) |
253 | { |
254 | case F_SCALARLTSEL: |
255 | case F_SCALARLESEL: |
256 | addRangeClause(&rqlist, clause, |
257 | varonleft, true, s2); |
258 | break; |
259 | case F_SCALARGTSEL: |
260 | case F_SCALARGESEL: |
261 | addRangeClause(&rqlist, clause, |
262 | varonleft, false, s2); |
263 | break; |
264 | default: |
265 | /* Just merge the selectivity in generically */ |
266 | s1 = s1 * s2; |
267 | break; |
268 | } |
269 | continue; /* drop to loop bottom */ |
270 | } |
271 | } |
272 | |
273 | /* Not the right form, so treat it generically. */ |
274 | s1 = s1 * s2; |
275 | } |
276 | |
277 | /* |
278 | * Now scan the rangequery pair list. |
279 | */ |
280 | while (rqlist != NULL) |
281 | { |
282 | RangeQueryClause *rqnext; |
283 | |
284 | if (rqlist->have_lobound && rqlist->have_hibound) |
285 | { |
286 | /* Successfully matched a pair of range clauses */ |
287 | Selectivity s2; |
288 | |
289 | /* |
290 | * Exact equality to the default value probably means the |
291 | * selectivity function punted. This is not airtight but should |
292 | * be good enough. |
293 | */ |
294 | if (rqlist->hibound == DEFAULT_INEQ_SEL || |
295 | rqlist->lobound == DEFAULT_INEQ_SEL) |
296 | { |
297 | s2 = DEFAULT_RANGE_INEQ_SEL; |
298 | } |
299 | else |
300 | { |
301 | s2 = rqlist->hibound + rqlist->lobound - 1.0; |
302 | |
303 | /* Adjust for double-exclusion of NULLs */ |
304 | s2 += nulltestsel(root, IS_NULL, rqlist->var, |
305 | varRelid, jointype, sjinfo); |
306 | |
307 | /* |
308 | * A zero or slightly negative s2 should be converted into a |
309 | * small positive value; we probably are dealing with a very |
310 | * tight range and got a bogus result due to roundoff errors. |
311 | * However, if s2 is very negative, then we probably have |
312 | * default selectivity estimates on one or both sides of the |
313 | * range that we failed to recognize above for some reason. |
314 | */ |
315 | if (s2 <= 0.0) |
316 | { |
317 | if (s2 < -0.01) |
318 | { |
319 | /* |
320 | * No data available --- use a default estimate that |
321 | * is small, but not real small. |
322 | */ |
323 | s2 = DEFAULT_RANGE_INEQ_SEL; |
324 | } |
325 | else |
326 | { |
327 | /* |
328 | * It's just roundoff error; use a small positive |
329 | * value |
330 | */ |
331 | s2 = 1.0e-10; |
332 | } |
333 | } |
334 | } |
335 | /* Merge in the selectivity of the pair of clauses */ |
336 | s1 *= s2; |
337 | } |
338 | else |
339 | { |
340 | /* Only found one of a pair, merge it in generically */ |
341 | if (rqlist->have_lobound) |
342 | s1 *= rqlist->lobound; |
343 | else |
344 | s1 *= rqlist->hibound; |
345 | } |
346 | /* release storage and advance */ |
347 | rqnext = rqlist->next; |
348 | pfree(rqlist); |
349 | rqlist = rqnext; |
350 | } |
351 | |
352 | return s1; |
353 | } |
354 | |
355 | /* |
356 | * addRangeClause --- add a new range clause for clauselist_selectivity |
357 | * |
358 | * Here is where we try to match up pairs of range-query clauses |
359 | */ |
360 | static void |
361 | addRangeClause(RangeQueryClause **rqlist, Node *clause, |
362 | bool varonleft, bool isLTsel, Selectivity s2) |
363 | { |
364 | RangeQueryClause *rqelem; |
365 | Node *var; |
366 | bool is_lobound; |
367 | |
368 | if (varonleft) |
369 | { |
370 | var = get_leftop((Expr *) clause); |
371 | is_lobound = !isLTsel; /* x < something is high bound */ |
372 | } |
373 | else |
374 | { |
375 | var = get_rightop((Expr *) clause); |
376 | is_lobound = isLTsel; /* something < x is low bound */ |
377 | } |
378 | |
379 | for (rqelem = *rqlist; rqelem; rqelem = rqelem->next) |
380 | { |
381 | /* |
382 | * We use full equal() here because the "var" might be a function of |
383 | * one or more attributes of the same relation... |
384 | */ |
385 | if (!equal(var, rqelem->var)) |
386 | continue; |
387 | /* Found the right group to put this clause in */ |
388 | if (is_lobound) |
389 | { |
390 | if (!rqelem->have_lobound) |
391 | { |
392 | rqelem->have_lobound = true; |
393 | rqelem->lobound = s2; |
394 | } |
395 | else |
396 | { |
397 | |
398 | /*------ |
399 | * We have found two similar clauses, such as |
400 | * x < y AND x <= z. |
401 | * Keep only the more restrictive one. |
402 | *------ |
403 | */ |
404 | if (rqelem->lobound > s2) |
405 | rqelem->lobound = s2; |
406 | } |
407 | } |
408 | else |
409 | { |
410 | if (!rqelem->have_hibound) |
411 | { |
412 | rqelem->have_hibound = true; |
413 | rqelem->hibound = s2; |
414 | } |
415 | else |
416 | { |
417 | |
418 | /*------ |
419 | * We have found two similar clauses, such as |
420 | * x > y AND x >= z. |
421 | * Keep only the more restrictive one. |
422 | *------ |
423 | */ |
424 | if (rqelem->hibound > s2) |
425 | rqelem->hibound = s2; |
426 | } |
427 | } |
428 | return; |
429 | } |
430 | |
431 | /* No matching var found, so make a new clause-pair data structure */ |
432 | rqelem = (RangeQueryClause *) palloc(sizeof(RangeQueryClause)); |
433 | rqelem->var = var; |
434 | if (is_lobound) |
435 | { |
436 | rqelem->have_lobound = true; |
437 | rqelem->have_hibound = false; |
438 | rqelem->lobound = s2; |
439 | } |
440 | else |
441 | { |
442 | rqelem->have_lobound = false; |
443 | rqelem->have_hibound = true; |
444 | rqelem->hibound = s2; |
445 | } |
446 | rqelem->next = *rqlist; |
447 | *rqlist = rqelem; |
448 | } |
449 | |
450 | /* |
451 | * find_single_rel_for_clauses |
452 | * Examine each clause in 'clauses' and determine if all clauses |
453 | * reference only a single relation. If so return that relation, |
454 | * otherwise return NULL. |
455 | */ |
456 | static RelOptInfo * |
457 | find_single_rel_for_clauses(PlannerInfo *root, List *clauses) |
458 | { |
459 | int lastrelid = 0; |
460 | ListCell *l; |
461 | |
462 | foreach(l, clauses) |
463 | { |
464 | RestrictInfo *rinfo = (RestrictInfo *) lfirst(l); |
465 | int relid; |
466 | |
467 | /* |
468 | * If we have a list of bare clauses rather than RestrictInfos, we |
469 | * could pull out their relids the hard way with pull_varnos(). |
470 | * However, currently the extended-stats machinery won't do anything |
471 | * with non-RestrictInfo clauses anyway, so there's no point in |
472 | * spending extra cycles; just fail if that's what we have. |
473 | */ |
474 | if (!IsA(rinfo, RestrictInfo)) |
475 | return NULL; |
476 | |
477 | if (bms_is_empty(rinfo->clause_relids)) |
478 | continue; /* we can ignore variable-free clauses */ |
479 | if (!bms_get_singleton_member(rinfo->clause_relids, &relid)) |
480 | return NULL; /* multiple relations in this clause */ |
481 | if (lastrelid == 0) |
482 | lastrelid = relid; /* first clause referencing a relation */ |
483 | else if (relid != lastrelid) |
484 | return NULL; /* relation not same as last one */ |
485 | } |
486 | |
487 | if (lastrelid != 0) |
488 | return find_base_rel(root, lastrelid); |
489 | |
490 | return NULL; /* no clauses */ |
491 | } |
492 | |
493 | /* |
494 | * bms_is_subset_singleton |
495 | * |
496 | * Same result as bms_is_subset(s, bms_make_singleton(x)), |
497 | * but a little faster and doesn't leak memory. |
498 | * |
499 | * Is this of use anywhere else? If so move to bitmapset.c ... |
500 | */ |
501 | static bool |
502 | bms_is_subset_singleton(const Bitmapset *s, int x) |
503 | { |
504 | switch (bms_membership(s)) |
505 | { |
506 | case BMS_EMPTY_SET: |
507 | return true; |
508 | case BMS_SINGLETON: |
509 | return bms_is_member(x, s); |
510 | case BMS_MULTIPLE: |
511 | return false; |
512 | } |
513 | /* can't get here... */ |
514 | return false; |
515 | } |
516 | |
517 | /* |
518 | * treat_as_join_clause - |
519 | * Decide whether an operator clause is to be handled by the |
520 | * restriction or join estimator. Subroutine for clause_selectivity(). |
521 | */ |
522 | static inline bool |
523 | treat_as_join_clause(Node *clause, RestrictInfo *rinfo, |
524 | int varRelid, SpecialJoinInfo *sjinfo) |
525 | { |
526 | if (varRelid != 0) |
527 | { |
528 | /* |
529 | * Caller is forcing restriction mode (eg, because we are examining an |
530 | * inner indexscan qual). |
531 | */ |
532 | return false; |
533 | } |
534 | else if (sjinfo == NULL) |
535 | { |
536 | /* |
537 | * It must be a restriction clause, since it's being evaluated at a |
538 | * scan node. |
539 | */ |
540 | return false; |
541 | } |
542 | else |
543 | { |
544 | /* |
545 | * Otherwise, it's a join if there's more than one relation used. We |
546 | * can optimize this calculation if an rinfo was passed. |
547 | * |
548 | * XXX Since we know the clause is being evaluated at a join, the |
549 | * only way it could be single-relation is if it was delayed by outer |
550 | * joins. Although we can make use of the restriction qual estimators |
551 | * anyway, it seems likely that we ought to account for the |
552 | * probability of injected nulls somehow. |
553 | */ |
554 | if (rinfo) |
555 | return (bms_membership(rinfo->clause_relids) == BMS_MULTIPLE); |
556 | else |
557 | return (NumRelids(clause) > 1); |
558 | } |
559 | } |
560 | |
561 | |
562 | /* |
563 | * clause_selectivity - |
564 | * Compute the selectivity of a general boolean expression clause. |
565 | * |
566 | * The clause can be either a RestrictInfo or a plain expression. If it's |
567 | * a RestrictInfo, we try to cache the selectivity for possible re-use, |
568 | * so passing RestrictInfos is preferred. |
569 | * |
570 | * varRelid is either 0 or a rangetable index. |
571 | * |
572 | * When varRelid is not 0, only variables belonging to that relation are |
573 | * considered in computing selectivity; other vars are treated as constants |
574 | * of unknown values. This is appropriate for estimating the selectivity of |
575 | * a join clause that is being used as a restriction clause in a scan of a |
576 | * nestloop join's inner relation --- varRelid should then be the ID of the |
577 | * inner relation. |
578 | * |
579 | * When varRelid is 0, all variables are treated as variables. This |
580 | * is appropriate for ordinary join clauses and restriction clauses. |
581 | * |
582 | * jointype is the join type, if the clause is a join clause. Pass JOIN_INNER |
583 | * if the clause isn't a join clause. |
584 | * |
585 | * sjinfo is NULL for a non-join clause, otherwise it provides additional |
586 | * context information about the join being performed. There are some |
587 | * special cases: |
588 | * 1. For a special (not INNER) join, sjinfo is always a member of |
589 | * root->join_info_list. |
590 | * 2. For an INNER join, sjinfo is just a transient struct, and only the |
591 | * relids and jointype fields in it can be trusted. |
592 | * It is possible for jointype to be different from sjinfo->jointype. |
593 | * This indicates we are considering a variant join: either with |
594 | * the LHS and RHS switched, or with one input unique-ified. |
595 | * |
596 | * Note: when passing nonzero varRelid, it's normally appropriate to set |
597 | * jointype == JOIN_INNER, sjinfo == NULL, even if the clause is really a |
598 | * join clause; because we aren't treating it as a join clause. |
599 | */ |
600 | Selectivity |
601 | clause_selectivity(PlannerInfo *root, |
602 | Node *clause, |
603 | int varRelid, |
604 | JoinType jointype, |
605 | SpecialJoinInfo *sjinfo) |
606 | { |
607 | Selectivity s1 = 0.5; /* default for any unhandled clause type */ |
608 | RestrictInfo *rinfo = NULL; |
609 | bool cacheable = false; |
610 | |
611 | if (clause == NULL) /* can this still happen? */ |
612 | return s1; |
613 | |
614 | if (IsA(clause, RestrictInfo)) |
615 | { |
616 | rinfo = (RestrictInfo *) clause; |
617 | |
618 | /* |
619 | * If the clause is marked pseudoconstant, then it will be used as a |
620 | * gating qual and should not affect selectivity estimates; hence |
621 | * return 1.0. The only exception is that a constant FALSE may be |
622 | * taken as having selectivity 0.0, since it will surely mean no rows |
623 | * out of the plan. This case is simple enough that we need not |
624 | * bother caching the result. |
625 | */ |
626 | if (rinfo->pseudoconstant) |
627 | { |
628 | if (!IsA(rinfo->clause, Const)) |
629 | return (Selectivity) 1.0; |
630 | } |
631 | |
632 | /* |
633 | * If the clause is marked redundant, always return 1.0. |
634 | */ |
635 | if (rinfo->norm_selec > 1) |
636 | return (Selectivity) 1.0; |
637 | |
638 | /* |
639 | * If possible, cache the result of the selectivity calculation for |
640 | * the clause. We can cache if varRelid is zero or the clause |
641 | * contains only vars of that relid --- otherwise varRelid will affect |
642 | * the result, so mustn't cache. Outer join quals might be examined |
643 | * with either their join's actual jointype or JOIN_INNER, so we need |
644 | * two cache variables to remember both cases. Note: we assume the |
645 | * result won't change if we are switching the input relations or |
646 | * considering a unique-ified case, so we only need one cache variable |
647 | * for all non-JOIN_INNER cases. |
648 | */ |
649 | if (varRelid == 0 || |
650 | bms_is_subset_singleton(rinfo->clause_relids, varRelid)) |
651 | { |
652 | /* Cacheable --- do we already have the result? */ |
653 | if (jointype == JOIN_INNER) |
654 | { |
655 | if (rinfo->norm_selec >= 0) |
656 | return rinfo->norm_selec; |
657 | } |
658 | else |
659 | { |
660 | if (rinfo->outer_selec >= 0) |
661 | return rinfo->outer_selec; |
662 | } |
663 | cacheable = true; |
664 | } |
665 | |
666 | /* |
667 | * Proceed with examination of contained clause. If the clause is an |
668 | * OR-clause, we want to look at the variant with sub-RestrictInfos, |
669 | * so that per-subclause selectivities can be cached. |
670 | */ |
671 | if (rinfo->orclause) |
672 | clause = (Node *) rinfo->orclause; |
673 | else |
674 | clause = (Node *) rinfo->clause; |
675 | } |
676 | |
677 | if (IsA(clause, Var)) |
678 | { |
679 | Var *var = (Var *) clause; |
680 | |
681 | /* |
682 | * We probably shouldn't ever see an uplevel Var here, but if we do, |
683 | * return the default selectivity... |
684 | */ |
685 | if (var->varlevelsup == 0 && |
686 | (varRelid == 0 || varRelid == (int) var->varno)) |
687 | { |
688 | /* Use the restriction selectivity function for a bool Var */ |
689 | s1 = boolvarsel(root, (Node *) var, varRelid); |
690 | } |
691 | } |
692 | else if (IsA(clause, Const)) |
693 | { |
694 | /* bool constant is pretty easy... */ |
695 | Const *con = (Const *) clause; |
696 | |
697 | s1 = con->constisnull ? 0.0 : |
698 | DatumGetBool(con->constvalue) ? 1.0 : 0.0; |
699 | } |
700 | else if (IsA(clause, Param)) |
701 | { |
702 | /* see if we can replace the Param */ |
703 | Node *subst = estimate_expression_value(root, clause); |
704 | |
705 | if (IsA(subst, Const)) |
706 | { |
707 | /* bool constant is pretty easy... */ |
708 | Const *con = (Const *) subst; |
709 | |
710 | s1 = con->constisnull ? 0.0 : |
711 | DatumGetBool(con->constvalue) ? 1.0 : 0.0; |
712 | } |
713 | else |
714 | { |
715 | /* XXX any way to do better than default? */ |
716 | } |
717 | } |
718 | else if (is_notclause(clause)) |
719 | { |
720 | /* inverse of the selectivity of the underlying clause */ |
721 | s1 = 1.0 - clause_selectivity(root, |
722 | (Node *) get_notclausearg((Expr *) clause), |
723 | varRelid, |
724 | jointype, |
725 | sjinfo); |
726 | } |
727 | else if (is_andclause(clause)) |
728 | { |
729 | /* share code with clauselist_selectivity() */ |
730 | s1 = clauselist_selectivity(root, |
731 | ((BoolExpr *) clause)->args, |
732 | varRelid, |
733 | jointype, |
734 | sjinfo); |
735 | } |
736 | else if (is_orclause(clause)) |
737 | { |
738 | /* |
739 | * Selectivities for an OR clause are computed as s1+s2 - s1*s2 to |
740 | * account for the probable overlap of selected tuple sets. |
741 | * |
742 | * XXX is this too conservative? |
743 | */ |
744 | ListCell *arg; |
745 | |
746 | s1 = 0.0; |
747 | foreach(arg, ((BoolExpr *) clause)->args) |
748 | { |
749 | Selectivity s2 = clause_selectivity(root, |
750 | (Node *) lfirst(arg), |
751 | varRelid, |
752 | jointype, |
753 | sjinfo); |
754 | |
755 | s1 = s1 + s2 - s1 * s2; |
756 | } |
757 | } |
758 | else if (is_opclause(clause) || IsA(clause, DistinctExpr)) |
759 | { |
760 | OpExpr *opclause = (OpExpr *) clause; |
761 | Oid opno = opclause->opno; |
762 | |
763 | if (treat_as_join_clause(clause, rinfo, varRelid, sjinfo)) |
764 | { |
765 | /* Estimate selectivity for a join clause. */ |
766 | s1 = join_selectivity(root, opno, |
767 | opclause->args, |
768 | opclause->inputcollid, |
769 | jointype, |
770 | sjinfo); |
771 | } |
772 | else |
773 | { |
774 | /* Estimate selectivity for a restriction clause. */ |
775 | s1 = restriction_selectivity(root, opno, |
776 | opclause->args, |
777 | opclause->inputcollid, |
778 | varRelid); |
779 | } |
780 | |
781 | /* |
782 | * DistinctExpr has the same representation as OpExpr, but the |
783 | * contained operator is "=" not "<>", so we must negate the result. |
784 | * This estimation method doesn't give the right behavior for nulls, |
785 | * but it's better than doing nothing. |
786 | */ |
787 | if (IsA(clause, DistinctExpr)) |
788 | s1 = 1.0 - s1; |
789 | } |
790 | else if (is_funcclause(clause)) |
791 | { |
792 | FuncExpr *funcclause = (FuncExpr *) clause; |
793 | |
794 | /* Try to get an estimate from the support function, if any */ |
795 | s1 = function_selectivity(root, |
796 | funcclause->funcid, |
797 | funcclause->args, |
798 | funcclause->inputcollid, |
799 | treat_as_join_clause(clause, rinfo, |
800 | varRelid, sjinfo), |
801 | varRelid, |
802 | jointype, |
803 | sjinfo); |
804 | } |
805 | else if (IsA(clause, ScalarArrayOpExpr)) |
806 | { |
807 | /* Use node specific selectivity calculation function */ |
808 | s1 = scalararraysel(root, |
809 | (ScalarArrayOpExpr *) clause, |
810 | treat_as_join_clause(clause, rinfo, |
811 | varRelid, sjinfo), |
812 | varRelid, |
813 | jointype, |
814 | sjinfo); |
815 | } |
816 | else if (IsA(clause, RowCompareExpr)) |
817 | { |
818 | /* Use node specific selectivity calculation function */ |
819 | s1 = rowcomparesel(root, |
820 | (RowCompareExpr *) clause, |
821 | varRelid, |
822 | jointype, |
823 | sjinfo); |
824 | } |
825 | else if (IsA(clause, NullTest)) |
826 | { |
827 | /* Use node specific selectivity calculation function */ |
828 | s1 = nulltestsel(root, |
829 | ((NullTest *) clause)->nulltesttype, |
830 | (Node *) ((NullTest *) clause)->arg, |
831 | varRelid, |
832 | jointype, |
833 | sjinfo); |
834 | } |
835 | else if (IsA(clause, BooleanTest)) |
836 | { |
837 | /* Use node specific selectivity calculation function */ |
838 | s1 = booltestsel(root, |
839 | ((BooleanTest *) clause)->booltesttype, |
840 | (Node *) ((BooleanTest *) clause)->arg, |
841 | varRelid, |
842 | jointype, |
843 | sjinfo); |
844 | } |
845 | else if (IsA(clause, CurrentOfExpr)) |
846 | { |
847 | /* CURRENT OF selects at most one row of its table */ |
848 | CurrentOfExpr *cexpr = (CurrentOfExpr *) clause; |
849 | RelOptInfo *crel = find_base_rel(root, cexpr->cvarno); |
850 | |
851 | if (crel->tuples > 0) |
852 | s1 = 1.0 / crel->tuples; |
853 | } |
854 | else if (IsA(clause, RelabelType)) |
855 | { |
856 | /* Not sure this case is needed, but it can't hurt */ |
857 | s1 = clause_selectivity(root, |
858 | (Node *) ((RelabelType *) clause)->arg, |
859 | varRelid, |
860 | jointype, |
861 | sjinfo); |
862 | } |
863 | else if (IsA(clause, CoerceToDomain)) |
864 | { |
865 | /* Not sure this case is needed, but it can't hurt */ |
866 | s1 = clause_selectivity(root, |
867 | (Node *) ((CoerceToDomain *) clause)->arg, |
868 | varRelid, |
869 | jointype, |
870 | sjinfo); |
871 | } |
872 | else |
873 | { |
874 | /* |
875 | * For anything else, see if we can consider it as a boolean variable. |
876 | * This only works if it's an immutable expression in Vars of a single |
877 | * relation; but there's no point in us checking that here because |
878 | * boolvarsel() will do it internally, and return a suitable default |
879 | * selectivity if not. |
880 | */ |
881 | s1 = boolvarsel(root, clause, varRelid); |
882 | } |
883 | |
884 | /* Cache the result if possible */ |
885 | if (cacheable) |
886 | { |
887 | if (jointype == JOIN_INNER) |
888 | rinfo->norm_selec = s1; |
889 | else |
890 | rinfo->outer_selec = s1; |
891 | } |
892 | |
893 | #ifdef SELECTIVITY_DEBUG |
894 | elog(DEBUG4, "clause_selectivity: s1 %f" , s1); |
895 | #endif /* SELECTIVITY_DEBUG */ |
896 | |
897 | return s1; |
898 | } |
899 | |