1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * subselect.c |
4 | * Planning routines for subselects. |
5 | * |
6 | * This module deals with SubLinks and CTEs, but not subquery RTEs (i.e., |
7 | * not sub-SELECT-in-FROM cases). |
8 | * |
9 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
10 | * Portions Copyright (c) 1994, Regents of the University of California |
11 | * |
12 | * IDENTIFICATION |
13 | * src/backend/optimizer/plan/subselect.c |
14 | * |
15 | *------------------------------------------------------------------------- |
16 | */ |
17 | #include "postgres.h" |
18 | |
19 | #include "access/htup_details.h" |
20 | #include "catalog/pg_operator.h" |
21 | #include "catalog/pg_type.h" |
22 | #include "executor/executor.h" |
23 | #include "miscadmin.h" |
24 | #include "nodes/makefuncs.h" |
25 | #include "nodes/nodeFuncs.h" |
26 | #include "optimizer/clauses.h" |
27 | #include "optimizer/cost.h" |
28 | #include "optimizer/optimizer.h" |
29 | #include "optimizer/paramassign.h" |
30 | #include "optimizer/pathnode.h" |
31 | #include "optimizer/planmain.h" |
32 | #include "optimizer/planner.h" |
33 | #include "optimizer/prep.h" |
34 | #include "optimizer/subselect.h" |
35 | #include "parser/parse_relation.h" |
36 | #include "rewrite/rewriteManip.h" |
37 | #include "utils/builtins.h" |
38 | #include "utils/lsyscache.h" |
39 | #include "utils/syscache.h" |
40 | |
41 | |
42 | typedef struct convert_testexpr_context |
43 | { |
44 | PlannerInfo *root; |
45 | List *subst_nodes; /* Nodes to substitute for Params */ |
46 | } convert_testexpr_context; |
47 | |
48 | typedef struct process_sublinks_context |
49 | { |
50 | PlannerInfo *root; |
51 | bool isTopQual; |
52 | } process_sublinks_context; |
53 | |
54 | typedef struct finalize_primnode_context |
55 | { |
56 | PlannerInfo *root; |
57 | Bitmapset *paramids; /* Non-local PARAM_EXEC paramids found */ |
58 | } finalize_primnode_context; |
59 | |
60 | typedef struct inline_cte_walker_context |
61 | { |
62 | const char *ctename; /* name and relative level of target CTE */ |
63 | int levelsup; |
64 | int refcount; /* number of remaining references */ |
65 | Query *ctequery; /* query to substitute */ |
66 | } inline_cte_walker_context; |
67 | |
68 | |
69 | static Node *build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot, |
70 | List *plan_params, |
71 | SubLinkType subLinkType, int subLinkId, |
72 | Node *testexpr, bool adjust_testexpr, |
73 | bool unknownEqFalse); |
74 | static List *generate_subquery_params(PlannerInfo *root, List *tlist, |
75 | List **paramIds); |
76 | static List *generate_subquery_vars(PlannerInfo *root, List *tlist, |
77 | Index varno); |
78 | static Node *convert_testexpr(PlannerInfo *root, |
79 | Node *testexpr, |
80 | List *subst_nodes); |
81 | static Node *convert_testexpr_mutator(Node *node, |
82 | convert_testexpr_context *context); |
83 | static bool subplan_is_hashable(Plan *plan); |
84 | static bool testexpr_is_hashable(Node *testexpr); |
85 | static bool hash_ok_operator(OpExpr *expr); |
86 | static bool contain_dml(Node *node); |
87 | static bool contain_dml_walker(Node *node, void *context); |
88 | static bool contain_outer_selfref(Node *node); |
89 | static bool contain_outer_selfref_walker(Node *node, Index *depth); |
90 | static void inline_cte(PlannerInfo *root, CommonTableExpr *cte); |
91 | static bool inline_cte_walker(Node *node, inline_cte_walker_context *context); |
92 | static bool simplify_EXISTS_query(PlannerInfo *root, Query *query); |
93 | static Query *convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect, |
94 | Node **testexpr, List **paramIds); |
95 | static Node *replace_correlation_vars_mutator(Node *node, PlannerInfo *root); |
96 | static Node *process_sublinks_mutator(Node *node, |
97 | process_sublinks_context *context); |
98 | static Bitmapset *finalize_plan(PlannerInfo *root, |
99 | Plan *plan, |
100 | int gather_param, |
101 | Bitmapset *valid_params, |
102 | Bitmapset *scan_params); |
103 | static bool finalize_primnode(Node *node, finalize_primnode_context *context); |
104 | static bool finalize_agg_primnode(Node *node, finalize_primnode_context *context); |
105 | |
106 | |
107 | /* |
108 | * Get the datatype/typmod/collation of the first column of the plan's output. |
109 | * |
110 | * This information is stored for ARRAY_SUBLINK execution and for |
111 | * exprType()/exprTypmod()/exprCollation(), which have no way to get at the |
112 | * plan associated with a SubPlan node. We really only need the info for |
113 | * EXPR_SUBLINK and ARRAY_SUBLINK subplans, but for consistency we save it |
114 | * always. |
115 | */ |
116 | static void |
117 | get_first_col_type(Plan *plan, Oid *coltype, int32 *coltypmod, |
118 | Oid *colcollation) |
119 | { |
120 | /* In cases such as EXISTS, tlist might be empty; arbitrarily use VOID */ |
121 | if (plan->targetlist) |
122 | { |
123 | TargetEntry *tent = linitial_node(TargetEntry, plan->targetlist); |
124 | |
125 | if (!tent->resjunk) |
126 | { |
127 | *coltype = exprType((Node *) tent->expr); |
128 | *coltypmod = exprTypmod((Node *) tent->expr); |
129 | *colcollation = exprCollation((Node *) tent->expr); |
130 | return; |
131 | } |
132 | } |
133 | *coltype = VOIDOID; |
134 | *coltypmod = -1; |
135 | *colcollation = InvalidOid; |
136 | } |
137 | |
138 | /* |
139 | * Convert a SubLink (as created by the parser) into a SubPlan. |
140 | * |
141 | * We are given the SubLink's contained query, type, ID, and testexpr. We are |
142 | * also told if this expression appears at top level of a WHERE/HAVING qual. |
143 | * |
144 | * Note: we assume that the testexpr has been AND/OR flattened (actually, |
145 | * it's been through eval_const_expressions), but not converted to |
146 | * implicit-AND form; and any SubLinks in it should already have been |
147 | * converted to SubPlans. The subquery is as yet untouched, however. |
148 | * |
149 | * The result is whatever we need to substitute in place of the SubLink node |
150 | * in the executable expression. If we're going to do the subplan as a |
151 | * regular subplan, this will be the constructed SubPlan node. If we're going |
152 | * to do the subplan as an InitPlan, the SubPlan node instead goes into |
153 | * root->init_plans, and what we return here is an expression tree |
154 | * representing the InitPlan's result: usually just a Param node representing |
155 | * a single scalar result, but possibly a row comparison tree containing |
156 | * multiple Param nodes, or for a MULTIEXPR subquery a simple NULL constant |
157 | * (since the real output Params are elsewhere in the tree, and the MULTIEXPR |
158 | * subquery itself is in a resjunk tlist entry whose value is uninteresting). |
159 | */ |
160 | static Node * |
161 | make_subplan(PlannerInfo *root, Query *orig_subquery, |
162 | SubLinkType subLinkType, int subLinkId, |
163 | Node *testexpr, bool isTopQual) |
164 | { |
165 | Query *subquery; |
166 | bool simple_exists = false; |
167 | double tuple_fraction; |
168 | PlannerInfo *subroot; |
169 | RelOptInfo *final_rel; |
170 | Path *best_path; |
171 | Plan *plan; |
172 | List *plan_params; |
173 | Node *result; |
174 | |
175 | /* |
176 | * Copy the source Query node. This is a quick and dirty kluge to resolve |
177 | * the fact that the parser can generate trees with multiple links to the |
178 | * same sub-Query node, but the planner wants to scribble on the Query. |
179 | * Try to clean this up when we do querytree redesign... |
180 | */ |
181 | subquery = copyObject(orig_subquery); |
182 | |
183 | /* |
184 | * If it's an EXISTS subplan, we might be able to simplify it. |
185 | */ |
186 | if (subLinkType == EXISTS_SUBLINK) |
187 | simple_exists = simplify_EXISTS_query(root, subquery); |
188 | |
189 | /* |
190 | * For an EXISTS subplan, tell lower-level planner to expect that only the |
191 | * first tuple will be retrieved. For ALL and ANY subplans, we will be |
192 | * able to stop evaluating if the test condition fails or matches, so very |
193 | * often not all the tuples will be retrieved; for lack of a better idea, |
194 | * specify 50% retrieval. For EXPR, MULTIEXPR, and ROWCOMPARE subplans, |
195 | * use default behavior (we're only expecting one row out, anyway). |
196 | * |
197 | * NOTE: if you change these numbers, also change cost_subplan() in |
198 | * path/costsize.c. |
199 | * |
200 | * XXX If an ANY subplan is uncorrelated, build_subplan may decide to hash |
201 | * its output. In that case it would've been better to specify full |
202 | * retrieval. At present, however, we can only check hashability after |
203 | * we've made the subplan :-(. (Determining whether it'll fit in work_mem |
204 | * is the really hard part.) Therefore, we don't want to be too |
205 | * optimistic about the percentage of tuples retrieved, for fear of |
206 | * selecting a plan that's bad for the materialization case. |
207 | */ |
208 | if (subLinkType == EXISTS_SUBLINK) |
209 | tuple_fraction = 1.0; /* just like a LIMIT 1 */ |
210 | else if (subLinkType == ALL_SUBLINK || |
211 | subLinkType == ANY_SUBLINK) |
212 | tuple_fraction = 0.5; /* 50% */ |
213 | else |
214 | tuple_fraction = 0.0; /* default behavior */ |
215 | |
216 | /* plan_params should not be in use in current query level */ |
217 | Assert(root->plan_params == NIL); |
218 | |
219 | /* Generate Paths for the subquery */ |
220 | subroot = subquery_planner(root->glob, subquery, |
221 | root, |
222 | false, tuple_fraction); |
223 | |
224 | /* Isolate the params needed by this specific subplan */ |
225 | plan_params = root->plan_params; |
226 | root->plan_params = NIL; |
227 | |
228 | /* |
229 | * Select best Path and turn it into a Plan. At least for now, there |
230 | * seems no reason to postpone doing that. |
231 | */ |
232 | final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL); |
233 | best_path = get_cheapest_fractional_path(final_rel, tuple_fraction); |
234 | |
235 | plan = create_plan(subroot, best_path); |
236 | |
237 | /* And convert to SubPlan or InitPlan format. */ |
238 | result = build_subplan(root, plan, subroot, plan_params, |
239 | subLinkType, subLinkId, |
240 | testexpr, true, isTopQual); |
241 | |
242 | /* |
243 | * If it's a correlated EXISTS with an unimportant targetlist, we might be |
244 | * able to transform it to the equivalent of an IN and then implement it |
245 | * by hashing. We don't have enough information yet to tell which way is |
246 | * likely to be better (it depends on the expected number of executions of |
247 | * the EXISTS qual, and we are much too early in planning the outer query |
248 | * to be able to guess that). So we generate both plans, if possible, and |
249 | * leave it to the executor to decide which to use. |
250 | */ |
251 | if (simple_exists && IsA(result, SubPlan)) |
252 | { |
253 | Node *newtestexpr; |
254 | List *paramIds; |
255 | |
256 | /* Make a second copy of the original subquery */ |
257 | subquery = copyObject(orig_subquery); |
258 | /* and re-simplify */ |
259 | simple_exists = simplify_EXISTS_query(root, subquery); |
260 | Assert(simple_exists); |
261 | /* See if it can be converted to an ANY query */ |
262 | subquery = convert_EXISTS_to_ANY(root, subquery, |
263 | &newtestexpr, ¶mIds); |
264 | if (subquery) |
265 | { |
266 | /* Generate Paths for the ANY subquery; we'll need all rows */ |
267 | subroot = subquery_planner(root->glob, subquery, |
268 | root, |
269 | false, 0.0); |
270 | |
271 | /* Isolate the params needed by this specific subplan */ |
272 | plan_params = root->plan_params; |
273 | root->plan_params = NIL; |
274 | |
275 | /* Select best Path and turn it into a Plan */ |
276 | final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL); |
277 | best_path = final_rel->cheapest_total_path; |
278 | |
279 | plan = create_plan(subroot, best_path); |
280 | |
281 | /* Now we can check if it'll fit in work_mem */ |
282 | /* XXX can we check this at the Path stage? */ |
283 | if (subplan_is_hashable(plan)) |
284 | { |
285 | SubPlan *hashplan; |
286 | AlternativeSubPlan *asplan; |
287 | |
288 | /* OK, convert to SubPlan format. */ |
289 | hashplan = castNode(SubPlan, |
290 | build_subplan(root, plan, subroot, |
291 | plan_params, |
292 | ANY_SUBLINK, 0, |
293 | newtestexpr, |
294 | false, true)); |
295 | /* Check we got what we expected */ |
296 | Assert(hashplan->parParam == NIL); |
297 | Assert(hashplan->useHashTable); |
298 | /* build_subplan won't have filled in paramIds */ |
299 | hashplan->paramIds = paramIds; |
300 | |
301 | /* Leave it to the executor to decide which plan to use */ |
302 | asplan = makeNode(AlternativeSubPlan); |
303 | asplan->subplans = list_make2(result, hashplan); |
304 | result = (Node *) asplan; |
305 | } |
306 | } |
307 | } |
308 | |
309 | return result; |
310 | } |
311 | |
312 | /* |
313 | * Build a SubPlan node given the raw inputs --- subroutine for make_subplan |
314 | * |
315 | * Returns either the SubPlan, or a replacement expression if we decide to |
316 | * make it an InitPlan, as explained in the comments for make_subplan. |
317 | */ |
318 | static Node * |
319 | build_subplan(PlannerInfo *root, Plan *plan, PlannerInfo *subroot, |
320 | List *plan_params, |
321 | SubLinkType subLinkType, int subLinkId, |
322 | Node *testexpr, bool adjust_testexpr, |
323 | bool unknownEqFalse) |
324 | { |
325 | Node *result; |
326 | SubPlan *splan; |
327 | bool isInitPlan; |
328 | ListCell *lc; |
329 | |
330 | /* |
331 | * Initialize the SubPlan node. Note plan_id, plan_name, and cost fields |
332 | * are set further down. |
333 | */ |
334 | splan = makeNode(SubPlan); |
335 | splan->subLinkType = subLinkType; |
336 | splan->testexpr = NULL; |
337 | splan->paramIds = NIL; |
338 | get_first_col_type(plan, &splan->firstColType, &splan->firstColTypmod, |
339 | &splan->firstColCollation); |
340 | splan->useHashTable = false; |
341 | splan->unknownEqFalse = unknownEqFalse; |
342 | splan->parallel_safe = plan->parallel_safe; |
343 | splan->setParam = NIL; |
344 | splan->parParam = NIL; |
345 | splan->args = NIL; |
346 | |
347 | /* |
348 | * Make parParam and args lists of param IDs and expressions that current |
349 | * query level will pass to this child plan. |
350 | */ |
351 | foreach(lc, plan_params) |
352 | { |
353 | PlannerParamItem *pitem = (PlannerParamItem *) lfirst(lc); |
354 | Node *arg = pitem->item; |
355 | |
356 | /* |
357 | * The Var, PlaceHolderVar, or Aggref has already been adjusted to |
358 | * have the correct varlevelsup, phlevelsup, or agglevelsup. |
359 | * |
360 | * If it's a PlaceHolderVar or Aggref, its arguments might contain |
361 | * SubLinks, which have not yet been processed (see the comments for |
362 | * SS_replace_correlation_vars). Do that now. |
363 | */ |
364 | if (IsA(arg, PlaceHolderVar) || |
365 | IsA(arg, Aggref)) |
366 | arg = SS_process_sublinks(root, arg, false); |
367 | |
368 | splan->parParam = lappend_int(splan->parParam, pitem->paramId); |
369 | splan->args = lappend(splan->args, arg); |
370 | } |
371 | |
372 | /* |
373 | * Un-correlated or undirect correlated plans of EXISTS, EXPR, ARRAY, |
374 | * ROWCOMPARE, or MULTIEXPR types can be used as initPlans. For EXISTS, |
375 | * EXPR, or ARRAY, we return a Param referring to the result of evaluating |
376 | * the initPlan. For ROWCOMPARE, we must modify the testexpr tree to |
377 | * contain PARAM_EXEC Params instead of the PARAM_SUBLINK Params emitted |
378 | * by the parser, and then return that tree. For MULTIEXPR, we return a |
379 | * null constant: the resjunk targetlist item containing the SubLink does |
380 | * not need to return anything useful, since the referencing Params are |
381 | * elsewhere. |
382 | */ |
383 | if (splan->parParam == NIL && subLinkType == EXISTS_SUBLINK) |
384 | { |
385 | Param *prm; |
386 | |
387 | Assert(testexpr == NULL); |
388 | prm = generate_new_exec_param(root, BOOLOID, -1, InvalidOid); |
389 | splan->setParam = list_make1_int(prm->paramid); |
390 | isInitPlan = true; |
391 | result = (Node *) prm; |
392 | } |
393 | else if (splan->parParam == NIL && subLinkType == EXPR_SUBLINK) |
394 | { |
395 | TargetEntry *te = linitial(plan->targetlist); |
396 | Param *prm; |
397 | |
398 | Assert(!te->resjunk); |
399 | Assert(testexpr == NULL); |
400 | prm = generate_new_exec_param(root, |
401 | exprType((Node *) te->expr), |
402 | exprTypmod((Node *) te->expr), |
403 | exprCollation((Node *) te->expr)); |
404 | splan->setParam = list_make1_int(prm->paramid); |
405 | isInitPlan = true; |
406 | result = (Node *) prm; |
407 | } |
408 | else if (splan->parParam == NIL && subLinkType == ARRAY_SUBLINK) |
409 | { |
410 | TargetEntry *te = linitial(plan->targetlist); |
411 | Oid arraytype; |
412 | Param *prm; |
413 | |
414 | Assert(!te->resjunk); |
415 | Assert(testexpr == NULL); |
416 | arraytype = get_promoted_array_type(exprType((Node *) te->expr)); |
417 | if (!OidIsValid(arraytype)) |
418 | elog(ERROR, "could not find array type for datatype %s" , |
419 | format_type_be(exprType((Node *) te->expr))); |
420 | prm = generate_new_exec_param(root, |
421 | arraytype, |
422 | exprTypmod((Node *) te->expr), |
423 | exprCollation((Node *) te->expr)); |
424 | splan->setParam = list_make1_int(prm->paramid); |
425 | isInitPlan = true; |
426 | result = (Node *) prm; |
427 | } |
428 | else if (splan->parParam == NIL && subLinkType == ROWCOMPARE_SUBLINK) |
429 | { |
430 | /* Adjust the Params */ |
431 | List *params; |
432 | |
433 | Assert(testexpr != NULL); |
434 | params = generate_subquery_params(root, |
435 | plan->targetlist, |
436 | &splan->paramIds); |
437 | result = convert_testexpr(root, |
438 | testexpr, |
439 | params); |
440 | splan->setParam = list_copy(splan->paramIds); |
441 | isInitPlan = true; |
442 | |
443 | /* |
444 | * The executable expression is returned to become part of the outer |
445 | * plan's expression tree; it is not kept in the initplan node. |
446 | */ |
447 | } |
448 | else if (subLinkType == MULTIEXPR_SUBLINK) |
449 | { |
450 | /* |
451 | * Whether it's an initplan or not, it needs to set a PARAM_EXEC Param |
452 | * for each output column. |
453 | */ |
454 | List *params; |
455 | |
456 | Assert(testexpr == NULL); |
457 | params = generate_subquery_params(root, |
458 | plan->targetlist, |
459 | &splan->setParam); |
460 | |
461 | /* |
462 | * Save the list of replacement Params in the n'th cell of |
463 | * root->multiexpr_params; setrefs.c will use it to replace |
464 | * PARAM_MULTIEXPR Params. |
465 | */ |
466 | while (list_length(root->multiexpr_params) < subLinkId) |
467 | root->multiexpr_params = lappend(root->multiexpr_params, NIL); |
468 | lc = list_nth_cell(root->multiexpr_params, subLinkId - 1); |
469 | Assert(lfirst(lc) == NIL); |
470 | lfirst(lc) = params; |
471 | |
472 | /* It can be an initplan if there are no parParams. */ |
473 | if (splan->parParam == NIL) |
474 | { |
475 | isInitPlan = true; |
476 | result = (Node *) makeNullConst(RECORDOID, -1, InvalidOid); |
477 | } |
478 | else |
479 | { |
480 | isInitPlan = false; |
481 | result = (Node *) splan; |
482 | } |
483 | } |
484 | else |
485 | { |
486 | /* |
487 | * Adjust the Params in the testexpr, unless caller said it's not |
488 | * needed. |
489 | */ |
490 | if (testexpr && adjust_testexpr) |
491 | { |
492 | List *params; |
493 | |
494 | params = generate_subquery_params(root, |
495 | plan->targetlist, |
496 | &splan->paramIds); |
497 | splan->testexpr = convert_testexpr(root, |
498 | testexpr, |
499 | params); |
500 | } |
501 | else |
502 | splan->testexpr = testexpr; |
503 | |
504 | /* |
505 | * We can't convert subplans of ALL_SUBLINK or ANY_SUBLINK types to |
506 | * initPlans, even when they are uncorrelated or undirect correlated, |
507 | * because we need to scan the output of the subplan for each outer |
508 | * tuple. But if it's a not-direct-correlated IN (= ANY) test, we |
509 | * might be able to use a hashtable to avoid comparing all the tuples. |
510 | */ |
511 | if (subLinkType == ANY_SUBLINK && |
512 | splan->parParam == NIL && |
513 | subplan_is_hashable(plan) && |
514 | testexpr_is_hashable(splan->testexpr)) |
515 | splan->useHashTable = true; |
516 | |
517 | /* |
518 | * Otherwise, we have the option to tack a Material node onto the top |
519 | * of the subplan, to reduce the cost of reading it repeatedly. This |
520 | * is pointless for a direct-correlated subplan, since we'd have to |
521 | * recompute its results each time anyway. For uncorrelated/undirect |
522 | * correlated subplans, we add Material unless the subplan's top plan |
523 | * node would materialize its output anyway. Also, if enable_material |
524 | * is false, then the user does not want us to materialize anything |
525 | * unnecessarily, so we don't. |
526 | */ |
527 | else if (splan->parParam == NIL && enable_material && |
528 | !ExecMaterializesOutput(nodeTag(plan))) |
529 | plan = materialize_finished_plan(plan); |
530 | |
531 | result = (Node *) splan; |
532 | isInitPlan = false; |
533 | } |
534 | |
535 | /* |
536 | * Add the subplan and its PlannerInfo to the global lists. |
537 | */ |
538 | root->glob->subplans = lappend(root->glob->subplans, plan); |
539 | root->glob->subroots = lappend(root->glob->subroots, subroot); |
540 | splan->plan_id = list_length(root->glob->subplans); |
541 | |
542 | if (isInitPlan) |
543 | root->init_plans = lappend(root->init_plans, splan); |
544 | |
545 | /* |
546 | * A parameterless subplan (not initplan) should be prepared to handle |
547 | * REWIND efficiently. If it has direct parameters then there's no point |
548 | * since it'll be reset on each scan anyway; and if it's an initplan then |
549 | * there's no point since it won't get re-run without parameter changes |
550 | * anyway. The input of a hashed subplan doesn't need REWIND either. |
551 | */ |
552 | if (splan->parParam == NIL && !isInitPlan && !splan->useHashTable) |
553 | root->glob->rewindPlanIDs = bms_add_member(root->glob->rewindPlanIDs, |
554 | splan->plan_id); |
555 | |
556 | /* Label the subplan for EXPLAIN purposes */ |
557 | splan->plan_name = palloc(32 + 12 * list_length(splan->setParam)); |
558 | sprintf(splan->plan_name, "%s %d" , |
559 | isInitPlan ? "InitPlan" : "SubPlan" , |
560 | splan->plan_id); |
561 | if (splan->setParam) |
562 | { |
563 | char *ptr = splan->plan_name + strlen(splan->plan_name); |
564 | |
565 | ptr += sprintf(ptr, " (returns " ); |
566 | foreach(lc, splan->setParam) |
567 | { |
568 | ptr += sprintf(ptr, "$%d%s" , |
569 | lfirst_int(lc), |
570 | lnext(lc) ? "," : ")" ); |
571 | } |
572 | } |
573 | |
574 | /* Lastly, fill in the cost estimates for use later */ |
575 | cost_subplan(root, splan, plan); |
576 | |
577 | return result; |
578 | } |
579 | |
580 | /* |
581 | * generate_subquery_params: build a list of Params representing the output |
582 | * columns of a sublink's sub-select, given the sub-select's targetlist. |
583 | * |
584 | * We also return an integer list of the paramids of the Params. |
585 | */ |
586 | static List * |
587 | generate_subquery_params(PlannerInfo *root, List *tlist, List **paramIds) |
588 | { |
589 | List *result; |
590 | List *ids; |
591 | ListCell *lc; |
592 | |
593 | result = ids = NIL; |
594 | foreach(lc, tlist) |
595 | { |
596 | TargetEntry *tent = (TargetEntry *) lfirst(lc); |
597 | Param *param; |
598 | |
599 | if (tent->resjunk) |
600 | continue; |
601 | |
602 | param = generate_new_exec_param(root, |
603 | exprType((Node *) tent->expr), |
604 | exprTypmod((Node *) tent->expr), |
605 | exprCollation((Node *) tent->expr)); |
606 | result = lappend(result, param); |
607 | ids = lappend_int(ids, param->paramid); |
608 | } |
609 | |
610 | *paramIds = ids; |
611 | return result; |
612 | } |
613 | |
614 | /* |
615 | * generate_subquery_vars: build a list of Vars representing the output |
616 | * columns of a sublink's sub-select, given the sub-select's targetlist. |
617 | * The Vars have the specified varno (RTE index). |
618 | */ |
619 | static List * |
620 | generate_subquery_vars(PlannerInfo *root, List *tlist, Index varno) |
621 | { |
622 | List *result; |
623 | ListCell *lc; |
624 | |
625 | result = NIL; |
626 | foreach(lc, tlist) |
627 | { |
628 | TargetEntry *tent = (TargetEntry *) lfirst(lc); |
629 | Var *var; |
630 | |
631 | if (tent->resjunk) |
632 | continue; |
633 | |
634 | var = makeVarFromTargetEntry(varno, tent); |
635 | result = lappend(result, var); |
636 | } |
637 | |
638 | return result; |
639 | } |
640 | |
641 | /* |
642 | * convert_testexpr: convert the testexpr given by the parser into |
643 | * actually executable form. This entails replacing PARAM_SUBLINK Params |
644 | * with Params or Vars representing the results of the sub-select. The |
645 | * nodes to be substituted are passed in as the List result from |
646 | * generate_subquery_params or generate_subquery_vars. |
647 | */ |
648 | static Node * |
649 | convert_testexpr(PlannerInfo *root, |
650 | Node *testexpr, |
651 | List *subst_nodes) |
652 | { |
653 | convert_testexpr_context context; |
654 | |
655 | context.root = root; |
656 | context.subst_nodes = subst_nodes; |
657 | return convert_testexpr_mutator(testexpr, &context); |
658 | } |
659 | |
660 | static Node * |
661 | convert_testexpr_mutator(Node *node, |
662 | convert_testexpr_context *context) |
663 | { |
664 | if (node == NULL) |
665 | return NULL; |
666 | if (IsA(node, Param)) |
667 | { |
668 | Param *param = (Param *) node; |
669 | |
670 | if (param->paramkind == PARAM_SUBLINK) |
671 | { |
672 | if (param->paramid <= 0 || |
673 | param->paramid > list_length(context->subst_nodes)) |
674 | elog(ERROR, "unexpected PARAM_SUBLINK ID: %d" , param->paramid); |
675 | |
676 | /* |
677 | * We copy the list item to avoid having doubly-linked |
678 | * substructure in the modified parse tree. This is probably |
679 | * unnecessary when it's a Param, but be safe. |
680 | */ |
681 | return (Node *) copyObject(list_nth(context->subst_nodes, |
682 | param->paramid - 1)); |
683 | } |
684 | } |
685 | if (IsA(node, SubLink)) |
686 | { |
687 | /* |
688 | * If we come across a nested SubLink, it is neither necessary nor |
689 | * correct to recurse into it: any PARAM_SUBLINKs we might find inside |
690 | * belong to the inner SubLink not the outer. So just return it as-is. |
691 | * |
692 | * This reasoning depends on the assumption that nothing will pull |
693 | * subexpressions into or out of the testexpr field of a SubLink, at |
694 | * least not without replacing PARAM_SUBLINKs first. If we did want |
695 | * to do that we'd need to rethink the parser-output representation |
696 | * altogether, since currently PARAM_SUBLINKs are only unique per |
697 | * SubLink not globally across the query. The whole point of |
698 | * replacing them with Vars or PARAM_EXEC nodes is to make them |
699 | * globally unique before they escape from the SubLink's testexpr. |
700 | * |
701 | * Note: this can't happen when called during SS_process_sublinks, |
702 | * because that recursively processes inner SubLinks first. It can |
703 | * happen when called from convert_ANY_sublink_to_join, though. |
704 | */ |
705 | return node; |
706 | } |
707 | return expression_tree_mutator(node, |
708 | convert_testexpr_mutator, |
709 | (void *) context); |
710 | } |
711 | |
712 | /* |
713 | * subplan_is_hashable: can we implement an ANY subplan by hashing? |
714 | */ |
715 | static bool |
716 | subplan_is_hashable(Plan *plan) |
717 | { |
718 | double subquery_size; |
719 | |
720 | /* |
721 | * The estimated size of the subquery result must fit in work_mem. (Note: |
722 | * we use heap tuple overhead here even though the tuples will actually be |
723 | * stored as MinimalTuples; this provides some fudge factor for hashtable |
724 | * overhead.) |
725 | */ |
726 | subquery_size = plan->plan_rows * |
727 | (MAXALIGN(plan->plan_width) + MAXALIGN(SizeofHeapTupleHeader)); |
728 | if (subquery_size > work_mem * 1024L) |
729 | return false; |
730 | |
731 | return true; |
732 | } |
733 | |
734 | /* |
735 | * testexpr_is_hashable: is an ANY SubLink's test expression hashable? |
736 | */ |
737 | static bool |
738 | testexpr_is_hashable(Node *testexpr) |
739 | { |
740 | /* |
741 | * The testexpr must be a single OpExpr, or an AND-clause containing only |
742 | * OpExprs. |
743 | * |
744 | * The combining operators must be hashable and strict. The need for |
745 | * hashability is obvious, since we want to use hashing. Without |
746 | * strictness, behavior in the presence of nulls is too unpredictable. We |
747 | * actually must assume even more than plain strictness: they can't yield |
748 | * NULL for non-null inputs, either (see nodeSubplan.c). However, hash |
749 | * indexes and hash joins assume that too. |
750 | */ |
751 | if (testexpr && IsA(testexpr, OpExpr)) |
752 | { |
753 | if (hash_ok_operator((OpExpr *) testexpr)) |
754 | return true; |
755 | } |
756 | else if (is_andclause(testexpr)) |
757 | { |
758 | ListCell *l; |
759 | |
760 | foreach(l, ((BoolExpr *) testexpr)->args) |
761 | { |
762 | Node *andarg = (Node *) lfirst(l); |
763 | |
764 | if (!IsA(andarg, OpExpr)) |
765 | return false; |
766 | if (!hash_ok_operator((OpExpr *) andarg)) |
767 | return false; |
768 | } |
769 | return true; |
770 | } |
771 | |
772 | return false; |
773 | } |
774 | |
775 | /* |
776 | * Check expression is hashable + strict |
777 | * |
778 | * We could use op_hashjoinable() and op_strict(), but do it like this to |
779 | * avoid a redundant cache lookup. |
780 | */ |
781 | static bool |
782 | hash_ok_operator(OpExpr *expr) |
783 | { |
784 | Oid opid = expr->opno; |
785 | |
786 | /* quick out if not a binary operator */ |
787 | if (list_length(expr->args) != 2) |
788 | return false; |
789 | if (opid == ARRAY_EQ_OP) |
790 | { |
791 | /* array_eq is strict, but must check input type to ensure hashable */ |
792 | /* XXX record_eq will need same treatment when it becomes hashable */ |
793 | Node *leftarg = linitial(expr->args); |
794 | |
795 | return op_hashjoinable(opid, exprType(leftarg)); |
796 | } |
797 | else |
798 | { |
799 | /* else must look up the operator properties */ |
800 | HeapTuple tup; |
801 | Form_pg_operator optup; |
802 | |
803 | tup = SearchSysCache1(OPEROID, ObjectIdGetDatum(opid)); |
804 | if (!HeapTupleIsValid(tup)) |
805 | elog(ERROR, "cache lookup failed for operator %u" , opid); |
806 | optup = (Form_pg_operator) GETSTRUCT(tup); |
807 | if (!optup->oprcanhash || !func_strict(optup->oprcode)) |
808 | { |
809 | ReleaseSysCache(tup); |
810 | return false; |
811 | } |
812 | ReleaseSysCache(tup); |
813 | return true; |
814 | } |
815 | } |
816 | |
817 | |
818 | /* |
819 | * SS_process_ctes: process a query's WITH list |
820 | * |
821 | * Consider each CTE in the WITH list and either ignore it (if it's an |
822 | * unreferenced SELECT), "inline" it to create a regular sub-SELECT-in-FROM, |
823 | * or convert it to an initplan. |
824 | * |
825 | * A side effect is to fill in root->cte_plan_ids with a list that |
826 | * parallels root->parse->cteList and provides the subplan ID for |
827 | * each CTE's initplan, or a dummy ID (-1) if we didn't make an initplan. |
828 | */ |
829 | void |
830 | SS_process_ctes(PlannerInfo *root) |
831 | { |
832 | ListCell *lc; |
833 | |
834 | Assert(root->cte_plan_ids == NIL); |
835 | |
836 | foreach(lc, root->parse->cteList) |
837 | { |
838 | CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc); |
839 | CmdType cmdType = ((Query *) cte->ctequery)->commandType; |
840 | Query *subquery; |
841 | PlannerInfo *subroot; |
842 | RelOptInfo *final_rel; |
843 | Path *best_path; |
844 | Plan *plan; |
845 | SubPlan *splan; |
846 | int paramid; |
847 | |
848 | /* |
849 | * Ignore SELECT CTEs that are not actually referenced anywhere. |
850 | */ |
851 | if (cte->cterefcount == 0 && cmdType == CMD_SELECT) |
852 | { |
853 | /* Make a dummy entry in cte_plan_ids */ |
854 | root->cte_plan_ids = lappend_int(root->cte_plan_ids, -1); |
855 | continue; |
856 | } |
857 | |
858 | /* |
859 | * Consider inlining the CTE (creating RTE_SUBQUERY RTE(s)) instead of |
860 | * implementing it as a separately-planned CTE. |
861 | * |
862 | * We cannot inline if any of these conditions hold: |
863 | * |
864 | * 1. The user said not to (the CTEMaterializeAlways option). |
865 | * |
866 | * 2. The CTE is recursive. |
867 | * |
868 | * 3. The CTE has side-effects; this includes either not being a plain |
869 | * SELECT, or containing volatile functions. Inlining might change |
870 | * the side-effects, which would be bad. |
871 | * |
872 | * 4. The CTE is multiply-referenced and contains a self-reference to |
873 | * a recursive CTE outside itself. Inlining would result in multiple |
874 | * recursive self-references, which we don't support. |
875 | * |
876 | * Otherwise, we have an option whether to inline or not. That should |
877 | * always be a win if there's just a single reference, but if the CTE |
878 | * is multiply-referenced then it's unclear: inlining adds duplicate |
879 | * computations, but the ability to absorb restrictions from the outer |
880 | * query level could outweigh that. We do not have nearly enough |
881 | * information at this point to tell whether that's true, so we let |
882 | * the user express a preference. Our default behavior is to inline |
883 | * only singly-referenced CTEs, but a CTE marked CTEMaterializeNever |
884 | * will be inlined even if multiply referenced. |
885 | * |
886 | * Note: we check for volatile functions last, because that's more |
887 | * expensive than the other tests needed. |
888 | */ |
889 | if ((cte->ctematerialized == CTEMaterializeNever || |
890 | (cte->ctematerialized == CTEMaterializeDefault && |
891 | cte->cterefcount == 1)) && |
892 | !cte->cterecursive && |
893 | cmdType == CMD_SELECT && |
894 | !contain_dml(cte->ctequery) && |
895 | (cte->cterefcount <= 1 || |
896 | !contain_outer_selfref(cte->ctequery)) && |
897 | !contain_volatile_functions(cte->ctequery)) |
898 | { |
899 | inline_cte(root, cte); |
900 | /* Make a dummy entry in cte_plan_ids */ |
901 | root->cte_plan_ids = lappend_int(root->cte_plan_ids, -1); |
902 | continue; |
903 | } |
904 | |
905 | /* |
906 | * Copy the source Query node. Probably not necessary, but let's keep |
907 | * this similar to make_subplan. |
908 | */ |
909 | subquery = (Query *) copyObject(cte->ctequery); |
910 | |
911 | /* plan_params should not be in use in current query level */ |
912 | Assert(root->plan_params == NIL); |
913 | |
914 | /* |
915 | * Generate Paths for the CTE query. Always plan for full retrieval |
916 | * --- we don't have enough info to predict otherwise. |
917 | */ |
918 | subroot = subquery_planner(root->glob, subquery, |
919 | root, |
920 | cte->cterecursive, 0.0); |
921 | |
922 | /* |
923 | * Since the current query level doesn't yet contain any RTEs, it |
924 | * should not be possible for the CTE to have requested parameters of |
925 | * this level. |
926 | */ |
927 | if (root->plan_params) |
928 | elog(ERROR, "unexpected outer reference in CTE query" ); |
929 | |
930 | /* |
931 | * Select best Path and turn it into a Plan. At least for now, there |
932 | * seems no reason to postpone doing that. |
933 | */ |
934 | final_rel = fetch_upper_rel(subroot, UPPERREL_FINAL, NULL); |
935 | best_path = final_rel->cheapest_total_path; |
936 | |
937 | plan = create_plan(subroot, best_path); |
938 | |
939 | /* |
940 | * Make a SubPlan node for it. This is just enough unlike |
941 | * build_subplan that we can't share code. |
942 | * |
943 | * Note plan_id, plan_name, and cost fields are set further down. |
944 | */ |
945 | splan = makeNode(SubPlan); |
946 | splan->subLinkType = CTE_SUBLINK; |
947 | splan->testexpr = NULL; |
948 | splan->paramIds = NIL; |
949 | get_first_col_type(plan, &splan->firstColType, &splan->firstColTypmod, |
950 | &splan->firstColCollation); |
951 | splan->useHashTable = false; |
952 | splan->unknownEqFalse = false; |
953 | |
954 | /* |
955 | * CTE scans are not considered for parallelism (cf |
956 | * set_rel_consider_parallel), and even if they were, initPlans aren't |
957 | * parallel-safe. |
958 | */ |
959 | splan->parallel_safe = false; |
960 | splan->setParam = NIL; |
961 | splan->parParam = NIL; |
962 | splan->args = NIL; |
963 | |
964 | /* |
965 | * The node can't have any inputs (since it's an initplan), so the |
966 | * parParam and args lists remain empty. (It could contain references |
967 | * to earlier CTEs' output param IDs, but CTE outputs are not |
968 | * propagated via the args list.) |
969 | */ |
970 | |
971 | /* |
972 | * Assign a param ID to represent the CTE's output. No ordinary |
973 | * "evaluation" of this param slot ever happens, but we use the param |
974 | * ID for setParam/chgParam signaling just as if the CTE plan were |
975 | * returning a simple scalar output. (Also, the executor abuses the |
976 | * ParamExecData slot for this param ID for communication among |
977 | * multiple CteScan nodes that might be scanning this CTE.) |
978 | */ |
979 | paramid = assign_special_exec_param(root); |
980 | splan->setParam = list_make1_int(paramid); |
981 | |
982 | /* |
983 | * Add the subplan and its PlannerInfo to the global lists. |
984 | */ |
985 | root->glob->subplans = lappend(root->glob->subplans, plan); |
986 | root->glob->subroots = lappend(root->glob->subroots, subroot); |
987 | splan->plan_id = list_length(root->glob->subplans); |
988 | |
989 | root->init_plans = lappend(root->init_plans, splan); |
990 | |
991 | root->cte_plan_ids = lappend_int(root->cte_plan_ids, splan->plan_id); |
992 | |
993 | /* Label the subplan for EXPLAIN purposes */ |
994 | splan->plan_name = psprintf("CTE %s" , cte->ctename); |
995 | |
996 | /* Lastly, fill in the cost estimates for use later */ |
997 | cost_subplan(root, splan, plan); |
998 | } |
999 | } |
1000 | |
1001 | /* |
1002 | * contain_dml: is any subquery not a plain SELECT? |
1003 | * |
1004 | * We reject SELECT FOR UPDATE/SHARE as well as INSERT etc. |
1005 | */ |
1006 | static bool |
1007 | contain_dml(Node *node) |
1008 | { |
1009 | return contain_dml_walker(node, NULL); |
1010 | } |
1011 | |
1012 | static bool |
1013 | contain_dml_walker(Node *node, void *context) |
1014 | { |
1015 | if (node == NULL) |
1016 | return false; |
1017 | if (IsA(node, Query)) |
1018 | { |
1019 | Query *query = (Query *) node; |
1020 | |
1021 | if (query->commandType != CMD_SELECT || |
1022 | query->rowMarks != NIL) |
1023 | return true; |
1024 | |
1025 | return query_tree_walker(query, contain_dml_walker, context, 0); |
1026 | } |
1027 | return expression_tree_walker(node, contain_dml_walker, context); |
1028 | } |
1029 | |
1030 | /* |
1031 | * contain_outer_selfref: is there an external recursive self-reference? |
1032 | */ |
1033 | static bool |
1034 | contain_outer_selfref(Node *node) |
1035 | { |
1036 | Index depth = 0; |
1037 | |
1038 | /* |
1039 | * We should be starting with a Query, so that depth will be 1 while |
1040 | * examining its immediate contents. |
1041 | */ |
1042 | Assert(IsA(node, Query)); |
1043 | |
1044 | return contain_outer_selfref_walker(node, &depth); |
1045 | } |
1046 | |
1047 | static bool |
1048 | contain_outer_selfref_walker(Node *node, Index *depth) |
1049 | { |
1050 | if (node == NULL) |
1051 | return false; |
1052 | if (IsA(node, RangeTblEntry)) |
1053 | { |
1054 | RangeTblEntry *rte = (RangeTblEntry *) node; |
1055 | |
1056 | /* |
1057 | * Check for a self-reference to a CTE that's above the Query that our |
1058 | * search started at. |
1059 | */ |
1060 | if (rte->rtekind == RTE_CTE && |
1061 | rte->self_reference && |
1062 | rte->ctelevelsup >= *depth) |
1063 | return true; |
1064 | return false; /* allow range_table_walker to continue */ |
1065 | } |
1066 | if (IsA(node, Query)) |
1067 | { |
1068 | /* Recurse into subquery, tracking nesting depth properly */ |
1069 | Query *query = (Query *) node; |
1070 | bool result; |
1071 | |
1072 | (*depth)++; |
1073 | |
1074 | result = query_tree_walker(query, contain_outer_selfref_walker, |
1075 | (void *) depth, QTW_EXAMINE_RTES_BEFORE); |
1076 | |
1077 | (*depth)--; |
1078 | |
1079 | return result; |
1080 | } |
1081 | return expression_tree_walker(node, contain_outer_selfref_walker, |
1082 | (void *) depth); |
1083 | } |
1084 | |
1085 | /* |
1086 | * inline_cte: convert RTE_CTE references to given CTE into RTE_SUBQUERYs |
1087 | */ |
1088 | static void |
1089 | inline_cte(PlannerInfo *root, CommonTableExpr *cte) |
1090 | { |
1091 | struct inline_cte_walker_context context; |
1092 | |
1093 | context.ctename = cte->ctename; |
1094 | /* Start at levelsup = -1 because we'll immediately increment it */ |
1095 | context.levelsup = -1; |
1096 | context.refcount = cte->cterefcount; |
1097 | context.ctequery = castNode(Query, cte->ctequery); |
1098 | |
1099 | (void) inline_cte_walker((Node *) root->parse, &context); |
1100 | |
1101 | /* Assert we replaced all references */ |
1102 | Assert(context.refcount == 0); |
1103 | } |
1104 | |
1105 | static bool |
1106 | inline_cte_walker(Node *node, inline_cte_walker_context *context) |
1107 | { |
1108 | if (node == NULL) |
1109 | return false; |
1110 | if (IsA(node, Query)) |
1111 | { |
1112 | Query *query = (Query *) node; |
1113 | |
1114 | context->levelsup++; |
1115 | |
1116 | /* |
1117 | * Visit the query's RTE nodes after their contents; otherwise |
1118 | * query_tree_walker would descend into the newly inlined CTE query, |
1119 | * which we don't want. |
1120 | */ |
1121 | (void) query_tree_walker(query, inline_cte_walker, context, |
1122 | QTW_EXAMINE_RTES_AFTER); |
1123 | |
1124 | context->levelsup--; |
1125 | |
1126 | return false; |
1127 | } |
1128 | else if (IsA(node, RangeTblEntry)) |
1129 | { |
1130 | RangeTblEntry *rte = (RangeTblEntry *) node; |
1131 | |
1132 | if (rte->rtekind == RTE_CTE && |
1133 | strcmp(rte->ctename, context->ctename) == 0 && |
1134 | rte->ctelevelsup == context->levelsup) |
1135 | { |
1136 | /* |
1137 | * Found a reference to replace. Generate a copy of the CTE query |
1138 | * with appropriate level adjustment for outer references (e.g., |
1139 | * to other CTEs). |
1140 | */ |
1141 | Query *newquery = copyObject(context->ctequery); |
1142 | |
1143 | if (context->levelsup > 0) |
1144 | IncrementVarSublevelsUp((Node *) newquery, context->levelsup, 1); |
1145 | |
1146 | /* |
1147 | * Convert the RTE_CTE RTE into a RTE_SUBQUERY. |
1148 | * |
1149 | * Historically, a FOR UPDATE clause has been treated as extending |
1150 | * into views and subqueries, but not into CTEs. We preserve this |
1151 | * distinction by not trying to push rowmarks into the new |
1152 | * subquery. |
1153 | */ |
1154 | rte->rtekind = RTE_SUBQUERY; |
1155 | rte->subquery = newquery; |
1156 | rte->security_barrier = false; |
1157 | |
1158 | /* Zero out CTE-specific fields */ |
1159 | rte->ctename = NULL; |
1160 | rte->ctelevelsup = 0; |
1161 | rte->self_reference = false; |
1162 | rte->coltypes = NIL; |
1163 | rte->coltypmods = NIL; |
1164 | rte->colcollations = NIL; |
1165 | |
1166 | /* Count the number of replacements we've done */ |
1167 | context->refcount--; |
1168 | } |
1169 | |
1170 | return false; |
1171 | } |
1172 | |
1173 | return expression_tree_walker(node, inline_cte_walker, context); |
1174 | } |
1175 | |
1176 | |
1177 | /* |
1178 | * convert_ANY_sublink_to_join: try to convert an ANY SubLink to a join |
1179 | * |
1180 | * The caller has found an ANY SubLink at the top level of one of the query's |
1181 | * qual clauses, but has not checked the properties of the SubLink further. |
1182 | * Decide whether it is appropriate to process this SubLink in join style. |
1183 | * If so, form a JoinExpr and return it. Return NULL if the SubLink cannot |
1184 | * be converted to a join. |
1185 | * |
1186 | * The only non-obvious input parameter is available_rels: this is the set |
1187 | * of query rels that can safely be referenced in the sublink expression. |
1188 | * (We must restrict this to avoid changing the semantics when a sublink |
1189 | * is present in an outer join's ON qual.) The conversion must fail if |
1190 | * the converted qual would reference any but these parent-query relids. |
1191 | * |
1192 | * On success, the returned JoinExpr has larg = NULL and rarg = the jointree |
1193 | * item representing the pulled-up subquery. The caller must set larg to |
1194 | * represent the relation(s) on the lefthand side of the new join, and insert |
1195 | * the JoinExpr into the upper query's jointree at an appropriate place |
1196 | * (typically, where the lefthand relation(s) had been). Note that the |
1197 | * passed-in SubLink must also be removed from its original position in the |
1198 | * query quals, since the quals of the returned JoinExpr replace it. |
1199 | * (Notionally, we replace the SubLink with a constant TRUE, then elide the |
1200 | * redundant constant from the qual.) |
1201 | * |
1202 | * On success, the caller is also responsible for recursively applying |
1203 | * pull_up_sublinks processing to the rarg and quals of the returned JoinExpr. |
1204 | * (On failure, there is no need to do anything, since pull_up_sublinks will |
1205 | * be applied when we recursively plan the sub-select.) |
1206 | * |
1207 | * Side effects of a successful conversion include adding the SubLink's |
1208 | * subselect to the query's rangetable, so that it can be referenced in |
1209 | * the JoinExpr's rarg. |
1210 | */ |
1211 | JoinExpr * |
1212 | convert_ANY_sublink_to_join(PlannerInfo *root, SubLink *sublink, |
1213 | Relids available_rels) |
1214 | { |
1215 | JoinExpr *result; |
1216 | Query *parse = root->parse; |
1217 | Query *subselect = (Query *) sublink->subselect; |
1218 | Relids upper_varnos; |
1219 | int rtindex; |
1220 | RangeTblEntry *rte; |
1221 | RangeTblRef *rtr; |
1222 | List *subquery_vars; |
1223 | Node *quals; |
1224 | ParseState *pstate; |
1225 | |
1226 | Assert(sublink->subLinkType == ANY_SUBLINK); |
1227 | |
1228 | /* |
1229 | * The sub-select must not refer to any Vars of the parent query. (Vars of |
1230 | * higher levels should be okay, though.) |
1231 | */ |
1232 | if (contain_vars_of_level((Node *) subselect, 1)) |
1233 | return NULL; |
1234 | |
1235 | /* |
1236 | * The test expression must contain some Vars of the parent query, else |
1237 | * it's not gonna be a join. (Note that it won't have Vars referring to |
1238 | * the subquery, rather Params.) |
1239 | */ |
1240 | upper_varnos = pull_varnos(sublink->testexpr); |
1241 | if (bms_is_empty(upper_varnos)) |
1242 | return NULL; |
1243 | |
1244 | /* |
1245 | * However, it can't refer to anything outside available_rels. |
1246 | */ |
1247 | if (!bms_is_subset(upper_varnos, available_rels)) |
1248 | return NULL; |
1249 | |
1250 | /* |
1251 | * The combining operators and left-hand expressions mustn't be volatile. |
1252 | */ |
1253 | if (contain_volatile_functions(sublink->testexpr)) |
1254 | return NULL; |
1255 | |
1256 | /* Create a dummy ParseState for addRangeTableEntryForSubquery */ |
1257 | pstate = make_parsestate(NULL); |
1258 | |
1259 | /* |
1260 | * Okay, pull up the sub-select into upper range table. |
1261 | * |
1262 | * We rely here on the assumption that the outer query has no references |
1263 | * to the inner (necessarily true, other than the Vars that we build |
1264 | * below). Therefore this is a lot easier than what pull_up_subqueries has |
1265 | * to go through. |
1266 | */ |
1267 | rte = addRangeTableEntryForSubquery(pstate, |
1268 | subselect, |
1269 | makeAlias("ANY_subquery" , NIL), |
1270 | false, |
1271 | false); |
1272 | parse->rtable = lappend(parse->rtable, rte); |
1273 | rtindex = list_length(parse->rtable); |
1274 | |
1275 | /* |
1276 | * Form a RangeTblRef for the pulled-up sub-select. |
1277 | */ |
1278 | rtr = makeNode(RangeTblRef); |
1279 | rtr->rtindex = rtindex; |
1280 | |
1281 | /* |
1282 | * Build a list of Vars representing the subselect outputs. |
1283 | */ |
1284 | subquery_vars = generate_subquery_vars(root, |
1285 | subselect->targetList, |
1286 | rtindex); |
1287 | |
1288 | /* |
1289 | * Build the new join's qual expression, replacing Params with these Vars. |
1290 | */ |
1291 | quals = convert_testexpr(root, sublink->testexpr, subquery_vars); |
1292 | |
1293 | /* |
1294 | * And finally, build the JoinExpr node. |
1295 | */ |
1296 | result = makeNode(JoinExpr); |
1297 | result->jointype = JOIN_SEMI; |
1298 | result->isNatural = false; |
1299 | result->larg = NULL; /* caller must fill this in */ |
1300 | result->rarg = (Node *) rtr; |
1301 | result->usingClause = NIL; |
1302 | result->quals = quals; |
1303 | result->alias = NULL; |
1304 | result->rtindex = 0; /* we don't need an RTE for it */ |
1305 | |
1306 | return result; |
1307 | } |
1308 | |
1309 | /* |
1310 | * convert_EXISTS_sublink_to_join: try to convert an EXISTS SubLink to a join |
1311 | * |
1312 | * The API of this function is identical to convert_ANY_sublink_to_join's, |
1313 | * except that we also support the case where the caller has found NOT EXISTS, |
1314 | * so we need an additional input parameter "under_not". |
1315 | */ |
1316 | JoinExpr * |
1317 | convert_EXISTS_sublink_to_join(PlannerInfo *root, SubLink *sublink, |
1318 | bool under_not, Relids available_rels) |
1319 | { |
1320 | JoinExpr *result; |
1321 | Query *parse = root->parse; |
1322 | Query *subselect = (Query *) sublink->subselect; |
1323 | Node *whereClause; |
1324 | int rtoffset; |
1325 | int varno; |
1326 | Relids clause_varnos; |
1327 | Relids upper_varnos; |
1328 | |
1329 | Assert(sublink->subLinkType == EXISTS_SUBLINK); |
1330 | |
1331 | /* |
1332 | * Can't flatten if it contains WITH. (We could arrange to pull up the |
1333 | * WITH into the parent query's cteList, but that risks changing the |
1334 | * semantics, since a WITH ought to be executed once per associated query |
1335 | * call.) Note that convert_ANY_sublink_to_join doesn't have to reject |
1336 | * this case, since it just produces a subquery RTE that doesn't have to |
1337 | * get flattened into the parent query. |
1338 | */ |
1339 | if (subselect->cteList) |
1340 | return NULL; |
1341 | |
1342 | /* |
1343 | * Copy the subquery so we can modify it safely (see comments in |
1344 | * make_subplan). |
1345 | */ |
1346 | subselect = copyObject(subselect); |
1347 | |
1348 | /* |
1349 | * See if the subquery can be simplified based on the knowledge that it's |
1350 | * being used in EXISTS(). If we aren't able to get rid of its |
1351 | * targetlist, we have to fail, because the pullup operation leaves us |
1352 | * with noplace to evaluate the targetlist. |
1353 | */ |
1354 | if (!simplify_EXISTS_query(root, subselect)) |
1355 | return NULL; |
1356 | |
1357 | /* |
1358 | * Separate out the WHERE clause. (We could theoretically also remove |
1359 | * top-level plain JOIN/ON clauses, but it's probably not worth the |
1360 | * trouble.) |
1361 | */ |
1362 | whereClause = subselect->jointree->quals; |
1363 | subselect->jointree->quals = NULL; |
1364 | |
1365 | /* |
1366 | * The rest of the sub-select must not refer to any Vars of the parent |
1367 | * query. (Vars of higher levels should be okay, though.) |
1368 | */ |
1369 | if (contain_vars_of_level((Node *) subselect, 1)) |
1370 | return NULL; |
1371 | |
1372 | /* |
1373 | * On the other hand, the WHERE clause must contain some Vars of the |
1374 | * parent query, else it's not gonna be a join. |
1375 | */ |
1376 | if (!contain_vars_of_level(whereClause, 1)) |
1377 | return NULL; |
1378 | |
1379 | /* |
1380 | * We don't risk optimizing if the WHERE clause is volatile, either. |
1381 | */ |
1382 | if (contain_volatile_functions(whereClause)) |
1383 | return NULL; |
1384 | |
1385 | /* |
1386 | * The subquery must have a nonempty jointree, but we can make it so. |
1387 | */ |
1388 | replace_empty_jointree(subselect); |
1389 | |
1390 | /* |
1391 | * Prepare to pull up the sub-select into top range table. |
1392 | * |
1393 | * We rely here on the assumption that the outer query has no references |
1394 | * to the inner (necessarily true). Therefore this is a lot easier than |
1395 | * what pull_up_subqueries has to go through. |
1396 | * |
1397 | * In fact, it's even easier than what convert_ANY_sublink_to_join has to |
1398 | * do. The machinations of simplify_EXISTS_query ensured that there is |
1399 | * nothing interesting in the subquery except an rtable and jointree, and |
1400 | * even the jointree FromExpr no longer has quals. So we can just append |
1401 | * the rtable to our own and use the FromExpr in our jointree. But first, |
1402 | * adjust all level-zero varnos in the subquery to account for the rtable |
1403 | * merger. |
1404 | */ |
1405 | rtoffset = list_length(parse->rtable); |
1406 | OffsetVarNodes((Node *) subselect, rtoffset, 0); |
1407 | OffsetVarNodes(whereClause, rtoffset, 0); |
1408 | |
1409 | /* |
1410 | * Upper-level vars in subquery will now be one level closer to their |
1411 | * parent than before; in particular, anything that had been level 1 |
1412 | * becomes level zero. |
1413 | */ |
1414 | IncrementVarSublevelsUp((Node *) subselect, -1, 1); |
1415 | IncrementVarSublevelsUp(whereClause, -1, 1); |
1416 | |
1417 | /* |
1418 | * Now that the WHERE clause is adjusted to match the parent query |
1419 | * environment, we can easily identify all the level-zero rels it uses. |
1420 | * The ones <= rtoffset belong to the upper query; the ones > rtoffset do |
1421 | * not. |
1422 | */ |
1423 | clause_varnos = pull_varnos(whereClause); |
1424 | upper_varnos = NULL; |
1425 | while ((varno = bms_first_member(clause_varnos)) >= 0) |
1426 | { |
1427 | if (varno <= rtoffset) |
1428 | upper_varnos = bms_add_member(upper_varnos, varno); |
1429 | } |
1430 | bms_free(clause_varnos); |
1431 | Assert(!bms_is_empty(upper_varnos)); |
1432 | |
1433 | /* |
1434 | * Now that we've got the set of upper-level varnos, we can make the last |
1435 | * check: only available_rels can be referenced. |
1436 | */ |
1437 | if (!bms_is_subset(upper_varnos, available_rels)) |
1438 | return NULL; |
1439 | |
1440 | /* Now we can attach the modified subquery rtable to the parent */ |
1441 | parse->rtable = list_concat(parse->rtable, subselect->rtable); |
1442 | |
1443 | /* |
1444 | * And finally, build the JoinExpr node. |
1445 | */ |
1446 | result = makeNode(JoinExpr); |
1447 | result->jointype = under_not ? JOIN_ANTI : JOIN_SEMI; |
1448 | result->isNatural = false; |
1449 | result->larg = NULL; /* caller must fill this in */ |
1450 | /* flatten out the FromExpr node if it's useless */ |
1451 | if (list_length(subselect->jointree->fromlist) == 1) |
1452 | result->rarg = (Node *) linitial(subselect->jointree->fromlist); |
1453 | else |
1454 | result->rarg = (Node *) subselect->jointree; |
1455 | result->usingClause = NIL; |
1456 | result->quals = whereClause; |
1457 | result->alias = NULL; |
1458 | result->rtindex = 0; /* we don't need an RTE for it */ |
1459 | |
1460 | return result; |
1461 | } |
1462 | |
1463 | /* |
1464 | * simplify_EXISTS_query: remove any useless stuff in an EXISTS's subquery |
1465 | * |
1466 | * The only thing that matters about an EXISTS query is whether it returns |
1467 | * zero or more than zero rows. Therefore, we can remove certain SQL features |
1468 | * that won't affect that. The only part that is really likely to matter in |
1469 | * typical usage is simplifying the targetlist: it's a common habit to write |
1470 | * "SELECT * FROM" even though there is no need to evaluate any columns. |
1471 | * |
1472 | * Note: by suppressing the targetlist we could cause an observable behavioral |
1473 | * change, namely that any errors that might occur in evaluating the tlist |
1474 | * won't occur, nor will other side-effects of volatile functions. This seems |
1475 | * unlikely to bother anyone in practice. |
1476 | * |
1477 | * Returns true if was able to discard the targetlist, else false. |
1478 | */ |
1479 | static bool |
1480 | simplify_EXISTS_query(PlannerInfo *root, Query *query) |
1481 | { |
1482 | /* |
1483 | * We don't try to simplify at all if the query uses set operations, |
1484 | * aggregates, grouping sets, SRFs, modifying CTEs, HAVING, OFFSET, or FOR |
1485 | * UPDATE/SHARE; none of these seem likely in normal usage and their |
1486 | * possible effects are complex. (Note: we could ignore an "OFFSET 0" |
1487 | * clause, but that traditionally is used as an optimization fence, so we |
1488 | * don't.) |
1489 | */ |
1490 | if (query->commandType != CMD_SELECT || |
1491 | query->setOperations || |
1492 | query->hasAggs || |
1493 | query->groupingSets || |
1494 | query->hasWindowFuncs || |
1495 | query->hasTargetSRFs || |
1496 | query->hasModifyingCTE || |
1497 | query->havingQual || |
1498 | query->limitOffset || |
1499 | query->rowMarks) |
1500 | return false; |
1501 | |
1502 | /* |
1503 | * LIMIT with a constant positive (or NULL) value doesn't affect the |
1504 | * semantics of EXISTS, so let's ignore such clauses. This is worth doing |
1505 | * because people accustomed to certain other DBMSes may be in the habit |
1506 | * of writing EXISTS(SELECT ... LIMIT 1) as an optimization. If there's a |
1507 | * LIMIT with anything else as argument, though, we can't simplify. |
1508 | */ |
1509 | if (query->limitCount) |
1510 | { |
1511 | /* |
1512 | * The LIMIT clause has not yet been through eval_const_expressions, |
1513 | * so we have to apply that here. It might seem like this is a waste |
1514 | * of cycles, since the only case plausibly worth worrying about is |
1515 | * "LIMIT 1" ... but what we'll actually see is "LIMIT int8(1::int4)", |
1516 | * so we have to fold constants or we're not going to recognize it. |
1517 | */ |
1518 | Node *node = eval_const_expressions(root, query->limitCount); |
1519 | Const *limit; |
1520 | |
1521 | /* Might as well update the query if we simplified the clause. */ |
1522 | query->limitCount = node; |
1523 | |
1524 | if (!IsA(node, Const)) |
1525 | return false; |
1526 | |
1527 | limit = (Const *) node; |
1528 | Assert(limit->consttype == INT8OID); |
1529 | if (!limit->constisnull && DatumGetInt64(limit->constvalue) <= 0) |
1530 | return false; |
1531 | |
1532 | /* Whether or not the targetlist is safe, we can drop the LIMIT. */ |
1533 | query->limitCount = NULL; |
1534 | } |
1535 | |
1536 | /* |
1537 | * Otherwise, we can throw away the targetlist, as well as any GROUP, |
1538 | * WINDOW, DISTINCT, and ORDER BY clauses; none of those clauses will |
1539 | * change a nonzero-rows result to zero rows or vice versa. (Furthermore, |
1540 | * since our parsetree representation of these clauses depends on the |
1541 | * targetlist, we'd better throw them away if we drop the targetlist.) |
1542 | */ |
1543 | query->targetList = NIL; |
1544 | query->groupClause = NIL; |
1545 | query->windowClause = NIL; |
1546 | query->distinctClause = NIL; |
1547 | query->sortClause = NIL; |
1548 | query->hasDistinctOn = false; |
1549 | |
1550 | return true; |
1551 | } |
1552 | |
1553 | /* |
1554 | * convert_EXISTS_to_ANY: try to convert EXISTS to a hashable ANY sublink |
1555 | * |
1556 | * The subselect is expected to be a fresh copy that we can munge up, |
1557 | * and to have been successfully passed through simplify_EXISTS_query. |
1558 | * |
1559 | * On success, the modified subselect is returned, and we store a suitable |
1560 | * upper-level test expression at *testexpr, plus a list of the subselect's |
1561 | * output Params at *paramIds. (The test expression is already Param-ified |
1562 | * and hence need not go through convert_testexpr, which is why we have to |
1563 | * deal with the Param IDs specially.) |
1564 | * |
1565 | * On failure, returns NULL. |
1566 | */ |
1567 | static Query * |
1568 | convert_EXISTS_to_ANY(PlannerInfo *root, Query *subselect, |
1569 | Node **testexpr, List **paramIds) |
1570 | { |
1571 | Node *whereClause; |
1572 | List *leftargs, |
1573 | *rightargs, |
1574 | *opids, |
1575 | *opcollations, |
1576 | *newWhere, |
1577 | *tlist, |
1578 | *testlist, |
1579 | *paramids; |
1580 | ListCell *lc, |
1581 | *rc, |
1582 | *oc, |
1583 | *cc; |
1584 | AttrNumber resno; |
1585 | |
1586 | /* |
1587 | * Query must not require a targetlist, since we have to insert a new one. |
1588 | * Caller should have dealt with the case already. |
1589 | */ |
1590 | Assert(subselect->targetList == NIL); |
1591 | |
1592 | /* |
1593 | * Separate out the WHERE clause. (We could theoretically also remove |
1594 | * top-level plain JOIN/ON clauses, but it's probably not worth the |
1595 | * trouble.) |
1596 | */ |
1597 | whereClause = subselect->jointree->quals; |
1598 | subselect->jointree->quals = NULL; |
1599 | |
1600 | /* |
1601 | * The rest of the sub-select must not refer to any Vars of the parent |
1602 | * query. (Vars of higher levels should be okay, though.) |
1603 | * |
1604 | * Note: we need not check for Aggrefs separately because we know the |
1605 | * sub-select is as yet unoptimized; any uplevel Aggref must therefore |
1606 | * contain an uplevel Var reference. This is not the case below ... |
1607 | */ |
1608 | if (contain_vars_of_level((Node *) subselect, 1)) |
1609 | return NULL; |
1610 | |
1611 | /* |
1612 | * We don't risk optimizing if the WHERE clause is volatile, either. |
1613 | */ |
1614 | if (contain_volatile_functions(whereClause)) |
1615 | return NULL; |
1616 | |
1617 | /* |
1618 | * Clean up the WHERE clause by doing const-simplification etc on it. |
1619 | * Aside from simplifying the processing we're about to do, this is |
1620 | * important for being able to pull chunks of the WHERE clause up into the |
1621 | * parent query. Since we are invoked partway through the parent's |
1622 | * preprocess_expression() work, earlier steps of preprocess_expression() |
1623 | * wouldn't get applied to the pulled-up stuff unless we do them here. For |
1624 | * the parts of the WHERE clause that get put back into the child query, |
1625 | * this work is partially duplicative, but it shouldn't hurt. |
1626 | * |
1627 | * Note: we do not run flatten_join_alias_vars. This is OK because any |
1628 | * parent aliases were flattened already, and we're not going to pull any |
1629 | * child Vars (of any description) into the parent. |
1630 | * |
1631 | * Note: passing the parent's root to eval_const_expressions is |
1632 | * technically wrong, but we can get away with it since only the |
1633 | * boundParams (if any) are used, and those would be the same in a |
1634 | * subroot. |
1635 | */ |
1636 | whereClause = eval_const_expressions(root, whereClause); |
1637 | whereClause = (Node *) canonicalize_qual((Expr *) whereClause, false); |
1638 | whereClause = (Node *) make_ands_implicit((Expr *) whereClause); |
1639 | |
1640 | /* |
1641 | * We now have a flattened implicit-AND list of clauses, which we try to |
1642 | * break apart into "outervar = innervar" hash clauses. Anything that |
1643 | * can't be broken apart just goes back into the newWhere list. Note that |
1644 | * we aren't trying hard yet to ensure that we have only outer or only |
1645 | * inner on each side; we'll check that if we get to the end. |
1646 | */ |
1647 | leftargs = rightargs = opids = opcollations = newWhere = NIL; |
1648 | foreach(lc, (List *) whereClause) |
1649 | { |
1650 | OpExpr *expr = (OpExpr *) lfirst(lc); |
1651 | |
1652 | if (IsA(expr, OpExpr) && |
1653 | hash_ok_operator(expr)) |
1654 | { |
1655 | Node *leftarg = (Node *) linitial(expr->args); |
1656 | Node *rightarg = (Node *) lsecond(expr->args); |
1657 | |
1658 | if (contain_vars_of_level(leftarg, 1)) |
1659 | { |
1660 | leftargs = lappend(leftargs, leftarg); |
1661 | rightargs = lappend(rightargs, rightarg); |
1662 | opids = lappend_oid(opids, expr->opno); |
1663 | opcollations = lappend_oid(opcollations, expr->inputcollid); |
1664 | continue; |
1665 | } |
1666 | if (contain_vars_of_level(rightarg, 1)) |
1667 | { |
1668 | /* |
1669 | * We must commute the clause to put the outer var on the |
1670 | * left, because the hashing code in nodeSubplan.c expects |
1671 | * that. This probably shouldn't ever fail, since hashable |
1672 | * operators ought to have commutators, but be paranoid. |
1673 | */ |
1674 | expr->opno = get_commutator(expr->opno); |
1675 | if (OidIsValid(expr->opno) && hash_ok_operator(expr)) |
1676 | { |
1677 | leftargs = lappend(leftargs, rightarg); |
1678 | rightargs = lappend(rightargs, leftarg); |
1679 | opids = lappend_oid(opids, expr->opno); |
1680 | opcollations = lappend_oid(opcollations, expr->inputcollid); |
1681 | continue; |
1682 | } |
1683 | /* If no commutator, no chance to optimize the WHERE clause */ |
1684 | return NULL; |
1685 | } |
1686 | } |
1687 | /* Couldn't handle it as a hash clause */ |
1688 | newWhere = lappend(newWhere, expr); |
1689 | } |
1690 | |
1691 | /* |
1692 | * If we didn't find anything we could convert, fail. |
1693 | */ |
1694 | if (leftargs == NIL) |
1695 | return NULL; |
1696 | |
1697 | /* |
1698 | * There mustn't be any parent Vars or Aggs in the stuff that we intend to |
1699 | * put back into the child query. Note: you might think we don't need to |
1700 | * check for Aggs separately, because an uplevel Agg must contain an |
1701 | * uplevel Var in its argument. But it is possible that the uplevel Var |
1702 | * got optimized away by eval_const_expressions. Consider |
1703 | * |
1704 | * SUM(CASE WHEN false THEN uplevelvar ELSE 0 END) |
1705 | */ |
1706 | if (contain_vars_of_level((Node *) newWhere, 1) || |
1707 | contain_vars_of_level((Node *) rightargs, 1)) |
1708 | return NULL; |
1709 | if (root->parse->hasAggs && |
1710 | (contain_aggs_of_level((Node *) newWhere, 1) || |
1711 | contain_aggs_of_level((Node *) rightargs, 1))) |
1712 | return NULL; |
1713 | |
1714 | /* |
1715 | * And there can't be any child Vars in the stuff we intend to pull up. |
1716 | * (Note: we'd need to check for child Aggs too, except we know the child |
1717 | * has no aggs at all because of simplify_EXISTS_query's check. The same |
1718 | * goes for window functions.) |
1719 | */ |
1720 | if (contain_vars_of_level((Node *) leftargs, 0)) |
1721 | return NULL; |
1722 | |
1723 | /* |
1724 | * Also reject sublinks in the stuff we intend to pull up. (It might be |
1725 | * possible to support this, but doesn't seem worth the complication.) |
1726 | */ |
1727 | if (contain_subplans((Node *) leftargs)) |
1728 | return NULL; |
1729 | |
1730 | /* |
1731 | * Okay, adjust the sublevelsup in the stuff we're pulling up. |
1732 | */ |
1733 | IncrementVarSublevelsUp((Node *) leftargs, -1, 1); |
1734 | |
1735 | /* |
1736 | * Put back any child-level-only WHERE clauses. |
1737 | */ |
1738 | if (newWhere) |
1739 | subselect->jointree->quals = (Node *) make_ands_explicit(newWhere); |
1740 | |
1741 | /* |
1742 | * Build a new targetlist for the child that emits the expressions we |
1743 | * need. Concurrently, build a testexpr for the parent using Params to |
1744 | * reference the child outputs. (Since we generate Params directly here, |
1745 | * there will be no need to convert the testexpr in build_subplan.) |
1746 | */ |
1747 | tlist = testlist = paramids = NIL; |
1748 | resno = 1; |
1749 | forfour(lc, leftargs, rc, rightargs, oc, opids, cc, opcollations) |
1750 | { |
1751 | Node *leftarg = (Node *) lfirst(lc); |
1752 | Node *rightarg = (Node *) lfirst(rc); |
1753 | Oid opid = lfirst_oid(oc); |
1754 | Oid opcollation = lfirst_oid(cc); |
1755 | Param *param; |
1756 | |
1757 | param = generate_new_exec_param(root, |
1758 | exprType(rightarg), |
1759 | exprTypmod(rightarg), |
1760 | exprCollation(rightarg)); |
1761 | tlist = lappend(tlist, |
1762 | makeTargetEntry((Expr *) rightarg, |
1763 | resno++, |
1764 | NULL, |
1765 | false)); |
1766 | testlist = lappend(testlist, |
1767 | make_opclause(opid, BOOLOID, false, |
1768 | (Expr *) leftarg, (Expr *) param, |
1769 | InvalidOid, opcollation)); |
1770 | paramids = lappend_int(paramids, param->paramid); |
1771 | } |
1772 | |
1773 | /* Put everything where it should go, and we're done */ |
1774 | subselect->targetList = tlist; |
1775 | *testexpr = (Node *) make_ands_explicit(testlist); |
1776 | *paramIds = paramids; |
1777 | |
1778 | return subselect; |
1779 | } |
1780 | |
1781 | |
1782 | /* |
1783 | * Replace correlation vars (uplevel vars) with Params. |
1784 | * |
1785 | * Uplevel PlaceHolderVars and aggregates are replaced, too. |
1786 | * |
1787 | * Note: it is critical that this runs immediately after SS_process_sublinks. |
1788 | * Since we do not recurse into the arguments of uplevel PHVs and aggregates, |
1789 | * they will get copied to the appropriate subplan args list in the parent |
1790 | * query with uplevel vars not replaced by Params, but only adjusted in level |
1791 | * (see replace_outer_placeholdervar and replace_outer_agg). That's exactly |
1792 | * what we want for the vars of the parent level --- but if a PHV's or |
1793 | * aggregate's argument contains any further-up variables, they have to be |
1794 | * replaced with Params in their turn. That will happen when the parent level |
1795 | * runs SS_replace_correlation_vars. Therefore it must do so after expanding |
1796 | * its sublinks to subplans. And we don't want any steps in between, else |
1797 | * those steps would never get applied to the argument expressions, either in |
1798 | * the parent or the child level. |
1799 | * |
1800 | * Another fairly tricky thing going on here is the handling of SubLinks in |
1801 | * the arguments of uplevel PHVs/aggregates. Those are not touched inside the |
1802 | * intermediate query level, either. Instead, SS_process_sublinks recurses on |
1803 | * them after copying the PHV or Aggref expression into the parent plan level |
1804 | * (this is actually taken care of in build_subplan). |
1805 | */ |
1806 | Node * |
1807 | SS_replace_correlation_vars(PlannerInfo *root, Node *expr) |
1808 | { |
1809 | /* No setup needed for tree walk, so away we go */ |
1810 | return replace_correlation_vars_mutator(expr, root); |
1811 | } |
1812 | |
1813 | static Node * |
1814 | replace_correlation_vars_mutator(Node *node, PlannerInfo *root) |
1815 | { |
1816 | if (node == NULL) |
1817 | return NULL; |
1818 | if (IsA(node, Var)) |
1819 | { |
1820 | if (((Var *) node)->varlevelsup > 0) |
1821 | return (Node *) replace_outer_var(root, (Var *) node); |
1822 | } |
1823 | if (IsA(node, PlaceHolderVar)) |
1824 | { |
1825 | if (((PlaceHolderVar *) node)->phlevelsup > 0) |
1826 | return (Node *) replace_outer_placeholdervar(root, |
1827 | (PlaceHolderVar *) node); |
1828 | } |
1829 | if (IsA(node, Aggref)) |
1830 | { |
1831 | if (((Aggref *) node)->agglevelsup > 0) |
1832 | return (Node *) replace_outer_agg(root, (Aggref *) node); |
1833 | } |
1834 | if (IsA(node, GroupingFunc)) |
1835 | { |
1836 | if (((GroupingFunc *) node)->agglevelsup > 0) |
1837 | return (Node *) replace_outer_grouping(root, (GroupingFunc *) node); |
1838 | } |
1839 | return expression_tree_mutator(node, |
1840 | replace_correlation_vars_mutator, |
1841 | (void *) root); |
1842 | } |
1843 | |
1844 | /* |
1845 | * Expand SubLinks to SubPlans in the given expression. |
1846 | * |
1847 | * The isQual argument tells whether or not this expression is a WHERE/HAVING |
1848 | * qualifier expression. If it is, any sublinks appearing at top level need |
1849 | * not distinguish FALSE from UNKNOWN return values. |
1850 | */ |
1851 | Node * |
1852 | SS_process_sublinks(PlannerInfo *root, Node *expr, bool isQual) |
1853 | { |
1854 | process_sublinks_context context; |
1855 | |
1856 | context.root = root; |
1857 | context.isTopQual = isQual; |
1858 | return process_sublinks_mutator(expr, &context); |
1859 | } |
1860 | |
1861 | static Node * |
1862 | process_sublinks_mutator(Node *node, process_sublinks_context *context) |
1863 | { |
1864 | process_sublinks_context locContext; |
1865 | |
1866 | locContext.root = context->root; |
1867 | |
1868 | if (node == NULL) |
1869 | return NULL; |
1870 | if (IsA(node, SubLink)) |
1871 | { |
1872 | SubLink *sublink = (SubLink *) node; |
1873 | Node *testexpr; |
1874 | |
1875 | /* |
1876 | * First, recursively process the lefthand-side expressions, if any. |
1877 | * They're not top-level anymore. |
1878 | */ |
1879 | locContext.isTopQual = false; |
1880 | testexpr = process_sublinks_mutator(sublink->testexpr, &locContext); |
1881 | |
1882 | /* |
1883 | * Now build the SubPlan node and make the expr to return. |
1884 | */ |
1885 | return make_subplan(context->root, |
1886 | (Query *) sublink->subselect, |
1887 | sublink->subLinkType, |
1888 | sublink->subLinkId, |
1889 | testexpr, |
1890 | context->isTopQual); |
1891 | } |
1892 | |
1893 | /* |
1894 | * Don't recurse into the arguments of an outer PHV or aggregate here. Any |
1895 | * SubLinks in the arguments have to be dealt with at the outer query |
1896 | * level; they'll be handled when build_subplan collects the PHV or Aggref |
1897 | * into the arguments to be passed down to the current subplan. |
1898 | */ |
1899 | if (IsA(node, PlaceHolderVar)) |
1900 | { |
1901 | if (((PlaceHolderVar *) node)->phlevelsup > 0) |
1902 | return node; |
1903 | } |
1904 | else if (IsA(node, Aggref)) |
1905 | { |
1906 | if (((Aggref *) node)->agglevelsup > 0) |
1907 | return node; |
1908 | } |
1909 | |
1910 | /* |
1911 | * We should never see a SubPlan expression in the input (since this is |
1912 | * the very routine that creates 'em to begin with). We shouldn't find |
1913 | * ourselves invoked directly on a Query, either. |
1914 | */ |
1915 | Assert(!IsA(node, SubPlan)); |
1916 | Assert(!IsA(node, AlternativeSubPlan)); |
1917 | Assert(!IsA(node, Query)); |
1918 | |
1919 | /* |
1920 | * Because make_subplan() could return an AND or OR clause, we have to |
1921 | * take steps to preserve AND/OR flatness of a qual. We assume the input |
1922 | * has been AND/OR flattened and so we need no recursion here. |
1923 | * |
1924 | * (Due to the coding here, we will not get called on the List subnodes of |
1925 | * an AND; and the input is *not* yet in implicit-AND format. So no check |
1926 | * is needed for a bare List.) |
1927 | * |
1928 | * Anywhere within the top-level AND/OR clause structure, we can tell |
1929 | * make_subplan() that NULL and FALSE are interchangeable. So isTopQual |
1930 | * propagates down in both cases. (Note that this is unlike the meaning |
1931 | * of "top level qual" used in most other places in Postgres.) |
1932 | */ |
1933 | if (is_andclause(node)) |
1934 | { |
1935 | List *newargs = NIL; |
1936 | ListCell *l; |
1937 | |
1938 | /* Still at qual top-level */ |
1939 | locContext.isTopQual = context->isTopQual; |
1940 | |
1941 | foreach(l, ((BoolExpr *) node)->args) |
1942 | { |
1943 | Node *newarg; |
1944 | |
1945 | newarg = process_sublinks_mutator(lfirst(l), &locContext); |
1946 | if (is_andclause(newarg)) |
1947 | newargs = list_concat(newargs, ((BoolExpr *) newarg)->args); |
1948 | else |
1949 | newargs = lappend(newargs, newarg); |
1950 | } |
1951 | return (Node *) make_andclause(newargs); |
1952 | } |
1953 | |
1954 | if (is_orclause(node)) |
1955 | { |
1956 | List *newargs = NIL; |
1957 | ListCell *l; |
1958 | |
1959 | /* Still at qual top-level */ |
1960 | locContext.isTopQual = context->isTopQual; |
1961 | |
1962 | foreach(l, ((BoolExpr *) node)->args) |
1963 | { |
1964 | Node *newarg; |
1965 | |
1966 | newarg = process_sublinks_mutator(lfirst(l), &locContext); |
1967 | if (is_orclause(newarg)) |
1968 | newargs = list_concat(newargs, ((BoolExpr *) newarg)->args); |
1969 | else |
1970 | newargs = lappend(newargs, newarg); |
1971 | } |
1972 | return (Node *) make_orclause(newargs); |
1973 | } |
1974 | |
1975 | /* |
1976 | * If we recurse down through anything other than an AND or OR node, we |
1977 | * are definitely not at top qual level anymore. |
1978 | */ |
1979 | locContext.isTopQual = false; |
1980 | |
1981 | return expression_tree_mutator(node, |
1982 | process_sublinks_mutator, |
1983 | (void *) &locContext); |
1984 | } |
1985 | |
1986 | /* |
1987 | * SS_identify_outer_params - identify the Params available from outer levels |
1988 | * |
1989 | * This must be run after SS_replace_correlation_vars and SS_process_sublinks |
1990 | * processing is complete in a given query level as well as all of its |
1991 | * descendant levels (which means it's most practical to do it at the end of |
1992 | * processing the query level). We compute the set of paramIds that outer |
1993 | * levels will make available to this level+descendants, and record it in |
1994 | * root->outer_params for use while computing extParam/allParam sets in final |
1995 | * plan cleanup. (We can't just compute it then, because the upper levels' |
1996 | * plan_params lists are transient and will be gone by then.) |
1997 | */ |
1998 | void |
1999 | SS_identify_outer_params(PlannerInfo *root) |
2000 | { |
2001 | Bitmapset *outer_params; |
2002 | PlannerInfo *proot; |
2003 | ListCell *l; |
2004 | |
2005 | /* |
2006 | * If no parameters have been assigned anywhere in the tree, we certainly |
2007 | * don't need to do anything here. |
2008 | */ |
2009 | if (root->glob->paramExecTypes == NIL) |
2010 | return; |
2011 | |
2012 | /* |
2013 | * Scan all query levels above this one to see which parameters are due to |
2014 | * be available from them, either because lower query levels have |
2015 | * requested them (via plan_params) or because they will be available from |
2016 | * initPlans of those levels. |
2017 | */ |
2018 | outer_params = NULL; |
2019 | for (proot = root->parent_root; proot != NULL; proot = proot->parent_root) |
2020 | { |
2021 | /* Include ordinary Var/PHV/Aggref params */ |
2022 | foreach(l, proot->plan_params) |
2023 | { |
2024 | PlannerParamItem *pitem = (PlannerParamItem *) lfirst(l); |
2025 | |
2026 | outer_params = bms_add_member(outer_params, pitem->paramId); |
2027 | } |
2028 | /* Include any outputs of outer-level initPlans */ |
2029 | foreach(l, proot->init_plans) |
2030 | { |
2031 | SubPlan *initsubplan = (SubPlan *) lfirst(l); |
2032 | ListCell *l2; |
2033 | |
2034 | foreach(l2, initsubplan->setParam) |
2035 | { |
2036 | outer_params = bms_add_member(outer_params, lfirst_int(l2)); |
2037 | } |
2038 | } |
2039 | /* Include worktable ID, if a recursive query is being planned */ |
2040 | if (proot->wt_param_id >= 0) |
2041 | outer_params = bms_add_member(outer_params, proot->wt_param_id); |
2042 | } |
2043 | root->outer_params = outer_params; |
2044 | } |
2045 | |
2046 | /* |
2047 | * SS_charge_for_initplans - account for initplans in Path costs & parallelism |
2048 | * |
2049 | * If any initPlans have been created in the current query level, they will |
2050 | * get attached to the Plan tree created from whichever Path we select from |
2051 | * the given rel. Increment all that rel's Paths' costs to account for them, |
2052 | * and make sure the paths get marked as parallel-unsafe, since we can't |
2053 | * currently transmit initPlans to parallel workers. |
2054 | * |
2055 | * This is separate from SS_attach_initplans because we might conditionally |
2056 | * create more initPlans during create_plan(), depending on which Path we |
2057 | * select. However, Paths that would generate such initPlans are expected |
2058 | * to have included their cost already. |
2059 | */ |
2060 | void |
2061 | SS_charge_for_initplans(PlannerInfo *root, RelOptInfo *final_rel) |
2062 | { |
2063 | Cost initplan_cost; |
2064 | ListCell *lc; |
2065 | |
2066 | /* Nothing to do if no initPlans */ |
2067 | if (root->init_plans == NIL) |
2068 | return; |
2069 | |
2070 | /* |
2071 | * Compute the cost increment just once, since it will be the same for all |
2072 | * Paths. We assume each initPlan gets run once during top plan startup. |
2073 | * This is a conservative overestimate, since in fact an initPlan might be |
2074 | * executed later than plan startup, or even not at all. |
2075 | */ |
2076 | initplan_cost = 0; |
2077 | foreach(lc, root->init_plans) |
2078 | { |
2079 | SubPlan *initsubplan = (SubPlan *) lfirst(lc); |
2080 | |
2081 | initplan_cost += initsubplan->startup_cost + initsubplan->per_call_cost; |
2082 | } |
2083 | |
2084 | /* |
2085 | * Now adjust the costs and parallel_safe flags. |
2086 | */ |
2087 | foreach(lc, final_rel->pathlist) |
2088 | { |
2089 | Path *path = (Path *) lfirst(lc); |
2090 | |
2091 | path->startup_cost += initplan_cost; |
2092 | path->total_cost += initplan_cost; |
2093 | path->parallel_safe = false; |
2094 | } |
2095 | |
2096 | /* |
2097 | * Forget about any partial paths and clear consider_parallel, too; |
2098 | * they're not usable if we attached an initPlan. |
2099 | */ |
2100 | final_rel->partial_pathlist = NIL; |
2101 | final_rel->consider_parallel = false; |
2102 | |
2103 | /* We needn't do set_cheapest() here, caller will do it */ |
2104 | } |
2105 | |
2106 | /* |
2107 | * SS_attach_initplans - attach initplans to topmost plan node |
2108 | * |
2109 | * Attach any initplans created in the current query level to the specified |
2110 | * plan node, which should normally be the topmost node for the query level. |
2111 | * (In principle the initPlans could go in any node at or above where they're |
2112 | * referenced; but there seems no reason to put them any lower than the |
2113 | * topmost node, so we don't bother to track exactly where they came from.) |
2114 | * We do not touch the plan node's cost; the initplans should have been |
2115 | * accounted for in path costing. |
2116 | */ |
2117 | void |
2118 | SS_attach_initplans(PlannerInfo *root, Plan *plan) |
2119 | { |
2120 | plan->initPlan = root->init_plans; |
2121 | } |
2122 | |
2123 | /* |
2124 | * SS_finalize_plan - do final parameter processing for a completed Plan. |
2125 | * |
2126 | * This recursively computes the extParam and allParam sets for every Plan |
2127 | * node in the given plan tree. (Oh, and RangeTblFunction.funcparams too.) |
2128 | * |
2129 | * We assume that SS_finalize_plan has already been run on any initplans or |
2130 | * subplans the plan tree could reference. |
2131 | */ |
2132 | void |
2133 | SS_finalize_plan(PlannerInfo *root, Plan *plan) |
2134 | { |
2135 | /* No setup needed, just recurse through plan tree. */ |
2136 | (void) finalize_plan(root, plan, -1, root->outer_params, NULL); |
2137 | } |
2138 | |
2139 | /* |
2140 | * Recursive processing of all nodes in the plan tree |
2141 | * |
2142 | * gather_param is the rescan_param of an ancestral Gather/GatherMerge, |
2143 | * or -1 if there is none. |
2144 | * |
2145 | * valid_params is the set of param IDs supplied by outer plan levels |
2146 | * that are valid to reference in this plan node or its children. |
2147 | * |
2148 | * scan_params is a set of param IDs to force scan plan nodes to reference. |
2149 | * This is for EvalPlanQual support, and is always NULL at the top of the |
2150 | * recursion. |
2151 | * |
2152 | * The return value is the computed allParam set for the given Plan node. |
2153 | * This is just an internal notational convenience: we can add a child |
2154 | * plan's allParams to the set of param IDs of interest to this level |
2155 | * in the same statement that recurses to that child. |
2156 | * |
2157 | * Do not scribble on caller's values of valid_params or scan_params! |
2158 | * |
2159 | * Note: although we attempt to deal with initPlans anywhere in the tree, the |
2160 | * logic is not really right. The problem is that a plan node might return an |
2161 | * output Param of its initPlan as a targetlist item, in which case it's valid |
2162 | * for the parent plan level to reference that same Param; the parent's usage |
2163 | * will be converted into a Var referencing the child plan node by setrefs.c. |
2164 | * But this function would see the parent's reference as out of scope and |
2165 | * complain about it. For now, this does not matter because the planner only |
2166 | * attaches initPlans to the topmost plan node in a query level, so the case |
2167 | * doesn't arise. If we ever merge this processing into setrefs.c, maybe it |
2168 | * can be handled more cleanly. |
2169 | */ |
2170 | static Bitmapset * |
2171 | finalize_plan(PlannerInfo *root, Plan *plan, |
2172 | int gather_param, |
2173 | Bitmapset *valid_params, |
2174 | Bitmapset *scan_params) |
2175 | { |
2176 | finalize_primnode_context context; |
2177 | int locally_added_param; |
2178 | Bitmapset *nestloop_params; |
2179 | Bitmapset *initExtParam; |
2180 | Bitmapset *initSetParam; |
2181 | Bitmapset *child_params; |
2182 | ListCell *l; |
2183 | |
2184 | if (plan == NULL) |
2185 | return NULL; |
2186 | |
2187 | context.root = root; |
2188 | context.paramids = NULL; /* initialize set to empty */ |
2189 | locally_added_param = -1; /* there isn't one */ |
2190 | nestloop_params = NULL; /* there aren't any */ |
2191 | |
2192 | /* |
2193 | * Examine any initPlans to determine the set of external params they |
2194 | * reference and the set of output params they supply. (We assume |
2195 | * SS_finalize_plan was run on them already.) |
2196 | */ |
2197 | initExtParam = initSetParam = NULL; |
2198 | foreach(l, plan->initPlan) |
2199 | { |
2200 | SubPlan *initsubplan = (SubPlan *) lfirst(l); |
2201 | Plan *initplan = planner_subplan_get_plan(root, initsubplan); |
2202 | ListCell *l2; |
2203 | |
2204 | initExtParam = bms_add_members(initExtParam, initplan->extParam); |
2205 | foreach(l2, initsubplan->setParam) |
2206 | { |
2207 | initSetParam = bms_add_member(initSetParam, lfirst_int(l2)); |
2208 | } |
2209 | } |
2210 | |
2211 | /* Any setParams are validly referenceable in this node and children */ |
2212 | if (initSetParam) |
2213 | valid_params = bms_union(valid_params, initSetParam); |
2214 | |
2215 | /* |
2216 | * When we call finalize_primnode, context.paramids sets are automatically |
2217 | * merged together. But when recursing to self, we have to do it the hard |
2218 | * way. We want the paramids set to include params in subplans as well as |
2219 | * at this level. |
2220 | */ |
2221 | |
2222 | /* Find params in targetlist and qual */ |
2223 | finalize_primnode((Node *) plan->targetlist, &context); |
2224 | finalize_primnode((Node *) plan->qual, &context); |
2225 | |
2226 | /* |
2227 | * If it's a parallel-aware scan node, mark it as dependent on the parent |
2228 | * Gather/GatherMerge's rescan Param. |
2229 | */ |
2230 | if (plan->parallel_aware) |
2231 | { |
2232 | if (gather_param < 0) |
2233 | elog(ERROR, "parallel-aware plan node is not below a Gather" ); |
2234 | context.paramids = |
2235 | bms_add_member(context.paramids, gather_param); |
2236 | } |
2237 | |
2238 | /* Check additional node-type-specific fields */ |
2239 | switch (nodeTag(plan)) |
2240 | { |
2241 | case T_Result: |
2242 | finalize_primnode(((Result *) plan)->resconstantqual, |
2243 | &context); |
2244 | break; |
2245 | |
2246 | case T_SeqScan: |
2247 | context.paramids = bms_add_members(context.paramids, scan_params); |
2248 | break; |
2249 | |
2250 | case T_SampleScan: |
2251 | finalize_primnode((Node *) ((SampleScan *) plan)->tablesample, |
2252 | &context); |
2253 | context.paramids = bms_add_members(context.paramids, scan_params); |
2254 | break; |
2255 | |
2256 | case T_IndexScan: |
2257 | finalize_primnode((Node *) ((IndexScan *) plan)->indexqual, |
2258 | &context); |
2259 | finalize_primnode((Node *) ((IndexScan *) plan)->indexorderby, |
2260 | &context); |
2261 | |
2262 | /* |
2263 | * we need not look at indexqualorig, since it will have the same |
2264 | * param references as indexqual. Likewise, we can ignore |
2265 | * indexorderbyorig. |
2266 | */ |
2267 | context.paramids = bms_add_members(context.paramids, scan_params); |
2268 | break; |
2269 | |
2270 | case T_IndexOnlyScan: |
2271 | finalize_primnode((Node *) ((IndexOnlyScan *) plan)->indexqual, |
2272 | &context); |
2273 | finalize_primnode((Node *) ((IndexOnlyScan *) plan)->indexorderby, |
2274 | &context); |
2275 | |
2276 | /* |
2277 | * we need not look at indextlist, since it cannot contain Params. |
2278 | */ |
2279 | context.paramids = bms_add_members(context.paramids, scan_params); |
2280 | break; |
2281 | |
2282 | case T_BitmapIndexScan: |
2283 | finalize_primnode((Node *) ((BitmapIndexScan *) plan)->indexqual, |
2284 | &context); |
2285 | |
2286 | /* |
2287 | * we need not look at indexqualorig, since it will have the same |
2288 | * param references as indexqual. |
2289 | */ |
2290 | break; |
2291 | |
2292 | case T_BitmapHeapScan: |
2293 | finalize_primnode((Node *) ((BitmapHeapScan *) plan)->bitmapqualorig, |
2294 | &context); |
2295 | context.paramids = bms_add_members(context.paramids, scan_params); |
2296 | break; |
2297 | |
2298 | case T_TidScan: |
2299 | finalize_primnode((Node *) ((TidScan *) plan)->tidquals, |
2300 | &context); |
2301 | context.paramids = bms_add_members(context.paramids, scan_params); |
2302 | break; |
2303 | |
2304 | case T_SubqueryScan: |
2305 | { |
2306 | SubqueryScan *sscan = (SubqueryScan *) plan; |
2307 | RelOptInfo *rel; |
2308 | Bitmapset *subquery_params; |
2309 | |
2310 | /* We must run finalize_plan on the subquery */ |
2311 | rel = find_base_rel(root, sscan->scan.scanrelid); |
2312 | subquery_params = rel->subroot->outer_params; |
2313 | if (gather_param >= 0) |
2314 | subquery_params = bms_add_member(bms_copy(subquery_params), |
2315 | gather_param); |
2316 | finalize_plan(rel->subroot, sscan->subplan, gather_param, |
2317 | subquery_params, NULL); |
2318 | |
2319 | /* Now we can add its extParams to the parent's params */ |
2320 | context.paramids = bms_add_members(context.paramids, |
2321 | sscan->subplan->extParam); |
2322 | /* We need scan_params too, though */ |
2323 | context.paramids = bms_add_members(context.paramids, |
2324 | scan_params); |
2325 | } |
2326 | break; |
2327 | |
2328 | case T_FunctionScan: |
2329 | { |
2330 | FunctionScan *fscan = (FunctionScan *) plan; |
2331 | ListCell *lc; |
2332 | |
2333 | /* |
2334 | * Call finalize_primnode independently on each function |
2335 | * expression, so that we can record which params are |
2336 | * referenced in each, in order to decide which need |
2337 | * re-evaluating during rescan. |
2338 | */ |
2339 | foreach(lc, fscan->functions) |
2340 | { |
2341 | RangeTblFunction *rtfunc = (RangeTblFunction *) lfirst(lc); |
2342 | finalize_primnode_context funccontext; |
2343 | |
2344 | funccontext = context; |
2345 | funccontext.paramids = NULL; |
2346 | |
2347 | finalize_primnode(rtfunc->funcexpr, &funccontext); |
2348 | |
2349 | /* remember results for execution */ |
2350 | rtfunc->funcparams = funccontext.paramids; |
2351 | |
2352 | /* add the function's params to the overall set */ |
2353 | context.paramids = bms_add_members(context.paramids, |
2354 | funccontext.paramids); |
2355 | } |
2356 | |
2357 | context.paramids = bms_add_members(context.paramids, |
2358 | scan_params); |
2359 | } |
2360 | break; |
2361 | |
2362 | case T_TableFuncScan: |
2363 | finalize_primnode((Node *) ((TableFuncScan *) plan)->tablefunc, |
2364 | &context); |
2365 | context.paramids = bms_add_members(context.paramids, scan_params); |
2366 | break; |
2367 | |
2368 | case T_ValuesScan: |
2369 | finalize_primnode((Node *) ((ValuesScan *) plan)->values_lists, |
2370 | &context); |
2371 | context.paramids = bms_add_members(context.paramids, scan_params); |
2372 | break; |
2373 | |
2374 | case T_CteScan: |
2375 | { |
2376 | /* |
2377 | * You might think we should add the node's cteParam to |
2378 | * paramids, but we shouldn't because that param is just a |
2379 | * linkage mechanism for multiple CteScan nodes for the same |
2380 | * CTE; it is never used for changed-param signaling. What we |
2381 | * have to do instead is to find the referenced CTE plan and |
2382 | * incorporate its external paramids, so that the correct |
2383 | * things will happen if the CTE references outer-level |
2384 | * variables. See test cases for bug #4902. (We assume |
2385 | * SS_finalize_plan was run on the CTE plan already.) |
2386 | */ |
2387 | int plan_id = ((CteScan *) plan)->ctePlanId; |
2388 | Plan *cteplan; |
2389 | |
2390 | /* so, do this ... */ |
2391 | if (plan_id < 1 || plan_id > list_length(root->glob->subplans)) |
2392 | elog(ERROR, "could not find plan for CteScan referencing plan ID %d" , |
2393 | plan_id); |
2394 | cteplan = (Plan *) list_nth(root->glob->subplans, plan_id - 1); |
2395 | context.paramids = |
2396 | bms_add_members(context.paramids, cteplan->extParam); |
2397 | |
2398 | #ifdef NOT_USED |
2399 | /* ... but not this */ |
2400 | context.paramids = |
2401 | bms_add_member(context.paramids, |
2402 | ((CteScan *) plan)->cteParam); |
2403 | #endif |
2404 | |
2405 | context.paramids = bms_add_members(context.paramids, |
2406 | scan_params); |
2407 | } |
2408 | break; |
2409 | |
2410 | case T_WorkTableScan: |
2411 | context.paramids = |
2412 | bms_add_member(context.paramids, |
2413 | ((WorkTableScan *) plan)->wtParam); |
2414 | context.paramids = bms_add_members(context.paramids, scan_params); |
2415 | break; |
2416 | |
2417 | case T_NamedTuplestoreScan: |
2418 | context.paramids = bms_add_members(context.paramids, scan_params); |
2419 | break; |
2420 | |
2421 | case T_ForeignScan: |
2422 | { |
2423 | ForeignScan *fscan = (ForeignScan *) plan; |
2424 | |
2425 | finalize_primnode((Node *) fscan->fdw_exprs, |
2426 | &context); |
2427 | finalize_primnode((Node *) fscan->fdw_recheck_quals, |
2428 | &context); |
2429 | |
2430 | /* We assume fdw_scan_tlist cannot contain Params */ |
2431 | context.paramids = bms_add_members(context.paramids, |
2432 | scan_params); |
2433 | } |
2434 | break; |
2435 | |
2436 | case T_CustomScan: |
2437 | { |
2438 | CustomScan *cscan = (CustomScan *) plan; |
2439 | ListCell *lc; |
2440 | |
2441 | finalize_primnode((Node *) cscan->custom_exprs, |
2442 | &context); |
2443 | /* We assume custom_scan_tlist cannot contain Params */ |
2444 | context.paramids = |
2445 | bms_add_members(context.paramids, scan_params); |
2446 | |
2447 | /* child nodes if any */ |
2448 | foreach(lc, cscan->custom_plans) |
2449 | { |
2450 | context.paramids = |
2451 | bms_add_members(context.paramids, |
2452 | finalize_plan(root, |
2453 | (Plan *) lfirst(lc), |
2454 | gather_param, |
2455 | valid_params, |
2456 | scan_params)); |
2457 | } |
2458 | } |
2459 | break; |
2460 | |
2461 | case T_ModifyTable: |
2462 | { |
2463 | ModifyTable *mtplan = (ModifyTable *) plan; |
2464 | ListCell *l; |
2465 | |
2466 | /* Force descendant scan nodes to reference epqParam */ |
2467 | locally_added_param = mtplan->epqParam; |
2468 | valid_params = bms_add_member(bms_copy(valid_params), |
2469 | locally_added_param); |
2470 | scan_params = bms_add_member(bms_copy(scan_params), |
2471 | locally_added_param); |
2472 | finalize_primnode((Node *) mtplan->returningLists, |
2473 | &context); |
2474 | finalize_primnode((Node *) mtplan->onConflictSet, |
2475 | &context); |
2476 | finalize_primnode((Node *) mtplan->onConflictWhere, |
2477 | &context); |
2478 | /* exclRelTlist contains only Vars, doesn't need examination */ |
2479 | foreach(l, mtplan->plans) |
2480 | { |
2481 | context.paramids = |
2482 | bms_add_members(context.paramids, |
2483 | finalize_plan(root, |
2484 | (Plan *) lfirst(l), |
2485 | gather_param, |
2486 | valid_params, |
2487 | scan_params)); |
2488 | } |
2489 | } |
2490 | break; |
2491 | |
2492 | case T_Append: |
2493 | { |
2494 | ListCell *l; |
2495 | |
2496 | foreach(l, ((Append *) plan)->appendplans) |
2497 | { |
2498 | context.paramids = |
2499 | bms_add_members(context.paramids, |
2500 | finalize_plan(root, |
2501 | (Plan *) lfirst(l), |
2502 | gather_param, |
2503 | valid_params, |
2504 | scan_params)); |
2505 | } |
2506 | } |
2507 | break; |
2508 | |
2509 | case T_MergeAppend: |
2510 | { |
2511 | ListCell *l; |
2512 | |
2513 | foreach(l, ((MergeAppend *) plan)->mergeplans) |
2514 | { |
2515 | context.paramids = |
2516 | bms_add_members(context.paramids, |
2517 | finalize_plan(root, |
2518 | (Plan *) lfirst(l), |
2519 | gather_param, |
2520 | valid_params, |
2521 | scan_params)); |
2522 | } |
2523 | } |
2524 | break; |
2525 | |
2526 | case T_BitmapAnd: |
2527 | { |
2528 | ListCell *l; |
2529 | |
2530 | foreach(l, ((BitmapAnd *) plan)->bitmapplans) |
2531 | { |
2532 | context.paramids = |
2533 | bms_add_members(context.paramids, |
2534 | finalize_plan(root, |
2535 | (Plan *) lfirst(l), |
2536 | gather_param, |
2537 | valid_params, |
2538 | scan_params)); |
2539 | } |
2540 | } |
2541 | break; |
2542 | |
2543 | case T_BitmapOr: |
2544 | { |
2545 | ListCell *l; |
2546 | |
2547 | foreach(l, ((BitmapOr *) plan)->bitmapplans) |
2548 | { |
2549 | context.paramids = |
2550 | bms_add_members(context.paramids, |
2551 | finalize_plan(root, |
2552 | (Plan *) lfirst(l), |
2553 | gather_param, |
2554 | valid_params, |
2555 | scan_params)); |
2556 | } |
2557 | } |
2558 | break; |
2559 | |
2560 | case T_NestLoop: |
2561 | { |
2562 | ListCell *l; |
2563 | |
2564 | finalize_primnode((Node *) ((Join *) plan)->joinqual, |
2565 | &context); |
2566 | /* collect set of params that will be passed to right child */ |
2567 | foreach(l, ((NestLoop *) plan)->nestParams) |
2568 | { |
2569 | NestLoopParam *nlp = (NestLoopParam *) lfirst(l); |
2570 | |
2571 | nestloop_params = bms_add_member(nestloop_params, |
2572 | nlp->paramno); |
2573 | } |
2574 | } |
2575 | break; |
2576 | |
2577 | case T_MergeJoin: |
2578 | finalize_primnode((Node *) ((Join *) plan)->joinqual, |
2579 | &context); |
2580 | finalize_primnode((Node *) ((MergeJoin *) plan)->mergeclauses, |
2581 | &context); |
2582 | break; |
2583 | |
2584 | case T_HashJoin: |
2585 | finalize_primnode((Node *) ((Join *) plan)->joinqual, |
2586 | &context); |
2587 | finalize_primnode((Node *) ((HashJoin *) plan)->hashclauses, |
2588 | &context); |
2589 | break; |
2590 | |
2591 | case T_Limit: |
2592 | finalize_primnode(((Limit *) plan)->limitOffset, |
2593 | &context); |
2594 | finalize_primnode(((Limit *) plan)->limitCount, |
2595 | &context); |
2596 | break; |
2597 | |
2598 | case T_RecursiveUnion: |
2599 | /* child nodes are allowed to reference wtParam */ |
2600 | locally_added_param = ((RecursiveUnion *) plan)->wtParam; |
2601 | valid_params = bms_add_member(bms_copy(valid_params), |
2602 | locally_added_param); |
2603 | /* wtParam does *not* get added to scan_params */ |
2604 | break; |
2605 | |
2606 | case T_LockRows: |
2607 | /* Force descendant scan nodes to reference epqParam */ |
2608 | locally_added_param = ((LockRows *) plan)->epqParam; |
2609 | valid_params = bms_add_member(bms_copy(valid_params), |
2610 | locally_added_param); |
2611 | scan_params = bms_add_member(bms_copy(scan_params), |
2612 | locally_added_param); |
2613 | break; |
2614 | |
2615 | case T_Agg: |
2616 | { |
2617 | Agg *agg = (Agg *) plan; |
2618 | |
2619 | /* |
2620 | * AGG_HASHED plans need to know which Params are referenced |
2621 | * in aggregate calls. Do a separate scan to identify them. |
2622 | */ |
2623 | if (agg->aggstrategy == AGG_HASHED) |
2624 | { |
2625 | finalize_primnode_context aggcontext; |
2626 | |
2627 | aggcontext.root = root; |
2628 | aggcontext.paramids = NULL; |
2629 | finalize_agg_primnode((Node *) agg->plan.targetlist, |
2630 | &aggcontext); |
2631 | finalize_agg_primnode((Node *) agg->plan.qual, |
2632 | &aggcontext); |
2633 | agg->aggParams = aggcontext.paramids; |
2634 | } |
2635 | } |
2636 | break; |
2637 | |
2638 | case T_WindowAgg: |
2639 | finalize_primnode(((WindowAgg *) plan)->startOffset, |
2640 | &context); |
2641 | finalize_primnode(((WindowAgg *) plan)->endOffset, |
2642 | &context); |
2643 | break; |
2644 | |
2645 | case T_Gather: |
2646 | /* child nodes are allowed to reference rescan_param, if any */ |
2647 | locally_added_param = ((Gather *) plan)->rescan_param; |
2648 | if (locally_added_param >= 0) |
2649 | { |
2650 | valid_params = bms_add_member(bms_copy(valid_params), |
2651 | locally_added_param); |
2652 | |
2653 | /* |
2654 | * We currently don't support nested Gathers. The issue so |
2655 | * far as this function is concerned would be how to identify |
2656 | * which child nodes depend on which Gather. |
2657 | */ |
2658 | Assert(gather_param < 0); |
2659 | /* Pass down rescan_param to child parallel-aware nodes */ |
2660 | gather_param = locally_added_param; |
2661 | } |
2662 | /* rescan_param does *not* get added to scan_params */ |
2663 | break; |
2664 | |
2665 | case T_GatherMerge: |
2666 | /* child nodes are allowed to reference rescan_param, if any */ |
2667 | locally_added_param = ((GatherMerge *) plan)->rescan_param; |
2668 | if (locally_added_param >= 0) |
2669 | { |
2670 | valid_params = bms_add_member(bms_copy(valid_params), |
2671 | locally_added_param); |
2672 | |
2673 | /* |
2674 | * We currently don't support nested Gathers. The issue so |
2675 | * far as this function is concerned would be how to identify |
2676 | * which child nodes depend on which Gather. |
2677 | */ |
2678 | Assert(gather_param < 0); |
2679 | /* Pass down rescan_param to child parallel-aware nodes */ |
2680 | gather_param = locally_added_param; |
2681 | } |
2682 | /* rescan_param does *not* get added to scan_params */ |
2683 | break; |
2684 | |
2685 | case T_ProjectSet: |
2686 | case T_Hash: |
2687 | case T_Material: |
2688 | case T_Sort: |
2689 | case T_Unique: |
2690 | case T_SetOp: |
2691 | case T_Group: |
2692 | /* no node-type-specific fields need fixing */ |
2693 | break; |
2694 | |
2695 | default: |
2696 | elog(ERROR, "unrecognized node type: %d" , |
2697 | (int) nodeTag(plan)); |
2698 | } |
2699 | |
2700 | /* Process left and right child plans, if any */ |
2701 | child_params = finalize_plan(root, |
2702 | plan->lefttree, |
2703 | gather_param, |
2704 | valid_params, |
2705 | scan_params); |
2706 | context.paramids = bms_add_members(context.paramids, child_params); |
2707 | |
2708 | if (nestloop_params) |
2709 | { |
2710 | /* right child can reference nestloop_params as well as valid_params */ |
2711 | child_params = finalize_plan(root, |
2712 | plan->righttree, |
2713 | gather_param, |
2714 | bms_union(nestloop_params, valid_params), |
2715 | scan_params); |
2716 | /* ... and they don't count as parameters used at my level */ |
2717 | child_params = bms_difference(child_params, nestloop_params); |
2718 | bms_free(nestloop_params); |
2719 | } |
2720 | else |
2721 | { |
2722 | /* easy case */ |
2723 | child_params = finalize_plan(root, |
2724 | plan->righttree, |
2725 | gather_param, |
2726 | valid_params, |
2727 | scan_params); |
2728 | } |
2729 | context.paramids = bms_add_members(context.paramids, child_params); |
2730 | |
2731 | /* |
2732 | * Any locally generated parameter doesn't count towards its generating |
2733 | * plan node's external dependencies. (Note: if we changed valid_params |
2734 | * and/or scan_params, we leak those bitmapsets; not worth the notational |
2735 | * trouble to clean them up.) |
2736 | */ |
2737 | if (locally_added_param >= 0) |
2738 | { |
2739 | context.paramids = bms_del_member(context.paramids, |
2740 | locally_added_param); |
2741 | } |
2742 | |
2743 | /* Now we have all the paramids referenced in this node and children */ |
2744 | |
2745 | if (!bms_is_subset(context.paramids, valid_params)) |
2746 | elog(ERROR, "plan should not reference subplan's variable" ); |
2747 | |
2748 | /* |
2749 | * The plan node's allParam and extParam fields should include all its |
2750 | * referenced paramids, plus contributions from any child initPlans. |
2751 | * However, any setParams of the initPlans should not be present in the |
2752 | * parent node's extParams, only in its allParams. (It's possible that |
2753 | * some initPlans have extParams that are setParams of other initPlans.) |
2754 | */ |
2755 | |
2756 | /* allParam must include initplans' extParams and setParams */ |
2757 | plan->allParam = bms_union(context.paramids, initExtParam); |
2758 | plan->allParam = bms_add_members(plan->allParam, initSetParam); |
2759 | /* extParam must include any initplan extParams */ |
2760 | plan->extParam = bms_union(context.paramids, initExtParam); |
2761 | /* but not any initplan setParams */ |
2762 | plan->extParam = bms_del_members(plan->extParam, initSetParam); |
2763 | |
2764 | /* |
2765 | * For speed at execution time, make sure extParam/allParam are actually |
2766 | * NULL if they are empty sets. |
2767 | */ |
2768 | if (bms_is_empty(plan->extParam)) |
2769 | plan->extParam = NULL; |
2770 | if (bms_is_empty(plan->allParam)) |
2771 | plan->allParam = NULL; |
2772 | |
2773 | return plan->allParam; |
2774 | } |
2775 | |
2776 | /* |
2777 | * finalize_primnode: add IDs of all PARAM_EXEC params appearing in the given |
2778 | * expression tree to the result set. |
2779 | */ |
2780 | static bool |
2781 | finalize_primnode(Node *node, finalize_primnode_context *context) |
2782 | { |
2783 | if (node == NULL) |
2784 | return false; |
2785 | if (IsA(node, Param)) |
2786 | { |
2787 | if (((Param *) node)->paramkind == PARAM_EXEC) |
2788 | { |
2789 | int paramid = ((Param *) node)->paramid; |
2790 | |
2791 | context->paramids = bms_add_member(context->paramids, paramid); |
2792 | } |
2793 | return false; /* no more to do here */ |
2794 | } |
2795 | if (IsA(node, SubPlan)) |
2796 | { |
2797 | SubPlan *subplan = (SubPlan *) node; |
2798 | Plan *plan = planner_subplan_get_plan(context->root, subplan); |
2799 | ListCell *lc; |
2800 | Bitmapset *subparamids; |
2801 | |
2802 | /* Recurse into the testexpr, but not into the Plan */ |
2803 | finalize_primnode(subplan->testexpr, context); |
2804 | |
2805 | /* |
2806 | * Remove any param IDs of output parameters of the subplan that were |
2807 | * referenced in the testexpr. These are not interesting for |
2808 | * parameter change signaling since we always re-evaluate the subplan. |
2809 | * Note that this wouldn't work too well if there might be uses of the |
2810 | * same param IDs elsewhere in the plan, but that can't happen because |
2811 | * generate_new_exec_param never tries to merge params. |
2812 | */ |
2813 | foreach(lc, subplan->paramIds) |
2814 | { |
2815 | context->paramids = bms_del_member(context->paramids, |
2816 | lfirst_int(lc)); |
2817 | } |
2818 | |
2819 | /* Also examine args list */ |
2820 | finalize_primnode((Node *) subplan->args, context); |
2821 | |
2822 | /* |
2823 | * Add params needed by the subplan to paramids, but excluding those |
2824 | * we will pass down to it. (We assume SS_finalize_plan was run on |
2825 | * the subplan already.) |
2826 | */ |
2827 | subparamids = bms_copy(plan->extParam); |
2828 | foreach(lc, subplan->parParam) |
2829 | { |
2830 | subparamids = bms_del_member(subparamids, lfirst_int(lc)); |
2831 | } |
2832 | context->paramids = bms_join(context->paramids, subparamids); |
2833 | |
2834 | return false; /* no more to do here */ |
2835 | } |
2836 | return expression_tree_walker(node, finalize_primnode, |
2837 | (void *) context); |
2838 | } |
2839 | |
2840 | /* |
2841 | * finalize_agg_primnode: find all Aggref nodes in the given expression tree, |
2842 | * and add IDs of all PARAM_EXEC params appearing within their aggregated |
2843 | * arguments to the result set. |
2844 | */ |
2845 | static bool |
2846 | finalize_agg_primnode(Node *node, finalize_primnode_context *context) |
2847 | { |
2848 | if (node == NULL) |
2849 | return false; |
2850 | if (IsA(node, Aggref)) |
2851 | { |
2852 | Aggref *agg = (Aggref *) node; |
2853 | |
2854 | /* we should not consider the direct arguments, if any */ |
2855 | finalize_primnode((Node *) agg->args, context); |
2856 | finalize_primnode((Node *) agg->aggfilter, context); |
2857 | return false; /* there can't be any Aggrefs below here */ |
2858 | } |
2859 | return expression_tree_walker(node, finalize_agg_primnode, |
2860 | (void *) context); |
2861 | } |
2862 | |
2863 | /* |
2864 | * SS_make_initplan_output_param - make a Param for an initPlan's output |
2865 | * |
2866 | * The plan is expected to return a scalar value of the given type/collation. |
2867 | * |
2868 | * Note that in some cases the initplan may not ever appear in the finished |
2869 | * plan tree. If that happens, we'll have wasted a PARAM_EXEC slot, which |
2870 | * is no big deal. |
2871 | */ |
2872 | Param * |
2873 | SS_make_initplan_output_param(PlannerInfo *root, |
2874 | Oid resulttype, int32 resulttypmod, |
2875 | Oid resultcollation) |
2876 | { |
2877 | return generate_new_exec_param(root, resulttype, |
2878 | resulttypmod, resultcollation); |
2879 | } |
2880 | |
2881 | /* |
2882 | * SS_make_initplan_from_plan - given a plan tree, make it an InitPlan |
2883 | * |
2884 | * We build an EXPR_SUBLINK SubPlan node and put it into the initplan |
2885 | * list for the outer query level. A Param that represents the initplan's |
2886 | * output has already been assigned using SS_make_initplan_output_param. |
2887 | */ |
2888 | void |
2889 | SS_make_initplan_from_plan(PlannerInfo *root, |
2890 | PlannerInfo *subroot, Plan *plan, |
2891 | Param *prm) |
2892 | { |
2893 | SubPlan *node; |
2894 | |
2895 | /* |
2896 | * Add the subplan and its PlannerInfo to the global lists. |
2897 | */ |
2898 | root->glob->subplans = lappend(root->glob->subplans, plan); |
2899 | root->glob->subroots = lappend(root->glob->subroots, subroot); |
2900 | |
2901 | /* |
2902 | * Create a SubPlan node and add it to the outer list of InitPlans. Note |
2903 | * it has to appear after any other InitPlans it might depend on (see |
2904 | * comments in ExecReScan). |
2905 | */ |
2906 | node = makeNode(SubPlan); |
2907 | node->subLinkType = EXPR_SUBLINK; |
2908 | node->plan_id = list_length(root->glob->subplans); |
2909 | node->plan_name = psprintf("InitPlan %d (returns $%d)" , |
2910 | node->plan_id, prm->paramid); |
2911 | get_first_col_type(plan, &node->firstColType, &node->firstColTypmod, |
2912 | &node->firstColCollation); |
2913 | node->setParam = list_make1_int(prm->paramid); |
2914 | |
2915 | root->init_plans = lappend(root->init_plans, node); |
2916 | |
2917 | /* |
2918 | * The node can't have any inputs (since it's an initplan), so the |
2919 | * parParam and args lists remain empty. |
2920 | */ |
2921 | |
2922 | /* Set costs of SubPlan using info from the plan tree */ |
2923 | cost_subplan(subroot, node, plan); |
2924 | } |
2925 | |