1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * parse_cte.c |
4 | * handle CTEs (common table expressions) in parser |
5 | * |
6 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
7 | * Portions Copyright (c) 1994, Regents of the University of California |
8 | * |
9 | * |
10 | * IDENTIFICATION |
11 | * src/backend/parser/parse_cte.c |
12 | * |
13 | *------------------------------------------------------------------------- |
14 | */ |
15 | #include "postgres.h" |
16 | |
17 | #include "catalog/pg_collation.h" |
18 | #include "catalog/pg_type.h" |
19 | #include "nodes/nodeFuncs.h" |
20 | #include "parser/analyze.h" |
21 | #include "parser/parse_cte.h" |
22 | #include "utils/builtins.h" |
23 | #include "utils/lsyscache.h" |
24 | |
25 | |
26 | /* Enumeration of contexts in which a self-reference is disallowed */ |
27 | typedef enum |
28 | { |
29 | RECURSION_OK, |
30 | RECURSION_NONRECURSIVETERM, /* inside the left-hand term */ |
31 | RECURSION_SUBLINK, /* inside a sublink */ |
32 | RECURSION_OUTERJOIN, /* inside nullable side of an outer join */ |
33 | RECURSION_INTERSECT, /* underneath INTERSECT (ALL) */ |
34 | RECURSION_EXCEPT /* underneath EXCEPT (ALL) */ |
35 | } RecursionContext; |
36 | |
37 | /* Associated error messages --- each must have one %s for CTE name */ |
38 | static const char *const recursion_errormsgs[] = { |
39 | /* RECURSION_OK */ |
40 | NULL, |
41 | /* RECURSION_NONRECURSIVETERM */ |
42 | gettext_noop("recursive reference to query \"%s\" must not appear within its non-recursive term" ), |
43 | /* RECURSION_SUBLINK */ |
44 | gettext_noop("recursive reference to query \"%s\" must not appear within a subquery" ), |
45 | /* RECURSION_OUTERJOIN */ |
46 | gettext_noop("recursive reference to query \"%s\" must not appear within an outer join" ), |
47 | /* RECURSION_INTERSECT */ |
48 | gettext_noop("recursive reference to query \"%s\" must not appear within INTERSECT" ), |
49 | /* RECURSION_EXCEPT */ |
50 | gettext_noop("recursive reference to query \"%s\" must not appear within EXCEPT" ) |
51 | }; |
52 | |
53 | /* |
54 | * For WITH RECURSIVE, we have to find an ordering of the clause members |
55 | * with no forward references, and determine which members are recursive |
56 | * (i.e., self-referential). It is convenient to do this with an array |
57 | * of CteItems instead of a list of CommonTableExprs. |
58 | */ |
59 | typedef struct CteItem |
60 | { |
61 | CommonTableExpr *cte; /* One CTE to examine */ |
62 | int id; /* Its ID number for dependencies */ |
63 | Bitmapset *depends_on; /* CTEs depended on (not including self) */ |
64 | } CteItem; |
65 | |
66 | /* CteState is what we need to pass around in the tree walkers */ |
67 | typedef struct CteState |
68 | { |
69 | /* global state: */ |
70 | ParseState *pstate; /* global parse state */ |
71 | CteItem *items; /* array of CTEs and extra data */ |
72 | int numitems; /* number of CTEs */ |
73 | /* working state during a tree walk: */ |
74 | int curitem; /* index of item currently being examined */ |
75 | List *innerwiths; /* list of lists of CommonTableExpr */ |
76 | /* working state for checkWellFormedRecursion walk only: */ |
77 | int selfrefcount; /* number of self-references detected */ |
78 | RecursionContext context; /* context to allow or disallow self-ref */ |
79 | } CteState; |
80 | |
81 | |
82 | static void analyzeCTE(ParseState *pstate, CommonTableExpr *cte); |
83 | |
84 | /* Dependency processing functions */ |
85 | static void makeDependencyGraph(CteState *cstate); |
86 | static bool makeDependencyGraphWalker(Node *node, CteState *cstate); |
87 | static void TopologicalSort(ParseState *pstate, CteItem *items, int numitems); |
88 | |
89 | /* Recursion validity checker functions */ |
90 | static void checkWellFormedRecursion(CteState *cstate); |
91 | static bool checkWellFormedRecursionWalker(Node *node, CteState *cstate); |
92 | static void checkWellFormedSelectStmt(SelectStmt *stmt, CteState *cstate); |
93 | |
94 | |
95 | /* |
96 | * transformWithClause - |
97 | * Transform the list of WITH clause "common table expressions" into |
98 | * Query nodes. |
99 | * |
100 | * The result is the list of transformed CTEs to be put into the output |
101 | * Query. (This is in fact the same as the ending value of p_ctenamespace, |
102 | * but it seems cleaner to not expose that in the function's API.) |
103 | */ |
104 | List * |
105 | transformWithClause(ParseState *pstate, WithClause *withClause) |
106 | { |
107 | ListCell *lc; |
108 | |
109 | /* Only one WITH clause per query level */ |
110 | Assert(pstate->p_ctenamespace == NIL); |
111 | Assert(pstate->p_future_ctes == NIL); |
112 | |
113 | /* |
114 | * For either type of WITH, there must not be duplicate CTE names in the |
115 | * list. Check this right away so we needn't worry later. |
116 | * |
117 | * Also, tentatively mark each CTE as non-recursive, and initialize its |
118 | * reference count to zero, and set pstate->p_hasModifyingCTE if needed. |
119 | */ |
120 | foreach(lc, withClause->ctes) |
121 | { |
122 | CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc); |
123 | ListCell *rest; |
124 | |
125 | for_each_cell(rest, lnext(lc)) |
126 | { |
127 | CommonTableExpr *cte2 = (CommonTableExpr *) lfirst(rest); |
128 | |
129 | if (strcmp(cte->ctename, cte2->ctename) == 0) |
130 | ereport(ERROR, |
131 | (errcode(ERRCODE_DUPLICATE_ALIAS), |
132 | errmsg("WITH query name \"%s\" specified more than once" , |
133 | cte2->ctename), |
134 | parser_errposition(pstate, cte2->location))); |
135 | } |
136 | |
137 | cte->cterecursive = false; |
138 | cte->cterefcount = 0; |
139 | |
140 | if (!IsA(cte->ctequery, SelectStmt)) |
141 | { |
142 | /* must be a data-modifying statement */ |
143 | Assert(IsA(cte->ctequery, InsertStmt) || |
144 | IsA(cte->ctequery, UpdateStmt) || |
145 | IsA(cte->ctequery, DeleteStmt)); |
146 | |
147 | pstate->p_hasModifyingCTE = true; |
148 | } |
149 | } |
150 | |
151 | if (withClause->recursive) |
152 | { |
153 | /* |
154 | * For WITH RECURSIVE, we rearrange the list elements if needed to |
155 | * eliminate forward references. First, build a work array and set up |
156 | * the data structure needed by the tree walkers. |
157 | */ |
158 | CteState cstate; |
159 | int i; |
160 | |
161 | cstate.pstate = pstate; |
162 | cstate.numitems = list_length(withClause->ctes); |
163 | cstate.items = (CteItem *) palloc0(cstate.numitems * sizeof(CteItem)); |
164 | i = 0; |
165 | foreach(lc, withClause->ctes) |
166 | { |
167 | cstate.items[i].cte = (CommonTableExpr *) lfirst(lc); |
168 | cstate.items[i].id = i; |
169 | i++; |
170 | } |
171 | |
172 | /* |
173 | * Find all the dependencies and sort the CteItems into a safe |
174 | * processing order. Also, mark CTEs that contain self-references. |
175 | */ |
176 | makeDependencyGraph(&cstate); |
177 | |
178 | /* |
179 | * Check that recursive queries are well-formed. |
180 | */ |
181 | checkWellFormedRecursion(&cstate); |
182 | |
183 | /* |
184 | * Set up the ctenamespace for parse analysis. Per spec, all the WITH |
185 | * items are visible to all others, so stuff them all in before parse |
186 | * analysis. We build the list in safe processing order so that the |
187 | * planner can process the queries in sequence. |
188 | */ |
189 | for (i = 0; i < cstate.numitems; i++) |
190 | { |
191 | CommonTableExpr *cte = cstate.items[i].cte; |
192 | |
193 | pstate->p_ctenamespace = lappend(pstate->p_ctenamespace, cte); |
194 | } |
195 | |
196 | /* |
197 | * Do parse analysis in the order determined by the topological sort. |
198 | */ |
199 | for (i = 0; i < cstate.numitems; i++) |
200 | { |
201 | CommonTableExpr *cte = cstate.items[i].cte; |
202 | |
203 | analyzeCTE(pstate, cte); |
204 | } |
205 | } |
206 | else |
207 | { |
208 | /* |
209 | * For non-recursive WITH, just analyze each CTE in sequence and then |
210 | * add it to the ctenamespace. This corresponds to the spec's |
211 | * definition of the scope of each WITH name. However, to allow error |
212 | * reports to be aware of the possibility of an erroneous reference, |
213 | * we maintain a list in p_future_ctes of the not-yet-visible CTEs. |
214 | */ |
215 | pstate->p_future_ctes = list_copy(withClause->ctes); |
216 | |
217 | foreach(lc, withClause->ctes) |
218 | { |
219 | CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc); |
220 | |
221 | analyzeCTE(pstate, cte); |
222 | pstate->p_ctenamespace = lappend(pstate->p_ctenamespace, cte); |
223 | pstate->p_future_ctes = list_delete_first(pstate->p_future_ctes); |
224 | } |
225 | } |
226 | |
227 | return pstate->p_ctenamespace; |
228 | } |
229 | |
230 | |
231 | /* |
232 | * Perform the actual parse analysis transformation of one CTE. All |
233 | * CTEs it depends on have already been loaded into pstate->p_ctenamespace, |
234 | * and have been marked with the correct output column names/types. |
235 | */ |
236 | static void |
237 | analyzeCTE(ParseState *pstate, CommonTableExpr *cte) |
238 | { |
239 | Query *query; |
240 | |
241 | /* Analysis not done already */ |
242 | Assert(!IsA(cte->ctequery, Query)); |
243 | |
244 | query = parse_sub_analyze(cte->ctequery, pstate, cte, false, true); |
245 | cte->ctequery = (Node *) query; |
246 | |
247 | /* |
248 | * Check that we got something reasonable. These first two cases should |
249 | * be prevented by the grammar. |
250 | */ |
251 | if (!IsA(query, Query)) |
252 | elog(ERROR, "unexpected non-Query statement in WITH" ); |
253 | if (query->utilityStmt != NULL) |
254 | elog(ERROR, "unexpected utility statement in WITH" ); |
255 | |
256 | /* |
257 | * We disallow data-modifying WITH except at the top level of a query, |
258 | * because it's not clear when such a modification should be executed. |
259 | */ |
260 | if (query->commandType != CMD_SELECT && |
261 | pstate->parentParseState != NULL) |
262 | ereport(ERROR, |
263 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
264 | errmsg("WITH clause containing a data-modifying statement must be at the top level" ), |
265 | parser_errposition(pstate, cte->location))); |
266 | |
267 | /* |
268 | * CTE queries are always marked not canSetTag. (Currently this only |
269 | * matters for data-modifying statements, for which the flag will be |
270 | * propagated to the ModifyTable plan node.) |
271 | */ |
272 | query->canSetTag = false; |
273 | |
274 | if (!cte->cterecursive) |
275 | { |
276 | /* Compute the output column names/types if not done yet */ |
277 | analyzeCTETargetList(pstate, cte, GetCTETargetList(cte)); |
278 | } |
279 | else |
280 | { |
281 | /* |
282 | * Verify that the previously determined output column types and |
283 | * collations match what the query really produced. We have to check |
284 | * this because the recursive term could have overridden the |
285 | * non-recursive term, and we don't have any easy way to fix that. |
286 | */ |
287 | ListCell *lctlist, |
288 | *lctyp, |
289 | *lctypmod, |
290 | *lccoll; |
291 | int varattno; |
292 | |
293 | lctyp = list_head(cte->ctecoltypes); |
294 | lctypmod = list_head(cte->ctecoltypmods); |
295 | lccoll = list_head(cte->ctecolcollations); |
296 | varattno = 0; |
297 | foreach(lctlist, GetCTETargetList(cte)) |
298 | { |
299 | TargetEntry *te = (TargetEntry *) lfirst(lctlist); |
300 | Node *texpr; |
301 | |
302 | if (te->resjunk) |
303 | continue; |
304 | varattno++; |
305 | Assert(varattno == te->resno); |
306 | if (lctyp == NULL || lctypmod == NULL || lccoll == NULL) /* shouldn't happen */ |
307 | elog(ERROR, "wrong number of output columns in WITH" ); |
308 | texpr = (Node *) te->expr; |
309 | if (exprType(texpr) != lfirst_oid(lctyp) || |
310 | exprTypmod(texpr) != lfirst_int(lctypmod)) |
311 | ereport(ERROR, |
312 | (errcode(ERRCODE_DATATYPE_MISMATCH), |
313 | errmsg("recursive query \"%s\" column %d has type %s in non-recursive term but type %s overall" , |
314 | cte->ctename, varattno, |
315 | format_type_with_typemod(lfirst_oid(lctyp), |
316 | lfirst_int(lctypmod)), |
317 | format_type_with_typemod(exprType(texpr), |
318 | exprTypmod(texpr))), |
319 | errhint("Cast the output of the non-recursive term to the correct type." ), |
320 | parser_errposition(pstate, exprLocation(texpr)))); |
321 | if (exprCollation(texpr) != lfirst_oid(lccoll)) |
322 | ereport(ERROR, |
323 | (errcode(ERRCODE_COLLATION_MISMATCH), |
324 | errmsg("recursive query \"%s\" column %d has collation \"%s\" in non-recursive term but collation \"%s\" overall" , |
325 | cte->ctename, varattno, |
326 | get_collation_name(lfirst_oid(lccoll)), |
327 | get_collation_name(exprCollation(texpr))), |
328 | errhint("Use the COLLATE clause to set the collation of the non-recursive term." ), |
329 | parser_errposition(pstate, exprLocation(texpr)))); |
330 | lctyp = lnext(lctyp); |
331 | lctypmod = lnext(lctypmod); |
332 | lccoll = lnext(lccoll); |
333 | } |
334 | if (lctyp != NULL || lctypmod != NULL || lccoll != NULL) /* shouldn't happen */ |
335 | elog(ERROR, "wrong number of output columns in WITH" ); |
336 | } |
337 | } |
338 | |
339 | /* |
340 | * Compute derived fields of a CTE, given the transformed output targetlist |
341 | * |
342 | * For a nonrecursive CTE, this is called after transforming the CTE's query. |
343 | * For a recursive CTE, we call it after transforming the non-recursive term, |
344 | * and pass the targetlist emitted by the non-recursive term only. |
345 | * |
346 | * Note: in the recursive case, the passed pstate is actually the one being |
347 | * used to analyze the CTE's query, so it is one level lower down than in |
348 | * the nonrecursive case. This doesn't matter since we only use it for |
349 | * error message context anyway. |
350 | */ |
351 | void |
352 | analyzeCTETargetList(ParseState *pstate, CommonTableExpr *cte, List *tlist) |
353 | { |
354 | int numaliases; |
355 | int varattno; |
356 | ListCell *tlistitem; |
357 | |
358 | /* Not done already ... */ |
359 | Assert(cte->ctecolnames == NIL); |
360 | |
361 | /* |
362 | * We need to determine column names, types, and collations. The alias |
363 | * column names override anything coming from the query itself. (Note: |
364 | * the SQL spec says that the alias list must be empty or exactly as long |
365 | * as the output column set; but we allow it to be shorter for consistency |
366 | * with Alias handling.) |
367 | */ |
368 | cte->ctecolnames = copyObject(cte->aliascolnames); |
369 | cte->ctecoltypes = cte->ctecoltypmods = cte->ctecolcollations = NIL; |
370 | numaliases = list_length(cte->aliascolnames); |
371 | varattno = 0; |
372 | foreach(tlistitem, tlist) |
373 | { |
374 | TargetEntry *te = (TargetEntry *) lfirst(tlistitem); |
375 | Oid coltype; |
376 | int32 coltypmod; |
377 | Oid colcoll; |
378 | |
379 | if (te->resjunk) |
380 | continue; |
381 | varattno++; |
382 | Assert(varattno == te->resno); |
383 | if (varattno > numaliases) |
384 | { |
385 | char *attrname; |
386 | |
387 | attrname = pstrdup(te->resname); |
388 | cte->ctecolnames = lappend(cte->ctecolnames, makeString(attrname)); |
389 | } |
390 | coltype = exprType((Node *) te->expr); |
391 | coltypmod = exprTypmod((Node *) te->expr); |
392 | colcoll = exprCollation((Node *) te->expr); |
393 | |
394 | /* |
395 | * If the CTE is recursive, force the exposed column type of any |
396 | * "unknown" column to "text". We must deal with this here because |
397 | * we're called on the non-recursive term before there's been any |
398 | * attempt to force unknown output columns to some other type. We |
399 | * have to resolve unknowns before looking at the recursive term. |
400 | * |
401 | * The column might contain 'foo' COLLATE "bar", so don't override |
402 | * collation if it's already set. |
403 | */ |
404 | if (cte->cterecursive && coltype == UNKNOWNOID) |
405 | { |
406 | coltype = TEXTOID; |
407 | coltypmod = -1; /* should be -1 already, but be sure */ |
408 | if (!OidIsValid(colcoll)) |
409 | colcoll = DEFAULT_COLLATION_OID; |
410 | } |
411 | cte->ctecoltypes = lappend_oid(cte->ctecoltypes, coltype); |
412 | cte->ctecoltypmods = lappend_int(cte->ctecoltypmods, coltypmod); |
413 | cte->ctecolcollations = lappend_oid(cte->ctecolcollations, colcoll); |
414 | } |
415 | if (varattno < numaliases) |
416 | ereport(ERROR, |
417 | (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), |
418 | errmsg("WITH query \"%s\" has %d columns available but %d columns specified" , |
419 | cte->ctename, varattno, numaliases), |
420 | parser_errposition(pstate, cte->location))); |
421 | } |
422 | |
423 | |
424 | /* |
425 | * Identify the cross-references of a list of WITH RECURSIVE items, |
426 | * and sort into an order that has no forward references. |
427 | */ |
428 | static void |
429 | makeDependencyGraph(CteState *cstate) |
430 | { |
431 | int i; |
432 | |
433 | for (i = 0; i < cstate->numitems; i++) |
434 | { |
435 | CommonTableExpr *cte = cstate->items[i].cte; |
436 | |
437 | cstate->curitem = i; |
438 | cstate->innerwiths = NIL; |
439 | makeDependencyGraphWalker((Node *) cte->ctequery, cstate); |
440 | Assert(cstate->innerwiths == NIL); |
441 | } |
442 | |
443 | TopologicalSort(cstate->pstate, cstate->items, cstate->numitems); |
444 | } |
445 | |
446 | /* |
447 | * Tree walker function to detect cross-references and self-references of the |
448 | * CTEs in a WITH RECURSIVE list. |
449 | */ |
450 | static bool |
451 | makeDependencyGraphWalker(Node *node, CteState *cstate) |
452 | { |
453 | if (node == NULL) |
454 | return false; |
455 | if (IsA(node, RangeVar)) |
456 | { |
457 | RangeVar *rv = (RangeVar *) node; |
458 | |
459 | /* If unqualified name, might be a CTE reference */ |
460 | if (!rv->schemaname) |
461 | { |
462 | ListCell *lc; |
463 | int i; |
464 | |
465 | /* ... but first see if it's captured by an inner WITH */ |
466 | foreach(lc, cstate->innerwiths) |
467 | { |
468 | List *withlist = (List *) lfirst(lc); |
469 | ListCell *lc2; |
470 | |
471 | foreach(lc2, withlist) |
472 | { |
473 | CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc2); |
474 | |
475 | if (strcmp(rv->relname, cte->ctename) == 0) |
476 | return false; /* yes, so bail out */ |
477 | } |
478 | } |
479 | |
480 | /* No, could be a reference to the query level we are working on */ |
481 | for (i = 0; i < cstate->numitems; i++) |
482 | { |
483 | CommonTableExpr *cte = cstate->items[i].cte; |
484 | |
485 | if (strcmp(rv->relname, cte->ctename) == 0) |
486 | { |
487 | int myindex = cstate->curitem; |
488 | |
489 | if (i != myindex) |
490 | { |
491 | /* Add cross-item dependency */ |
492 | cstate->items[myindex].depends_on = |
493 | bms_add_member(cstate->items[myindex].depends_on, |
494 | cstate->items[i].id); |
495 | } |
496 | else |
497 | { |
498 | /* Found out this one is self-referential */ |
499 | cte->cterecursive = true; |
500 | } |
501 | break; |
502 | } |
503 | } |
504 | } |
505 | return false; |
506 | } |
507 | if (IsA(node, SelectStmt)) |
508 | { |
509 | SelectStmt *stmt = (SelectStmt *) node; |
510 | ListCell *lc; |
511 | |
512 | if (stmt->withClause) |
513 | { |
514 | if (stmt->withClause->recursive) |
515 | { |
516 | /* |
517 | * In the RECURSIVE case, all query names of the WITH are |
518 | * visible to all WITH items as well as the main query. So |
519 | * push them all on, process, pop them all off. |
520 | */ |
521 | cstate->innerwiths = lcons(stmt->withClause->ctes, |
522 | cstate->innerwiths); |
523 | foreach(lc, stmt->withClause->ctes) |
524 | { |
525 | CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc); |
526 | |
527 | (void) makeDependencyGraphWalker(cte->ctequery, cstate); |
528 | } |
529 | (void) raw_expression_tree_walker(node, |
530 | makeDependencyGraphWalker, |
531 | (void *) cstate); |
532 | cstate->innerwiths = list_delete_first(cstate->innerwiths); |
533 | } |
534 | else |
535 | { |
536 | /* |
537 | * In the non-RECURSIVE case, query names are visible to the |
538 | * WITH items after them and to the main query. |
539 | */ |
540 | ListCell *cell1; |
541 | |
542 | cstate->innerwiths = lcons(NIL, cstate->innerwiths); |
543 | cell1 = list_head(cstate->innerwiths); |
544 | foreach(lc, stmt->withClause->ctes) |
545 | { |
546 | CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc); |
547 | |
548 | (void) makeDependencyGraphWalker(cte->ctequery, cstate); |
549 | lfirst(cell1) = lappend((List *) lfirst(cell1), cte); |
550 | } |
551 | (void) raw_expression_tree_walker(node, |
552 | makeDependencyGraphWalker, |
553 | (void *) cstate); |
554 | cstate->innerwiths = list_delete_first(cstate->innerwiths); |
555 | } |
556 | /* We're done examining the SelectStmt */ |
557 | return false; |
558 | } |
559 | /* if no WITH clause, just fall through for normal processing */ |
560 | } |
561 | if (IsA(node, WithClause)) |
562 | { |
563 | /* |
564 | * Prevent raw_expression_tree_walker from recursing directly into a |
565 | * WITH clause. We need that to happen only under the control of the |
566 | * code above. |
567 | */ |
568 | return false; |
569 | } |
570 | return raw_expression_tree_walker(node, |
571 | makeDependencyGraphWalker, |
572 | (void *) cstate); |
573 | } |
574 | |
575 | /* |
576 | * Sort by dependencies, using a standard topological sort operation |
577 | */ |
578 | static void |
579 | TopologicalSort(ParseState *pstate, CteItem *items, int numitems) |
580 | { |
581 | int i, |
582 | j; |
583 | |
584 | /* for each position in sequence ... */ |
585 | for (i = 0; i < numitems; i++) |
586 | { |
587 | /* ... scan the remaining items to find one that has no dependencies */ |
588 | for (j = i; j < numitems; j++) |
589 | { |
590 | if (bms_is_empty(items[j].depends_on)) |
591 | break; |
592 | } |
593 | |
594 | /* if we didn't find one, the dependency graph has a cycle */ |
595 | if (j >= numitems) |
596 | ereport(ERROR, |
597 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
598 | errmsg("mutual recursion between WITH items is not implemented" ), |
599 | parser_errposition(pstate, items[i].cte->location))); |
600 | |
601 | /* |
602 | * Found one. Move it to front and remove it from every other item's |
603 | * dependencies. |
604 | */ |
605 | if (i != j) |
606 | { |
607 | CteItem tmp; |
608 | |
609 | tmp = items[i]; |
610 | items[i] = items[j]; |
611 | items[j] = tmp; |
612 | } |
613 | |
614 | /* |
615 | * Items up through i are known to have no dependencies left, so we |
616 | * can skip them in this loop. |
617 | */ |
618 | for (j = i + 1; j < numitems; j++) |
619 | { |
620 | items[j].depends_on = bms_del_member(items[j].depends_on, |
621 | items[i].id); |
622 | } |
623 | } |
624 | } |
625 | |
626 | |
627 | /* |
628 | * Check that recursive queries are well-formed. |
629 | */ |
630 | static void |
631 | checkWellFormedRecursion(CteState *cstate) |
632 | { |
633 | int i; |
634 | |
635 | for (i = 0; i < cstate->numitems; i++) |
636 | { |
637 | CommonTableExpr *cte = cstate->items[i].cte; |
638 | SelectStmt *stmt = (SelectStmt *) cte->ctequery; |
639 | |
640 | Assert(!IsA(stmt, Query)); /* not analyzed yet */ |
641 | |
642 | /* Ignore items that weren't found to be recursive */ |
643 | if (!cte->cterecursive) |
644 | continue; |
645 | |
646 | /* Must be a SELECT statement */ |
647 | if (!IsA(stmt, SelectStmt)) |
648 | ereport(ERROR, |
649 | (errcode(ERRCODE_INVALID_RECURSION), |
650 | errmsg("recursive query \"%s\" must not contain data-modifying statements" , |
651 | cte->ctename), |
652 | parser_errposition(cstate->pstate, cte->location))); |
653 | |
654 | /* Must have top-level UNION */ |
655 | if (stmt->op != SETOP_UNION) |
656 | ereport(ERROR, |
657 | (errcode(ERRCODE_INVALID_RECURSION), |
658 | errmsg("recursive query \"%s\" does not have the form non-recursive-term UNION [ALL] recursive-term" , |
659 | cte->ctename), |
660 | parser_errposition(cstate->pstate, cte->location))); |
661 | |
662 | /* The left-hand operand mustn't contain self-reference at all */ |
663 | cstate->curitem = i; |
664 | cstate->innerwiths = NIL; |
665 | cstate->selfrefcount = 0; |
666 | cstate->context = RECURSION_NONRECURSIVETERM; |
667 | checkWellFormedRecursionWalker((Node *) stmt->larg, cstate); |
668 | Assert(cstate->innerwiths == NIL); |
669 | |
670 | /* Right-hand operand should contain one reference in a valid place */ |
671 | cstate->curitem = i; |
672 | cstate->innerwiths = NIL; |
673 | cstate->selfrefcount = 0; |
674 | cstate->context = RECURSION_OK; |
675 | checkWellFormedRecursionWalker((Node *) stmt->rarg, cstate); |
676 | Assert(cstate->innerwiths == NIL); |
677 | if (cstate->selfrefcount != 1) /* shouldn't happen */ |
678 | elog(ERROR, "missing recursive reference" ); |
679 | |
680 | /* WITH mustn't contain self-reference, either */ |
681 | if (stmt->withClause) |
682 | { |
683 | cstate->curitem = i; |
684 | cstate->innerwiths = NIL; |
685 | cstate->selfrefcount = 0; |
686 | cstate->context = RECURSION_SUBLINK; |
687 | checkWellFormedRecursionWalker((Node *) stmt->withClause->ctes, |
688 | cstate); |
689 | Assert(cstate->innerwiths == NIL); |
690 | } |
691 | |
692 | /* |
693 | * Disallow ORDER BY and similar decoration atop the UNION. These |
694 | * don't make sense because it's impossible to figure out what they |
695 | * mean when we have only part of the recursive query's results. (If |
696 | * we did allow them, we'd have to check for recursive references |
697 | * inside these subtrees.) |
698 | */ |
699 | if (stmt->sortClause) |
700 | ereport(ERROR, |
701 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
702 | errmsg("ORDER BY in a recursive query is not implemented" ), |
703 | parser_errposition(cstate->pstate, |
704 | exprLocation((Node *) stmt->sortClause)))); |
705 | if (stmt->limitOffset) |
706 | ereport(ERROR, |
707 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
708 | errmsg("OFFSET in a recursive query is not implemented" ), |
709 | parser_errposition(cstate->pstate, |
710 | exprLocation(stmt->limitOffset)))); |
711 | if (stmt->limitCount) |
712 | ereport(ERROR, |
713 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
714 | errmsg("LIMIT in a recursive query is not implemented" ), |
715 | parser_errposition(cstate->pstate, |
716 | exprLocation(stmt->limitCount)))); |
717 | if (stmt->lockingClause) |
718 | ereport(ERROR, |
719 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
720 | errmsg("FOR UPDATE/SHARE in a recursive query is not implemented" ), |
721 | parser_errposition(cstate->pstate, |
722 | exprLocation((Node *) stmt->lockingClause)))); |
723 | } |
724 | } |
725 | |
726 | /* |
727 | * Tree walker function to detect invalid self-references in a recursive query. |
728 | */ |
729 | static bool |
730 | checkWellFormedRecursionWalker(Node *node, CteState *cstate) |
731 | { |
732 | RecursionContext save_context = cstate->context; |
733 | |
734 | if (node == NULL) |
735 | return false; |
736 | if (IsA(node, RangeVar)) |
737 | { |
738 | RangeVar *rv = (RangeVar *) node; |
739 | |
740 | /* If unqualified name, might be a CTE reference */ |
741 | if (!rv->schemaname) |
742 | { |
743 | ListCell *lc; |
744 | CommonTableExpr *mycte; |
745 | |
746 | /* ... but first see if it's captured by an inner WITH */ |
747 | foreach(lc, cstate->innerwiths) |
748 | { |
749 | List *withlist = (List *) lfirst(lc); |
750 | ListCell *lc2; |
751 | |
752 | foreach(lc2, withlist) |
753 | { |
754 | CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc2); |
755 | |
756 | if (strcmp(rv->relname, cte->ctename) == 0) |
757 | return false; /* yes, so bail out */ |
758 | } |
759 | } |
760 | |
761 | /* No, could be a reference to the query level we are working on */ |
762 | mycte = cstate->items[cstate->curitem].cte; |
763 | if (strcmp(rv->relname, mycte->ctename) == 0) |
764 | { |
765 | /* Found a recursive reference to the active query */ |
766 | if (cstate->context != RECURSION_OK) |
767 | ereport(ERROR, |
768 | (errcode(ERRCODE_INVALID_RECURSION), |
769 | errmsg(recursion_errormsgs[cstate->context], |
770 | mycte->ctename), |
771 | parser_errposition(cstate->pstate, |
772 | rv->location))); |
773 | /* Count references */ |
774 | if (++(cstate->selfrefcount) > 1) |
775 | ereport(ERROR, |
776 | (errcode(ERRCODE_INVALID_RECURSION), |
777 | errmsg("recursive reference to query \"%s\" must not appear more than once" , |
778 | mycte->ctename), |
779 | parser_errposition(cstate->pstate, |
780 | rv->location))); |
781 | } |
782 | } |
783 | return false; |
784 | } |
785 | if (IsA(node, SelectStmt)) |
786 | { |
787 | SelectStmt *stmt = (SelectStmt *) node; |
788 | ListCell *lc; |
789 | |
790 | if (stmt->withClause) |
791 | { |
792 | if (stmt->withClause->recursive) |
793 | { |
794 | /* |
795 | * In the RECURSIVE case, all query names of the WITH are |
796 | * visible to all WITH items as well as the main query. So |
797 | * push them all on, process, pop them all off. |
798 | */ |
799 | cstate->innerwiths = lcons(stmt->withClause->ctes, |
800 | cstate->innerwiths); |
801 | foreach(lc, stmt->withClause->ctes) |
802 | { |
803 | CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc); |
804 | |
805 | (void) checkWellFormedRecursionWalker(cte->ctequery, cstate); |
806 | } |
807 | checkWellFormedSelectStmt(stmt, cstate); |
808 | cstate->innerwiths = list_delete_first(cstate->innerwiths); |
809 | } |
810 | else |
811 | { |
812 | /* |
813 | * In the non-RECURSIVE case, query names are visible to the |
814 | * WITH items after them and to the main query. |
815 | */ |
816 | ListCell *cell1; |
817 | |
818 | cstate->innerwiths = lcons(NIL, cstate->innerwiths); |
819 | cell1 = list_head(cstate->innerwiths); |
820 | foreach(lc, stmt->withClause->ctes) |
821 | { |
822 | CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc); |
823 | |
824 | (void) checkWellFormedRecursionWalker(cte->ctequery, cstate); |
825 | lfirst(cell1) = lappend((List *) lfirst(cell1), cte); |
826 | } |
827 | checkWellFormedSelectStmt(stmt, cstate); |
828 | cstate->innerwiths = list_delete_first(cstate->innerwiths); |
829 | } |
830 | } |
831 | else |
832 | checkWellFormedSelectStmt(stmt, cstate); |
833 | /* We're done examining the SelectStmt */ |
834 | return false; |
835 | } |
836 | if (IsA(node, WithClause)) |
837 | { |
838 | /* |
839 | * Prevent raw_expression_tree_walker from recursing directly into a |
840 | * WITH clause. We need that to happen only under the control of the |
841 | * code above. |
842 | */ |
843 | return false; |
844 | } |
845 | if (IsA(node, JoinExpr)) |
846 | { |
847 | JoinExpr *j = (JoinExpr *) node; |
848 | |
849 | switch (j->jointype) |
850 | { |
851 | case JOIN_INNER: |
852 | checkWellFormedRecursionWalker(j->larg, cstate); |
853 | checkWellFormedRecursionWalker(j->rarg, cstate); |
854 | checkWellFormedRecursionWalker(j->quals, cstate); |
855 | break; |
856 | case JOIN_LEFT: |
857 | checkWellFormedRecursionWalker(j->larg, cstate); |
858 | if (save_context == RECURSION_OK) |
859 | cstate->context = RECURSION_OUTERJOIN; |
860 | checkWellFormedRecursionWalker(j->rarg, cstate); |
861 | cstate->context = save_context; |
862 | checkWellFormedRecursionWalker(j->quals, cstate); |
863 | break; |
864 | case JOIN_FULL: |
865 | if (save_context == RECURSION_OK) |
866 | cstate->context = RECURSION_OUTERJOIN; |
867 | checkWellFormedRecursionWalker(j->larg, cstate); |
868 | checkWellFormedRecursionWalker(j->rarg, cstate); |
869 | cstate->context = save_context; |
870 | checkWellFormedRecursionWalker(j->quals, cstate); |
871 | break; |
872 | case JOIN_RIGHT: |
873 | if (save_context == RECURSION_OK) |
874 | cstate->context = RECURSION_OUTERJOIN; |
875 | checkWellFormedRecursionWalker(j->larg, cstate); |
876 | cstate->context = save_context; |
877 | checkWellFormedRecursionWalker(j->rarg, cstate); |
878 | checkWellFormedRecursionWalker(j->quals, cstate); |
879 | break; |
880 | default: |
881 | elog(ERROR, "unrecognized join type: %d" , |
882 | (int) j->jointype); |
883 | } |
884 | return false; |
885 | } |
886 | if (IsA(node, SubLink)) |
887 | { |
888 | SubLink *sl = (SubLink *) node; |
889 | |
890 | /* |
891 | * we intentionally override outer context, since subquery is |
892 | * independent |
893 | */ |
894 | cstate->context = RECURSION_SUBLINK; |
895 | checkWellFormedRecursionWalker(sl->subselect, cstate); |
896 | cstate->context = save_context; |
897 | checkWellFormedRecursionWalker(sl->testexpr, cstate); |
898 | return false; |
899 | } |
900 | return raw_expression_tree_walker(node, |
901 | checkWellFormedRecursionWalker, |
902 | (void *) cstate); |
903 | } |
904 | |
905 | /* |
906 | * subroutine for checkWellFormedRecursionWalker: process a SelectStmt |
907 | * without worrying about its WITH clause |
908 | */ |
909 | static void |
910 | checkWellFormedSelectStmt(SelectStmt *stmt, CteState *cstate) |
911 | { |
912 | RecursionContext save_context = cstate->context; |
913 | |
914 | if (save_context != RECURSION_OK) |
915 | { |
916 | /* just recurse without changing state */ |
917 | raw_expression_tree_walker((Node *) stmt, |
918 | checkWellFormedRecursionWalker, |
919 | (void *) cstate); |
920 | } |
921 | else |
922 | { |
923 | switch (stmt->op) |
924 | { |
925 | case SETOP_NONE: |
926 | case SETOP_UNION: |
927 | raw_expression_tree_walker((Node *) stmt, |
928 | checkWellFormedRecursionWalker, |
929 | (void *) cstate); |
930 | break; |
931 | case SETOP_INTERSECT: |
932 | if (stmt->all) |
933 | cstate->context = RECURSION_INTERSECT; |
934 | checkWellFormedRecursionWalker((Node *) stmt->larg, |
935 | cstate); |
936 | checkWellFormedRecursionWalker((Node *) stmt->rarg, |
937 | cstate); |
938 | cstate->context = save_context; |
939 | checkWellFormedRecursionWalker((Node *) stmt->sortClause, |
940 | cstate); |
941 | checkWellFormedRecursionWalker((Node *) stmt->limitOffset, |
942 | cstate); |
943 | checkWellFormedRecursionWalker((Node *) stmt->limitCount, |
944 | cstate); |
945 | checkWellFormedRecursionWalker((Node *) stmt->lockingClause, |
946 | cstate); |
947 | /* stmt->withClause is intentionally ignored here */ |
948 | break; |
949 | case SETOP_EXCEPT: |
950 | if (stmt->all) |
951 | cstate->context = RECURSION_EXCEPT; |
952 | checkWellFormedRecursionWalker((Node *) stmt->larg, |
953 | cstate); |
954 | cstate->context = RECURSION_EXCEPT; |
955 | checkWellFormedRecursionWalker((Node *) stmt->rarg, |
956 | cstate); |
957 | cstate->context = save_context; |
958 | checkWellFormedRecursionWalker((Node *) stmt->sortClause, |
959 | cstate); |
960 | checkWellFormedRecursionWalker((Node *) stmt->limitOffset, |
961 | cstate); |
962 | checkWellFormedRecursionWalker((Node *) stmt->limitCount, |
963 | cstate); |
964 | checkWellFormedRecursionWalker((Node *) stmt->lockingClause, |
965 | cstate); |
966 | /* stmt->withClause is intentionally ignored here */ |
967 | break; |
968 | default: |
969 | elog(ERROR, "unrecognized set op: %d" , |
970 | (int) stmt->op); |
971 | } |
972 | } |
973 | } |
974 | |