| 1 | /*------------------------------------------------------------------------- |
| 2 | * |
| 3 | * parse_cte.c |
| 4 | * handle CTEs (common table expressions) in parser |
| 5 | * |
| 6 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
| 7 | * Portions Copyright (c) 1994, Regents of the University of California |
| 8 | * |
| 9 | * |
| 10 | * IDENTIFICATION |
| 11 | * src/backend/parser/parse_cte.c |
| 12 | * |
| 13 | *------------------------------------------------------------------------- |
| 14 | */ |
| 15 | #include "postgres.h" |
| 16 | |
| 17 | #include "catalog/pg_collation.h" |
| 18 | #include "catalog/pg_type.h" |
| 19 | #include "nodes/nodeFuncs.h" |
| 20 | #include "parser/analyze.h" |
| 21 | #include "parser/parse_cte.h" |
| 22 | #include "utils/builtins.h" |
| 23 | #include "utils/lsyscache.h" |
| 24 | |
| 25 | |
| 26 | /* Enumeration of contexts in which a self-reference is disallowed */ |
| 27 | typedef enum |
| 28 | { |
| 29 | RECURSION_OK, |
| 30 | RECURSION_NONRECURSIVETERM, /* inside the left-hand term */ |
| 31 | RECURSION_SUBLINK, /* inside a sublink */ |
| 32 | RECURSION_OUTERJOIN, /* inside nullable side of an outer join */ |
| 33 | RECURSION_INTERSECT, /* underneath INTERSECT (ALL) */ |
| 34 | RECURSION_EXCEPT /* underneath EXCEPT (ALL) */ |
| 35 | } RecursionContext; |
| 36 | |
| 37 | /* Associated error messages --- each must have one %s for CTE name */ |
| 38 | static const char *const recursion_errormsgs[] = { |
| 39 | /* RECURSION_OK */ |
| 40 | NULL, |
| 41 | /* RECURSION_NONRECURSIVETERM */ |
| 42 | gettext_noop("recursive reference to query \"%s\" must not appear within its non-recursive term" ), |
| 43 | /* RECURSION_SUBLINK */ |
| 44 | gettext_noop("recursive reference to query \"%s\" must not appear within a subquery" ), |
| 45 | /* RECURSION_OUTERJOIN */ |
| 46 | gettext_noop("recursive reference to query \"%s\" must not appear within an outer join" ), |
| 47 | /* RECURSION_INTERSECT */ |
| 48 | gettext_noop("recursive reference to query \"%s\" must not appear within INTERSECT" ), |
| 49 | /* RECURSION_EXCEPT */ |
| 50 | gettext_noop("recursive reference to query \"%s\" must not appear within EXCEPT" ) |
| 51 | }; |
| 52 | |
| 53 | /* |
| 54 | * For WITH RECURSIVE, we have to find an ordering of the clause members |
| 55 | * with no forward references, and determine which members are recursive |
| 56 | * (i.e., self-referential). It is convenient to do this with an array |
| 57 | * of CteItems instead of a list of CommonTableExprs. |
| 58 | */ |
| 59 | typedef struct CteItem |
| 60 | { |
| 61 | CommonTableExpr *cte; /* One CTE to examine */ |
| 62 | int id; /* Its ID number for dependencies */ |
| 63 | Bitmapset *depends_on; /* CTEs depended on (not including self) */ |
| 64 | } CteItem; |
| 65 | |
| 66 | /* CteState is what we need to pass around in the tree walkers */ |
| 67 | typedef struct CteState |
| 68 | { |
| 69 | /* global state: */ |
| 70 | ParseState *pstate; /* global parse state */ |
| 71 | CteItem *items; /* array of CTEs and extra data */ |
| 72 | int numitems; /* number of CTEs */ |
| 73 | /* working state during a tree walk: */ |
| 74 | int curitem; /* index of item currently being examined */ |
| 75 | List *innerwiths; /* list of lists of CommonTableExpr */ |
| 76 | /* working state for checkWellFormedRecursion walk only: */ |
| 77 | int selfrefcount; /* number of self-references detected */ |
| 78 | RecursionContext context; /* context to allow or disallow self-ref */ |
| 79 | } CteState; |
| 80 | |
| 81 | |
| 82 | static void analyzeCTE(ParseState *pstate, CommonTableExpr *cte); |
| 83 | |
| 84 | /* Dependency processing functions */ |
| 85 | static void makeDependencyGraph(CteState *cstate); |
| 86 | static bool makeDependencyGraphWalker(Node *node, CteState *cstate); |
| 87 | static void TopologicalSort(ParseState *pstate, CteItem *items, int numitems); |
| 88 | |
| 89 | /* Recursion validity checker functions */ |
| 90 | static void checkWellFormedRecursion(CteState *cstate); |
| 91 | static bool checkWellFormedRecursionWalker(Node *node, CteState *cstate); |
| 92 | static void checkWellFormedSelectStmt(SelectStmt *stmt, CteState *cstate); |
| 93 | |
| 94 | |
| 95 | /* |
| 96 | * transformWithClause - |
| 97 | * Transform the list of WITH clause "common table expressions" into |
| 98 | * Query nodes. |
| 99 | * |
| 100 | * The result is the list of transformed CTEs to be put into the output |
| 101 | * Query. (This is in fact the same as the ending value of p_ctenamespace, |
| 102 | * but it seems cleaner to not expose that in the function's API.) |
| 103 | */ |
| 104 | List * |
| 105 | transformWithClause(ParseState *pstate, WithClause *withClause) |
| 106 | { |
| 107 | ListCell *lc; |
| 108 | |
| 109 | /* Only one WITH clause per query level */ |
| 110 | Assert(pstate->p_ctenamespace == NIL); |
| 111 | Assert(pstate->p_future_ctes == NIL); |
| 112 | |
| 113 | /* |
| 114 | * For either type of WITH, there must not be duplicate CTE names in the |
| 115 | * list. Check this right away so we needn't worry later. |
| 116 | * |
| 117 | * Also, tentatively mark each CTE as non-recursive, and initialize its |
| 118 | * reference count to zero, and set pstate->p_hasModifyingCTE if needed. |
| 119 | */ |
| 120 | foreach(lc, withClause->ctes) |
| 121 | { |
| 122 | CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc); |
| 123 | ListCell *rest; |
| 124 | |
| 125 | for_each_cell(rest, lnext(lc)) |
| 126 | { |
| 127 | CommonTableExpr *cte2 = (CommonTableExpr *) lfirst(rest); |
| 128 | |
| 129 | if (strcmp(cte->ctename, cte2->ctename) == 0) |
| 130 | ereport(ERROR, |
| 131 | (errcode(ERRCODE_DUPLICATE_ALIAS), |
| 132 | errmsg("WITH query name \"%s\" specified more than once" , |
| 133 | cte2->ctename), |
| 134 | parser_errposition(pstate, cte2->location))); |
| 135 | } |
| 136 | |
| 137 | cte->cterecursive = false; |
| 138 | cte->cterefcount = 0; |
| 139 | |
| 140 | if (!IsA(cte->ctequery, SelectStmt)) |
| 141 | { |
| 142 | /* must be a data-modifying statement */ |
| 143 | Assert(IsA(cte->ctequery, InsertStmt) || |
| 144 | IsA(cte->ctequery, UpdateStmt) || |
| 145 | IsA(cte->ctequery, DeleteStmt)); |
| 146 | |
| 147 | pstate->p_hasModifyingCTE = true; |
| 148 | } |
| 149 | } |
| 150 | |
| 151 | if (withClause->recursive) |
| 152 | { |
| 153 | /* |
| 154 | * For WITH RECURSIVE, we rearrange the list elements if needed to |
| 155 | * eliminate forward references. First, build a work array and set up |
| 156 | * the data structure needed by the tree walkers. |
| 157 | */ |
| 158 | CteState cstate; |
| 159 | int i; |
| 160 | |
| 161 | cstate.pstate = pstate; |
| 162 | cstate.numitems = list_length(withClause->ctes); |
| 163 | cstate.items = (CteItem *) palloc0(cstate.numitems * sizeof(CteItem)); |
| 164 | i = 0; |
| 165 | foreach(lc, withClause->ctes) |
| 166 | { |
| 167 | cstate.items[i].cte = (CommonTableExpr *) lfirst(lc); |
| 168 | cstate.items[i].id = i; |
| 169 | i++; |
| 170 | } |
| 171 | |
| 172 | /* |
| 173 | * Find all the dependencies and sort the CteItems into a safe |
| 174 | * processing order. Also, mark CTEs that contain self-references. |
| 175 | */ |
| 176 | makeDependencyGraph(&cstate); |
| 177 | |
| 178 | /* |
| 179 | * Check that recursive queries are well-formed. |
| 180 | */ |
| 181 | checkWellFormedRecursion(&cstate); |
| 182 | |
| 183 | /* |
| 184 | * Set up the ctenamespace for parse analysis. Per spec, all the WITH |
| 185 | * items are visible to all others, so stuff them all in before parse |
| 186 | * analysis. We build the list in safe processing order so that the |
| 187 | * planner can process the queries in sequence. |
| 188 | */ |
| 189 | for (i = 0; i < cstate.numitems; i++) |
| 190 | { |
| 191 | CommonTableExpr *cte = cstate.items[i].cte; |
| 192 | |
| 193 | pstate->p_ctenamespace = lappend(pstate->p_ctenamespace, cte); |
| 194 | } |
| 195 | |
| 196 | /* |
| 197 | * Do parse analysis in the order determined by the topological sort. |
| 198 | */ |
| 199 | for (i = 0; i < cstate.numitems; i++) |
| 200 | { |
| 201 | CommonTableExpr *cte = cstate.items[i].cte; |
| 202 | |
| 203 | analyzeCTE(pstate, cte); |
| 204 | } |
| 205 | } |
| 206 | else |
| 207 | { |
| 208 | /* |
| 209 | * For non-recursive WITH, just analyze each CTE in sequence and then |
| 210 | * add it to the ctenamespace. This corresponds to the spec's |
| 211 | * definition of the scope of each WITH name. However, to allow error |
| 212 | * reports to be aware of the possibility of an erroneous reference, |
| 213 | * we maintain a list in p_future_ctes of the not-yet-visible CTEs. |
| 214 | */ |
| 215 | pstate->p_future_ctes = list_copy(withClause->ctes); |
| 216 | |
| 217 | foreach(lc, withClause->ctes) |
| 218 | { |
| 219 | CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc); |
| 220 | |
| 221 | analyzeCTE(pstate, cte); |
| 222 | pstate->p_ctenamespace = lappend(pstate->p_ctenamespace, cte); |
| 223 | pstate->p_future_ctes = list_delete_first(pstate->p_future_ctes); |
| 224 | } |
| 225 | } |
| 226 | |
| 227 | return pstate->p_ctenamespace; |
| 228 | } |
| 229 | |
| 230 | |
| 231 | /* |
| 232 | * Perform the actual parse analysis transformation of one CTE. All |
| 233 | * CTEs it depends on have already been loaded into pstate->p_ctenamespace, |
| 234 | * and have been marked with the correct output column names/types. |
| 235 | */ |
| 236 | static void |
| 237 | analyzeCTE(ParseState *pstate, CommonTableExpr *cte) |
| 238 | { |
| 239 | Query *query; |
| 240 | |
| 241 | /* Analysis not done already */ |
| 242 | Assert(!IsA(cte->ctequery, Query)); |
| 243 | |
| 244 | query = parse_sub_analyze(cte->ctequery, pstate, cte, false, true); |
| 245 | cte->ctequery = (Node *) query; |
| 246 | |
| 247 | /* |
| 248 | * Check that we got something reasonable. These first two cases should |
| 249 | * be prevented by the grammar. |
| 250 | */ |
| 251 | if (!IsA(query, Query)) |
| 252 | elog(ERROR, "unexpected non-Query statement in WITH" ); |
| 253 | if (query->utilityStmt != NULL) |
| 254 | elog(ERROR, "unexpected utility statement in WITH" ); |
| 255 | |
| 256 | /* |
| 257 | * We disallow data-modifying WITH except at the top level of a query, |
| 258 | * because it's not clear when such a modification should be executed. |
| 259 | */ |
| 260 | if (query->commandType != CMD_SELECT && |
| 261 | pstate->parentParseState != NULL) |
| 262 | ereport(ERROR, |
| 263 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| 264 | errmsg("WITH clause containing a data-modifying statement must be at the top level" ), |
| 265 | parser_errposition(pstate, cte->location))); |
| 266 | |
| 267 | /* |
| 268 | * CTE queries are always marked not canSetTag. (Currently this only |
| 269 | * matters for data-modifying statements, for which the flag will be |
| 270 | * propagated to the ModifyTable plan node.) |
| 271 | */ |
| 272 | query->canSetTag = false; |
| 273 | |
| 274 | if (!cte->cterecursive) |
| 275 | { |
| 276 | /* Compute the output column names/types if not done yet */ |
| 277 | analyzeCTETargetList(pstate, cte, GetCTETargetList(cte)); |
| 278 | } |
| 279 | else |
| 280 | { |
| 281 | /* |
| 282 | * Verify that the previously determined output column types and |
| 283 | * collations match what the query really produced. We have to check |
| 284 | * this because the recursive term could have overridden the |
| 285 | * non-recursive term, and we don't have any easy way to fix that. |
| 286 | */ |
| 287 | ListCell *lctlist, |
| 288 | *lctyp, |
| 289 | *lctypmod, |
| 290 | *lccoll; |
| 291 | int varattno; |
| 292 | |
| 293 | lctyp = list_head(cte->ctecoltypes); |
| 294 | lctypmod = list_head(cte->ctecoltypmods); |
| 295 | lccoll = list_head(cte->ctecolcollations); |
| 296 | varattno = 0; |
| 297 | foreach(lctlist, GetCTETargetList(cte)) |
| 298 | { |
| 299 | TargetEntry *te = (TargetEntry *) lfirst(lctlist); |
| 300 | Node *texpr; |
| 301 | |
| 302 | if (te->resjunk) |
| 303 | continue; |
| 304 | varattno++; |
| 305 | Assert(varattno == te->resno); |
| 306 | if (lctyp == NULL || lctypmod == NULL || lccoll == NULL) /* shouldn't happen */ |
| 307 | elog(ERROR, "wrong number of output columns in WITH" ); |
| 308 | texpr = (Node *) te->expr; |
| 309 | if (exprType(texpr) != lfirst_oid(lctyp) || |
| 310 | exprTypmod(texpr) != lfirst_int(lctypmod)) |
| 311 | ereport(ERROR, |
| 312 | (errcode(ERRCODE_DATATYPE_MISMATCH), |
| 313 | errmsg("recursive query \"%s\" column %d has type %s in non-recursive term but type %s overall" , |
| 314 | cte->ctename, varattno, |
| 315 | format_type_with_typemod(lfirst_oid(lctyp), |
| 316 | lfirst_int(lctypmod)), |
| 317 | format_type_with_typemod(exprType(texpr), |
| 318 | exprTypmod(texpr))), |
| 319 | errhint("Cast the output of the non-recursive term to the correct type." ), |
| 320 | parser_errposition(pstate, exprLocation(texpr)))); |
| 321 | if (exprCollation(texpr) != lfirst_oid(lccoll)) |
| 322 | ereport(ERROR, |
| 323 | (errcode(ERRCODE_COLLATION_MISMATCH), |
| 324 | errmsg("recursive query \"%s\" column %d has collation \"%s\" in non-recursive term but collation \"%s\" overall" , |
| 325 | cte->ctename, varattno, |
| 326 | get_collation_name(lfirst_oid(lccoll)), |
| 327 | get_collation_name(exprCollation(texpr))), |
| 328 | errhint("Use the COLLATE clause to set the collation of the non-recursive term." ), |
| 329 | parser_errposition(pstate, exprLocation(texpr)))); |
| 330 | lctyp = lnext(lctyp); |
| 331 | lctypmod = lnext(lctypmod); |
| 332 | lccoll = lnext(lccoll); |
| 333 | } |
| 334 | if (lctyp != NULL || lctypmod != NULL || lccoll != NULL) /* shouldn't happen */ |
| 335 | elog(ERROR, "wrong number of output columns in WITH" ); |
| 336 | } |
| 337 | } |
| 338 | |
| 339 | /* |
| 340 | * Compute derived fields of a CTE, given the transformed output targetlist |
| 341 | * |
| 342 | * For a nonrecursive CTE, this is called after transforming the CTE's query. |
| 343 | * For a recursive CTE, we call it after transforming the non-recursive term, |
| 344 | * and pass the targetlist emitted by the non-recursive term only. |
| 345 | * |
| 346 | * Note: in the recursive case, the passed pstate is actually the one being |
| 347 | * used to analyze the CTE's query, so it is one level lower down than in |
| 348 | * the nonrecursive case. This doesn't matter since we only use it for |
| 349 | * error message context anyway. |
| 350 | */ |
| 351 | void |
| 352 | analyzeCTETargetList(ParseState *pstate, CommonTableExpr *cte, List *tlist) |
| 353 | { |
| 354 | int numaliases; |
| 355 | int varattno; |
| 356 | ListCell *tlistitem; |
| 357 | |
| 358 | /* Not done already ... */ |
| 359 | Assert(cte->ctecolnames == NIL); |
| 360 | |
| 361 | /* |
| 362 | * We need to determine column names, types, and collations. The alias |
| 363 | * column names override anything coming from the query itself. (Note: |
| 364 | * the SQL spec says that the alias list must be empty or exactly as long |
| 365 | * as the output column set; but we allow it to be shorter for consistency |
| 366 | * with Alias handling.) |
| 367 | */ |
| 368 | cte->ctecolnames = copyObject(cte->aliascolnames); |
| 369 | cte->ctecoltypes = cte->ctecoltypmods = cte->ctecolcollations = NIL; |
| 370 | numaliases = list_length(cte->aliascolnames); |
| 371 | varattno = 0; |
| 372 | foreach(tlistitem, tlist) |
| 373 | { |
| 374 | TargetEntry *te = (TargetEntry *) lfirst(tlistitem); |
| 375 | Oid coltype; |
| 376 | int32 coltypmod; |
| 377 | Oid colcoll; |
| 378 | |
| 379 | if (te->resjunk) |
| 380 | continue; |
| 381 | varattno++; |
| 382 | Assert(varattno == te->resno); |
| 383 | if (varattno > numaliases) |
| 384 | { |
| 385 | char *attrname; |
| 386 | |
| 387 | attrname = pstrdup(te->resname); |
| 388 | cte->ctecolnames = lappend(cte->ctecolnames, makeString(attrname)); |
| 389 | } |
| 390 | coltype = exprType((Node *) te->expr); |
| 391 | coltypmod = exprTypmod((Node *) te->expr); |
| 392 | colcoll = exprCollation((Node *) te->expr); |
| 393 | |
| 394 | /* |
| 395 | * If the CTE is recursive, force the exposed column type of any |
| 396 | * "unknown" column to "text". We must deal with this here because |
| 397 | * we're called on the non-recursive term before there's been any |
| 398 | * attempt to force unknown output columns to some other type. We |
| 399 | * have to resolve unknowns before looking at the recursive term. |
| 400 | * |
| 401 | * The column might contain 'foo' COLLATE "bar", so don't override |
| 402 | * collation if it's already set. |
| 403 | */ |
| 404 | if (cte->cterecursive && coltype == UNKNOWNOID) |
| 405 | { |
| 406 | coltype = TEXTOID; |
| 407 | coltypmod = -1; /* should be -1 already, but be sure */ |
| 408 | if (!OidIsValid(colcoll)) |
| 409 | colcoll = DEFAULT_COLLATION_OID; |
| 410 | } |
| 411 | cte->ctecoltypes = lappend_oid(cte->ctecoltypes, coltype); |
| 412 | cte->ctecoltypmods = lappend_int(cte->ctecoltypmods, coltypmod); |
| 413 | cte->ctecolcollations = lappend_oid(cte->ctecolcollations, colcoll); |
| 414 | } |
| 415 | if (varattno < numaliases) |
| 416 | ereport(ERROR, |
| 417 | (errcode(ERRCODE_INVALID_COLUMN_REFERENCE), |
| 418 | errmsg("WITH query \"%s\" has %d columns available but %d columns specified" , |
| 419 | cte->ctename, varattno, numaliases), |
| 420 | parser_errposition(pstate, cte->location))); |
| 421 | } |
| 422 | |
| 423 | |
| 424 | /* |
| 425 | * Identify the cross-references of a list of WITH RECURSIVE items, |
| 426 | * and sort into an order that has no forward references. |
| 427 | */ |
| 428 | static void |
| 429 | makeDependencyGraph(CteState *cstate) |
| 430 | { |
| 431 | int i; |
| 432 | |
| 433 | for (i = 0; i < cstate->numitems; i++) |
| 434 | { |
| 435 | CommonTableExpr *cte = cstate->items[i].cte; |
| 436 | |
| 437 | cstate->curitem = i; |
| 438 | cstate->innerwiths = NIL; |
| 439 | makeDependencyGraphWalker((Node *) cte->ctequery, cstate); |
| 440 | Assert(cstate->innerwiths == NIL); |
| 441 | } |
| 442 | |
| 443 | TopologicalSort(cstate->pstate, cstate->items, cstate->numitems); |
| 444 | } |
| 445 | |
| 446 | /* |
| 447 | * Tree walker function to detect cross-references and self-references of the |
| 448 | * CTEs in a WITH RECURSIVE list. |
| 449 | */ |
| 450 | static bool |
| 451 | makeDependencyGraphWalker(Node *node, CteState *cstate) |
| 452 | { |
| 453 | if (node == NULL) |
| 454 | return false; |
| 455 | if (IsA(node, RangeVar)) |
| 456 | { |
| 457 | RangeVar *rv = (RangeVar *) node; |
| 458 | |
| 459 | /* If unqualified name, might be a CTE reference */ |
| 460 | if (!rv->schemaname) |
| 461 | { |
| 462 | ListCell *lc; |
| 463 | int i; |
| 464 | |
| 465 | /* ... but first see if it's captured by an inner WITH */ |
| 466 | foreach(lc, cstate->innerwiths) |
| 467 | { |
| 468 | List *withlist = (List *) lfirst(lc); |
| 469 | ListCell *lc2; |
| 470 | |
| 471 | foreach(lc2, withlist) |
| 472 | { |
| 473 | CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc2); |
| 474 | |
| 475 | if (strcmp(rv->relname, cte->ctename) == 0) |
| 476 | return false; /* yes, so bail out */ |
| 477 | } |
| 478 | } |
| 479 | |
| 480 | /* No, could be a reference to the query level we are working on */ |
| 481 | for (i = 0; i < cstate->numitems; i++) |
| 482 | { |
| 483 | CommonTableExpr *cte = cstate->items[i].cte; |
| 484 | |
| 485 | if (strcmp(rv->relname, cte->ctename) == 0) |
| 486 | { |
| 487 | int myindex = cstate->curitem; |
| 488 | |
| 489 | if (i != myindex) |
| 490 | { |
| 491 | /* Add cross-item dependency */ |
| 492 | cstate->items[myindex].depends_on = |
| 493 | bms_add_member(cstate->items[myindex].depends_on, |
| 494 | cstate->items[i].id); |
| 495 | } |
| 496 | else |
| 497 | { |
| 498 | /* Found out this one is self-referential */ |
| 499 | cte->cterecursive = true; |
| 500 | } |
| 501 | break; |
| 502 | } |
| 503 | } |
| 504 | } |
| 505 | return false; |
| 506 | } |
| 507 | if (IsA(node, SelectStmt)) |
| 508 | { |
| 509 | SelectStmt *stmt = (SelectStmt *) node; |
| 510 | ListCell *lc; |
| 511 | |
| 512 | if (stmt->withClause) |
| 513 | { |
| 514 | if (stmt->withClause->recursive) |
| 515 | { |
| 516 | /* |
| 517 | * In the RECURSIVE case, all query names of the WITH are |
| 518 | * visible to all WITH items as well as the main query. So |
| 519 | * push them all on, process, pop them all off. |
| 520 | */ |
| 521 | cstate->innerwiths = lcons(stmt->withClause->ctes, |
| 522 | cstate->innerwiths); |
| 523 | foreach(lc, stmt->withClause->ctes) |
| 524 | { |
| 525 | CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc); |
| 526 | |
| 527 | (void) makeDependencyGraphWalker(cte->ctequery, cstate); |
| 528 | } |
| 529 | (void) raw_expression_tree_walker(node, |
| 530 | makeDependencyGraphWalker, |
| 531 | (void *) cstate); |
| 532 | cstate->innerwiths = list_delete_first(cstate->innerwiths); |
| 533 | } |
| 534 | else |
| 535 | { |
| 536 | /* |
| 537 | * In the non-RECURSIVE case, query names are visible to the |
| 538 | * WITH items after them and to the main query. |
| 539 | */ |
| 540 | ListCell *cell1; |
| 541 | |
| 542 | cstate->innerwiths = lcons(NIL, cstate->innerwiths); |
| 543 | cell1 = list_head(cstate->innerwiths); |
| 544 | foreach(lc, stmt->withClause->ctes) |
| 545 | { |
| 546 | CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc); |
| 547 | |
| 548 | (void) makeDependencyGraphWalker(cte->ctequery, cstate); |
| 549 | lfirst(cell1) = lappend((List *) lfirst(cell1), cte); |
| 550 | } |
| 551 | (void) raw_expression_tree_walker(node, |
| 552 | makeDependencyGraphWalker, |
| 553 | (void *) cstate); |
| 554 | cstate->innerwiths = list_delete_first(cstate->innerwiths); |
| 555 | } |
| 556 | /* We're done examining the SelectStmt */ |
| 557 | return false; |
| 558 | } |
| 559 | /* if no WITH clause, just fall through for normal processing */ |
| 560 | } |
| 561 | if (IsA(node, WithClause)) |
| 562 | { |
| 563 | /* |
| 564 | * Prevent raw_expression_tree_walker from recursing directly into a |
| 565 | * WITH clause. We need that to happen only under the control of the |
| 566 | * code above. |
| 567 | */ |
| 568 | return false; |
| 569 | } |
| 570 | return raw_expression_tree_walker(node, |
| 571 | makeDependencyGraphWalker, |
| 572 | (void *) cstate); |
| 573 | } |
| 574 | |
| 575 | /* |
| 576 | * Sort by dependencies, using a standard topological sort operation |
| 577 | */ |
| 578 | static void |
| 579 | TopologicalSort(ParseState *pstate, CteItem *items, int numitems) |
| 580 | { |
| 581 | int i, |
| 582 | j; |
| 583 | |
| 584 | /* for each position in sequence ... */ |
| 585 | for (i = 0; i < numitems; i++) |
| 586 | { |
| 587 | /* ... scan the remaining items to find one that has no dependencies */ |
| 588 | for (j = i; j < numitems; j++) |
| 589 | { |
| 590 | if (bms_is_empty(items[j].depends_on)) |
| 591 | break; |
| 592 | } |
| 593 | |
| 594 | /* if we didn't find one, the dependency graph has a cycle */ |
| 595 | if (j >= numitems) |
| 596 | ereport(ERROR, |
| 597 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| 598 | errmsg("mutual recursion between WITH items is not implemented" ), |
| 599 | parser_errposition(pstate, items[i].cte->location))); |
| 600 | |
| 601 | /* |
| 602 | * Found one. Move it to front and remove it from every other item's |
| 603 | * dependencies. |
| 604 | */ |
| 605 | if (i != j) |
| 606 | { |
| 607 | CteItem tmp; |
| 608 | |
| 609 | tmp = items[i]; |
| 610 | items[i] = items[j]; |
| 611 | items[j] = tmp; |
| 612 | } |
| 613 | |
| 614 | /* |
| 615 | * Items up through i are known to have no dependencies left, so we |
| 616 | * can skip them in this loop. |
| 617 | */ |
| 618 | for (j = i + 1; j < numitems; j++) |
| 619 | { |
| 620 | items[j].depends_on = bms_del_member(items[j].depends_on, |
| 621 | items[i].id); |
| 622 | } |
| 623 | } |
| 624 | } |
| 625 | |
| 626 | |
| 627 | /* |
| 628 | * Check that recursive queries are well-formed. |
| 629 | */ |
| 630 | static void |
| 631 | checkWellFormedRecursion(CteState *cstate) |
| 632 | { |
| 633 | int i; |
| 634 | |
| 635 | for (i = 0; i < cstate->numitems; i++) |
| 636 | { |
| 637 | CommonTableExpr *cte = cstate->items[i].cte; |
| 638 | SelectStmt *stmt = (SelectStmt *) cte->ctequery; |
| 639 | |
| 640 | Assert(!IsA(stmt, Query)); /* not analyzed yet */ |
| 641 | |
| 642 | /* Ignore items that weren't found to be recursive */ |
| 643 | if (!cte->cterecursive) |
| 644 | continue; |
| 645 | |
| 646 | /* Must be a SELECT statement */ |
| 647 | if (!IsA(stmt, SelectStmt)) |
| 648 | ereport(ERROR, |
| 649 | (errcode(ERRCODE_INVALID_RECURSION), |
| 650 | errmsg("recursive query \"%s\" must not contain data-modifying statements" , |
| 651 | cte->ctename), |
| 652 | parser_errposition(cstate->pstate, cte->location))); |
| 653 | |
| 654 | /* Must have top-level UNION */ |
| 655 | if (stmt->op != SETOP_UNION) |
| 656 | ereport(ERROR, |
| 657 | (errcode(ERRCODE_INVALID_RECURSION), |
| 658 | errmsg("recursive query \"%s\" does not have the form non-recursive-term UNION [ALL] recursive-term" , |
| 659 | cte->ctename), |
| 660 | parser_errposition(cstate->pstate, cte->location))); |
| 661 | |
| 662 | /* The left-hand operand mustn't contain self-reference at all */ |
| 663 | cstate->curitem = i; |
| 664 | cstate->innerwiths = NIL; |
| 665 | cstate->selfrefcount = 0; |
| 666 | cstate->context = RECURSION_NONRECURSIVETERM; |
| 667 | checkWellFormedRecursionWalker((Node *) stmt->larg, cstate); |
| 668 | Assert(cstate->innerwiths == NIL); |
| 669 | |
| 670 | /* Right-hand operand should contain one reference in a valid place */ |
| 671 | cstate->curitem = i; |
| 672 | cstate->innerwiths = NIL; |
| 673 | cstate->selfrefcount = 0; |
| 674 | cstate->context = RECURSION_OK; |
| 675 | checkWellFormedRecursionWalker((Node *) stmt->rarg, cstate); |
| 676 | Assert(cstate->innerwiths == NIL); |
| 677 | if (cstate->selfrefcount != 1) /* shouldn't happen */ |
| 678 | elog(ERROR, "missing recursive reference" ); |
| 679 | |
| 680 | /* WITH mustn't contain self-reference, either */ |
| 681 | if (stmt->withClause) |
| 682 | { |
| 683 | cstate->curitem = i; |
| 684 | cstate->innerwiths = NIL; |
| 685 | cstate->selfrefcount = 0; |
| 686 | cstate->context = RECURSION_SUBLINK; |
| 687 | checkWellFormedRecursionWalker((Node *) stmt->withClause->ctes, |
| 688 | cstate); |
| 689 | Assert(cstate->innerwiths == NIL); |
| 690 | } |
| 691 | |
| 692 | /* |
| 693 | * Disallow ORDER BY and similar decoration atop the UNION. These |
| 694 | * don't make sense because it's impossible to figure out what they |
| 695 | * mean when we have only part of the recursive query's results. (If |
| 696 | * we did allow them, we'd have to check for recursive references |
| 697 | * inside these subtrees.) |
| 698 | */ |
| 699 | if (stmt->sortClause) |
| 700 | ereport(ERROR, |
| 701 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| 702 | errmsg("ORDER BY in a recursive query is not implemented" ), |
| 703 | parser_errposition(cstate->pstate, |
| 704 | exprLocation((Node *) stmt->sortClause)))); |
| 705 | if (stmt->limitOffset) |
| 706 | ereport(ERROR, |
| 707 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| 708 | errmsg("OFFSET in a recursive query is not implemented" ), |
| 709 | parser_errposition(cstate->pstate, |
| 710 | exprLocation(stmt->limitOffset)))); |
| 711 | if (stmt->limitCount) |
| 712 | ereport(ERROR, |
| 713 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| 714 | errmsg("LIMIT in a recursive query is not implemented" ), |
| 715 | parser_errposition(cstate->pstate, |
| 716 | exprLocation(stmt->limitCount)))); |
| 717 | if (stmt->lockingClause) |
| 718 | ereport(ERROR, |
| 719 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| 720 | errmsg("FOR UPDATE/SHARE in a recursive query is not implemented" ), |
| 721 | parser_errposition(cstate->pstate, |
| 722 | exprLocation((Node *) stmt->lockingClause)))); |
| 723 | } |
| 724 | } |
| 725 | |
| 726 | /* |
| 727 | * Tree walker function to detect invalid self-references in a recursive query. |
| 728 | */ |
| 729 | static bool |
| 730 | checkWellFormedRecursionWalker(Node *node, CteState *cstate) |
| 731 | { |
| 732 | RecursionContext save_context = cstate->context; |
| 733 | |
| 734 | if (node == NULL) |
| 735 | return false; |
| 736 | if (IsA(node, RangeVar)) |
| 737 | { |
| 738 | RangeVar *rv = (RangeVar *) node; |
| 739 | |
| 740 | /* If unqualified name, might be a CTE reference */ |
| 741 | if (!rv->schemaname) |
| 742 | { |
| 743 | ListCell *lc; |
| 744 | CommonTableExpr *mycte; |
| 745 | |
| 746 | /* ... but first see if it's captured by an inner WITH */ |
| 747 | foreach(lc, cstate->innerwiths) |
| 748 | { |
| 749 | List *withlist = (List *) lfirst(lc); |
| 750 | ListCell *lc2; |
| 751 | |
| 752 | foreach(lc2, withlist) |
| 753 | { |
| 754 | CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc2); |
| 755 | |
| 756 | if (strcmp(rv->relname, cte->ctename) == 0) |
| 757 | return false; /* yes, so bail out */ |
| 758 | } |
| 759 | } |
| 760 | |
| 761 | /* No, could be a reference to the query level we are working on */ |
| 762 | mycte = cstate->items[cstate->curitem].cte; |
| 763 | if (strcmp(rv->relname, mycte->ctename) == 0) |
| 764 | { |
| 765 | /* Found a recursive reference to the active query */ |
| 766 | if (cstate->context != RECURSION_OK) |
| 767 | ereport(ERROR, |
| 768 | (errcode(ERRCODE_INVALID_RECURSION), |
| 769 | errmsg(recursion_errormsgs[cstate->context], |
| 770 | mycte->ctename), |
| 771 | parser_errposition(cstate->pstate, |
| 772 | rv->location))); |
| 773 | /* Count references */ |
| 774 | if (++(cstate->selfrefcount) > 1) |
| 775 | ereport(ERROR, |
| 776 | (errcode(ERRCODE_INVALID_RECURSION), |
| 777 | errmsg("recursive reference to query \"%s\" must not appear more than once" , |
| 778 | mycte->ctename), |
| 779 | parser_errposition(cstate->pstate, |
| 780 | rv->location))); |
| 781 | } |
| 782 | } |
| 783 | return false; |
| 784 | } |
| 785 | if (IsA(node, SelectStmt)) |
| 786 | { |
| 787 | SelectStmt *stmt = (SelectStmt *) node; |
| 788 | ListCell *lc; |
| 789 | |
| 790 | if (stmt->withClause) |
| 791 | { |
| 792 | if (stmt->withClause->recursive) |
| 793 | { |
| 794 | /* |
| 795 | * In the RECURSIVE case, all query names of the WITH are |
| 796 | * visible to all WITH items as well as the main query. So |
| 797 | * push them all on, process, pop them all off. |
| 798 | */ |
| 799 | cstate->innerwiths = lcons(stmt->withClause->ctes, |
| 800 | cstate->innerwiths); |
| 801 | foreach(lc, stmt->withClause->ctes) |
| 802 | { |
| 803 | CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc); |
| 804 | |
| 805 | (void) checkWellFormedRecursionWalker(cte->ctequery, cstate); |
| 806 | } |
| 807 | checkWellFormedSelectStmt(stmt, cstate); |
| 808 | cstate->innerwiths = list_delete_first(cstate->innerwiths); |
| 809 | } |
| 810 | else |
| 811 | { |
| 812 | /* |
| 813 | * In the non-RECURSIVE case, query names are visible to the |
| 814 | * WITH items after them and to the main query. |
| 815 | */ |
| 816 | ListCell *cell1; |
| 817 | |
| 818 | cstate->innerwiths = lcons(NIL, cstate->innerwiths); |
| 819 | cell1 = list_head(cstate->innerwiths); |
| 820 | foreach(lc, stmt->withClause->ctes) |
| 821 | { |
| 822 | CommonTableExpr *cte = (CommonTableExpr *) lfirst(lc); |
| 823 | |
| 824 | (void) checkWellFormedRecursionWalker(cte->ctequery, cstate); |
| 825 | lfirst(cell1) = lappend((List *) lfirst(cell1), cte); |
| 826 | } |
| 827 | checkWellFormedSelectStmt(stmt, cstate); |
| 828 | cstate->innerwiths = list_delete_first(cstate->innerwiths); |
| 829 | } |
| 830 | } |
| 831 | else |
| 832 | checkWellFormedSelectStmt(stmt, cstate); |
| 833 | /* We're done examining the SelectStmt */ |
| 834 | return false; |
| 835 | } |
| 836 | if (IsA(node, WithClause)) |
| 837 | { |
| 838 | /* |
| 839 | * Prevent raw_expression_tree_walker from recursing directly into a |
| 840 | * WITH clause. We need that to happen only under the control of the |
| 841 | * code above. |
| 842 | */ |
| 843 | return false; |
| 844 | } |
| 845 | if (IsA(node, JoinExpr)) |
| 846 | { |
| 847 | JoinExpr *j = (JoinExpr *) node; |
| 848 | |
| 849 | switch (j->jointype) |
| 850 | { |
| 851 | case JOIN_INNER: |
| 852 | checkWellFormedRecursionWalker(j->larg, cstate); |
| 853 | checkWellFormedRecursionWalker(j->rarg, cstate); |
| 854 | checkWellFormedRecursionWalker(j->quals, cstate); |
| 855 | break; |
| 856 | case JOIN_LEFT: |
| 857 | checkWellFormedRecursionWalker(j->larg, cstate); |
| 858 | if (save_context == RECURSION_OK) |
| 859 | cstate->context = RECURSION_OUTERJOIN; |
| 860 | checkWellFormedRecursionWalker(j->rarg, cstate); |
| 861 | cstate->context = save_context; |
| 862 | checkWellFormedRecursionWalker(j->quals, cstate); |
| 863 | break; |
| 864 | case JOIN_FULL: |
| 865 | if (save_context == RECURSION_OK) |
| 866 | cstate->context = RECURSION_OUTERJOIN; |
| 867 | checkWellFormedRecursionWalker(j->larg, cstate); |
| 868 | checkWellFormedRecursionWalker(j->rarg, cstate); |
| 869 | cstate->context = save_context; |
| 870 | checkWellFormedRecursionWalker(j->quals, cstate); |
| 871 | break; |
| 872 | case JOIN_RIGHT: |
| 873 | if (save_context == RECURSION_OK) |
| 874 | cstate->context = RECURSION_OUTERJOIN; |
| 875 | checkWellFormedRecursionWalker(j->larg, cstate); |
| 876 | cstate->context = save_context; |
| 877 | checkWellFormedRecursionWalker(j->rarg, cstate); |
| 878 | checkWellFormedRecursionWalker(j->quals, cstate); |
| 879 | break; |
| 880 | default: |
| 881 | elog(ERROR, "unrecognized join type: %d" , |
| 882 | (int) j->jointype); |
| 883 | } |
| 884 | return false; |
| 885 | } |
| 886 | if (IsA(node, SubLink)) |
| 887 | { |
| 888 | SubLink *sl = (SubLink *) node; |
| 889 | |
| 890 | /* |
| 891 | * we intentionally override outer context, since subquery is |
| 892 | * independent |
| 893 | */ |
| 894 | cstate->context = RECURSION_SUBLINK; |
| 895 | checkWellFormedRecursionWalker(sl->subselect, cstate); |
| 896 | cstate->context = save_context; |
| 897 | checkWellFormedRecursionWalker(sl->testexpr, cstate); |
| 898 | return false; |
| 899 | } |
| 900 | return raw_expression_tree_walker(node, |
| 901 | checkWellFormedRecursionWalker, |
| 902 | (void *) cstate); |
| 903 | } |
| 904 | |
| 905 | /* |
| 906 | * subroutine for checkWellFormedRecursionWalker: process a SelectStmt |
| 907 | * without worrying about its WITH clause |
| 908 | */ |
| 909 | static void |
| 910 | checkWellFormedSelectStmt(SelectStmt *stmt, CteState *cstate) |
| 911 | { |
| 912 | RecursionContext save_context = cstate->context; |
| 913 | |
| 914 | if (save_context != RECURSION_OK) |
| 915 | { |
| 916 | /* just recurse without changing state */ |
| 917 | raw_expression_tree_walker((Node *) stmt, |
| 918 | checkWellFormedRecursionWalker, |
| 919 | (void *) cstate); |
| 920 | } |
| 921 | else |
| 922 | { |
| 923 | switch (stmt->op) |
| 924 | { |
| 925 | case SETOP_NONE: |
| 926 | case SETOP_UNION: |
| 927 | raw_expression_tree_walker((Node *) stmt, |
| 928 | checkWellFormedRecursionWalker, |
| 929 | (void *) cstate); |
| 930 | break; |
| 931 | case SETOP_INTERSECT: |
| 932 | if (stmt->all) |
| 933 | cstate->context = RECURSION_INTERSECT; |
| 934 | checkWellFormedRecursionWalker((Node *) stmt->larg, |
| 935 | cstate); |
| 936 | checkWellFormedRecursionWalker((Node *) stmt->rarg, |
| 937 | cstate); |
| 938 | cstate->context = save_context; |
| 939 | checkWellFormedRecursionWalker((Node *) stmt->sortClause, |
| 940 | cstate); |
| 941 | checkWellFormedRecursionWalker((Node *) stmt->limitOffset, |
| 942 | cstate); |
| 943 | checkWellFormedRecursionWalker((Node *) stmt->limitCount, |
| 944 | cstate); |
| 945 | checkWellFormedRecursionWalker((Node *) stmt->lockingClause, |
| 946 | cstate); |
| 947 | /* stmt->withClause is intentionally ignored here */ |
| 948 | break; |
| 949 | case SETOP_EXCEPT: |
| 950 | if (stmt->all) |
| 951 | cstate->context = RECURSION_EXCEPT; |
| 952 | checkWellFormedRecursionWalker((Node *) stmt->larg, |
| 953 | cstate); |
| 954 | cstate->context = RECURSION_EXCEPT; |
| 955 | checkWellFormedRecursionWalker((Node *) stmt->rarg, |
| 956 | cstate); |
| 957 | cstate->context = save_context; |
| 958 | checkWellFormedRecursionWalker((Node *) stmt->sortClause, |
| 959 | cstate); |
| 960 | checkWellFormedRecursionWalker((Node *) stmt->limitOffset, |
| 961 | cstate); |
| 962 | checkWellFormedRecursionWalker((Node *) stmt->limitCount, |
| 963 | cstate); |
| 964 | checkWellFormedRecursionWalker((Node *) stmt->lockingClause, |
| 965 | cstate); |
| 966 | /* stmt->withClause is intentionally ignored here */ |
| 967 | break; |
| 968 | default: |
| 969 | elog(ERROR, "unrecognized set op: %d" , |
| 970 | (int) stmt->op); |
| 971 | } |
| 972 | } |
| 973 | } |
| 974 | |