| 1 | /*------------------------------------------------------------------------- |
| 2 | * |
| 3 | * parse_collate.c |
| 4 | * Routines for assigning collation information. |
| 5 | * |
| 6 | * We choose to handle collation analysis in a post-pass over the output |
| 7 | * of expression parse analysis. This is because we need more state to |
| 8 | * perform this processing than is needed in the finished tree. If we |
| 9 | * did it on-the-fly while building the tree, all that state would have |
| 10 | * to be kept in expression node trees permanently. This way, the extra |
| 11 | * storage is just local variables in this recursive routine. |
| 12 | * |
| 13 | * The info that is actually saved in the finished tree is: |
| 14 | * 1. The output collation of each expression node, or InvalidOid if it |
| 15 | * returns a noncollatable data type. This can also be InvalidOid if the |
| 16 | * result type is collatable but the collation is indeterminate. |
| 17 | * 2. The collation to be used in executing each function. InvalidOid means |
| 18 | * that there are no collatable inputs or their collation is indeterminate. |
| 19 | * This value is only stored in node types that might call collation-using |
| 20 | * functions. |
| 21 | * |
| 22 | * You might think we could get away with storing only one collation per |
| 23 | * node, but the two concepts really need to be kept distinct. Otherwise |
| 24 | * it's too confusing when a function produces a collatable output type but |
| 25 | * has no collatable inputs or produces noncollatable output from collatable |
| 26 | * inputs. |
| 27 | * |
| 28 | * Cases with indeterminate collation might result in an error being thrown |
| 29 | * at runtime. If we knew exactly which functions require collation |
| 30 | * information, we could throw those errors at parse time instead. |
| 31 | * |
| 32 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
| 33 | * Portions Copyright (c) 1994, Regents of the University of California |
| 34 | * |
| 35 | * |
| 36 | * IDENTIFICATION |
| 37 | * src/backend/parser/parse_collate.c |
| 38 | * |
| 39 | *------------------------------------------------------------------------- |
| 40 | */ |
| 41 | #include "postgres.h" |
| 42 | |
| 43 | #include "catalog/pg_aggregate.h" |
| 44 | #include "catalog/pg_collation.h" |
| 45 | #include "nodes/makefuncs.h" |
| 46 | #include "nodes/nodeFuncs.h" |
| 47 | #include "parser/parse_collate.h" |
| 48 | #include "utils/lsyscache.h" |
| 49 | |
| 50 | |
/*
 * CollateStrength
 *		How firmly a collation choice has been established for an expression
 *		(the SQL standard's term for this is "derivation").
 *
 * The members are listed from weakest to strongest on purpose: code in this
 * file compares strengths with relational operators, and a numerically
 * larger strength overrides a smaller one.  Do not reorder them.
 *
 * Note: the standard doesn't seem to distinguish between NONE and CONFLICT.
 */
typedef enum
{
	COLLATE_NONE = 0,			/* expression is of a noncollatable datatype */
	COLLATE_IMPLICIT = 1,		/* collation was derived implicitly */
	COLLATE_CONFLICT = 2,		/* implicit collations were in conflict */
	COLLATE_EXPLICIT = 3		/* collation was derived explicitly */
} CollateStrength;
| 63 | |
| 64 | typedef struct |
| 65 | { |
| 66 | ParseState *pstate; /* parse state (for error reporting) */ |
| 67 | Oid collation; /* OID of current collation, if any */ |
| 68 | CollateStrength strength; /* strength of current collation choice */ |
| 69 | int location; /* location of expr that set collation */ |
| 70 | /* Remaining fields are only valid when strength == COLLATE_CONFLICT */ |
| 71 | Oid collation2; /* OID of conflicting collation */ |
| 72 | int location2; /* location of expr that set collation2 */ |
| 73 | } assign_collations_context; |
| 74 | |
| 75 | static bool assign_query_collations_walker(Node *node, ParseState *pstate); |
| 76 | static bool assign_collations_walker(Node *node, |
| 77 | assign_collations_context *context); |
| 78 | static void merge_collation_state(Oid collation, |
| 79 | CollateStrength strength, |
| 80 | int location, |
| 81 | Oid collation2, |
| 82 | int location2, |
| 83 | assign_collations_context *context); |
| 84 | static void assign_aggregate_collations(Aggref *aggref, |
| 85 | assign_collations_context *loccontext); |
| 86 | static void assign_ordered_set_collations(Aggref *aggref, |
| 87 | assign_collations_context *loccontext); |
| 88 | static void assign_hypothetical_collations(Aggref *aggref, |
| 89 | assign_collations_context *loccontext); |
| 90 | |
| 91 | |
| 92 | /* |
| 93 | * assign_query_collations() |
| 94 | * Mark all expressions in the given Query with collation information. |
| 95 | * |
| 96 | * This should be applied to each Query after completion of parse analysis |
| 97 | * for expressions. Note that we do not recurse into sub-Queries, since |
| 98 | * those should have been processed when built. |
| 99 | */ |
| 100 | void |
| 101 | assign_query_collations(ParseState *pstate, Query *query) |
| 102 | { |
| 103 | /* |
| 104 | * We just use query_tree_walker() to visit all the contained expressions. |
| 105 | * We can skip the rangetable and CTE subqueries, though, since RTEs and |
| 106 | * subqueries had better have been processed already (else Vars referring |
| 107 | * to them would not get created with the right collation). |
| 108 | */ |
| 109 | (void) query_tree_walker(query, |
| 110 | assign_query_collations_walker, |
| 111 | (void *) pstate, |
| 112 | QTW_IGNORE_RANGE_TABLE | |
| 113 | QTW_IGNORE_CTE_SUBQUERIES); |
| 114 | } |
| 115 | |
| 116 | /* |
| 117 | * Walker for assign_query_collations |
| 118 | * |
| 119 | * Each expression found by query_tree_walker is processed independently. |
| 120 | * Note that query_tree_walker may pass us a whole List, such as the |
| 121 | * targetlist, in which case each subexpression must be processed |
| 122 | * independently --- we don't want to bleat if two different targetentries |
| 123 | * have different collations. |
| 124 | */ |
| 125 | static bool |
| 126 | assign_query_collations_walker(Node *node, ParseState *pstate) |
| 127 | { |
| 128 | /* Need do nothing for empty subexpressions */ |
| 129 | if (node == NULL) |
| 130 | return false; |
| 131 | |
| 132 | /* |
| 133 | * We don't want to recurse into a set-operations tree; it's already been |
| 134 | * fully processed in transformSetOperationStmt. |
| 135 | */ |
| 136 | if (IsA(node, SetOperationStmt)) |
| 137 | return false; |
| 138 | |
| 139 | if (IsA(node, List)) |
| 140 | assign_list_collations(pstate, (List *) node); |
| 141 | else |
| 142 | assign_expr_collations(pstate, node); |
| 143 | |
| 144 | return false; |
| 145 | } |
| 146 | |
| 147 | /* |
| 148 | * assign_list_collations() |
| 149 | * Mark all nodes in the list of expressions with collation information. |
| 150 | * |
| 151 | * The list member expressions are processed independently; they do not have |
| 152 | * to share a common collation. |
| 153 | */ |
| 154 | void |
| 155 | assign_list_collations(ParseState *pstate, List *exprs) |
| 156 | { |
| 157 | ListCell *lc; |
| 158 | |
| 159 | foreach(lc, exprs) |
| 160 | { |
| 161 | Node *node = (Node *) lfirst(lc); |
| 162 | |
| 163 | assign_expr_collations(pstate, node); |
| 164 | } |
| 165 | } |
| 166 | |
| 167 | /* |
| 168 | * assign_expr_collations() |
| 169 | * Mark all nodes in the given expression tree with collation information. |
| 170 | * |
| 171 | * This is exported for the benefit of various utility commands that process |
| 172 | * expressions without building a complete Query. It should be applied after |
| 173 | * calling transformExpr() plus any expression-modifying operations such as |
| 174 | * coerce_to_boolean(). |
| 175 | */ |
| 176 | void |
| 177 | assign_expr_collations(ParseState *pstate, Node *expr) |
| 178 | { |
| 179 | assign_collations_context context; |
| 180 | |
| 181 | /* initialize context for tree walk */ |
| 182 | context.pstate = pstate; |
| 183 | context.collation = InvalidOid; |
| 184 | context.strength = COLLATE_NONE; |
| 185 | context.location = -1; |
| 186 | |
| 187 | /* and away we go */ |
| 188 | (void) assign_collations_walker(expr, &context); |
| 189 | } |
| 190 | |
| 191 | /* |
| 192 | * select_common_collation() |
| 193 | * Identify a common collation for a list of expressions. |
| 194 | * |
| 195 | * The expressions should all return the same datatype, else this is not |
| 196 | * terribly meaningful. |
| 197 | * |
| 198 | * none_ok means that it is permitted to return InvalidOid, indicating that |
| 199 | * no common collation could be identified, even for collatable datatypes. |
| 200 | * Otherwise, an error is thrown for conflict of implicit collations. |
| 201 | * |
| 202 | * In theory, none_ok = true reflects the rules of SQL standard clause "Result |
| 203 | * of data type combinations", none_ok = false reflects the rules of clause |
| 204 | * "Collation determination" (in some cases invoked via "Grouping |
| 205 | * operations"). |
| 206 | */ |
| 207 | Oid |
| 208 | select_common_collation(ParseState *pstate, List *exprs, bool none_ok) |
| 209 | { |
| 210 | assign_collations_context context; |
| 211 | |
| 212 | /* initialize context for tree walk */ |
| 213 | context.pstate = pstate; |
| 214 | context.collation = InvalidOid; |
| 215 | context.strength = COLLATE_NONE; |
| 216 | context.location = -1; |
| 217 | |
| 218 | /* and away we go */ |
| 219 | (void) assign_collations_walker((Node *) exprs, &context); |
| 220 | |
| 221 | /* deal with collation conflict */ |
| 222 | if (context.strength == COLLATE_CONFLICT) |
| 223 | { |
| 224 | if (none_ok) |
| 225 | return InvalidOid; |
| 226 | ereport(ERROR, |
| 227 | (errcode(ERRCODE_COLLATION_MISMATCH), |
| 228 | errmsg("collation mismatch between implicit collations \"%s\" and \"%s\"" , |
| 229 | get_collation_name(context.collation), |
| 230 | get_collation_name(context.collation2)), |
| 231 | errhint("You can choose the collation by applying the COLLATE clause to one or both expressions." ), |
| 232 | parser_errposition(context.pstate, context.location2))); |
| 233 | } |
| 234 | |
| 235 | /* |
| 236 | * Note: if strength is still COLLATE_NONE, we'll return InvalidOid, but |
| 237 | * that's okay because it must mean none of the expressions returned |
| 238 | * collatable datatypes. |
| 239 | */ |
| 240 | return context.collation; |
| 241 | } |
| 242 | |
| 243 | /* |
| 244 | * assign_collations_walker() |
| 245 | * Recursive guts of collation processing. |
| 246 | * |
| 247 | * Nodes with no children (eg, Vars, Consts, Params) must have been marked |
| 248 | * when built. All upper-level nodes are marked here. |
| 249 | * |
| 250 | * Note: if this is invoked directly on a List, it will attempt to infer a |
| 251 | * common collation for all the list members. In particular, it will throw |
| 252 | * error if there are conflicting explicit collations for different members. |
| 253 | */ |
| 254 | static bool |
| 255 | assign_collations_walker(Node *node, assign_collations_context *context) |
| 256 | { |
| 257 | assign_collations_context loccontext; |
| 258 | Oid collation; |
| 259 | CollateStrength strength; |
| 260 | int location; |
| 261 | |
| 262 | /* Need do nothing for empty subexpressions */ |
| 263 | if (node == NULL) |
| 264 | return false; |
| 265 | |
| 266 | /* |
| 267 | * Prepare for recursion. For most node types, though not all, the first |
| 268 | * thing we do is recurse to process all nodes below this one. Each level |
| 269 | * of the tree has its own local context. |
| 270 | */ |
| 271 | loccontext.pstate = context->pstate; |
| 272 | loccontext.collation = InvalidOid; |
| 273 | loccontext.strength = COLLATE_NONE; |
| 274 | loccontext.location = -1; |
| 275 | /* Set these fields just to suppress uninitialized-value warnings: */ |
| 276 | loccontext.collation2 = InvalidOid; |
| 277 | loccontext.location2 = -1; |
| 278 | |
| 279 | /* |
| 280 | * Recurse if appropriate, then determine the collation for this node. |
| 281 | * |
| 282 | * Note: the general cases are at the bottom of the switch, after various |
| 283 | * special cases. |
| 284 | */ |
| 285 | switch (nodeTag(node)) |
| 286 | { |
| 287 | case T_CollateExpr: |
| 288 | { |
| 289 | /* |
| 290 | * COLLATE sets an explicitly derived collation, regardless of |
| 291 | * what the child state is. But we must recurse to set up |
| 292 | * collation info below here. |
| 293 | */ |
| 294 | CollateExpr *expr = (CollateExpr *) node; |
| 295 | |
| 296 | (void) expression_tree_walker(node, |
| 297 | assign_collations_walker, |
| 298 | (void *) &loccontext); |
| 299 | |
| 300 | collation = expr->collOid; |
| 301 | Assert(OidIsValid(collation)); |
| 302 | strength = COLLATE_EXPLICIT; |
| 303 | location = expr->location; |
| 304 | } |
| 305 | break; |
| 306 | case T_FieldSelect: |
| 307 | { |
| 308 | /* |
| 309 | * For FieldSelect, the result has the field's declared |
| 310 | * collation, independently of what happened in the arguments. |
| 311 | * (The immediate argument must be composite and thus not |
| 312 | * collatable, anyhow.) The field's collation was already |
| 313 | * looked up and saved in the node. |
| 314 | */ |
| 315 | FieldSelect *expr = (FieldSelect *) node; |
| 316 | |
| 317 | /* ... but first, recurse */ |
| 318 | (void) expression_tree_walker(node, |
| 319 | assign_collations_walker, |
| 320 | (void *) &loccontext); |
| 321 | |
| 322 | if (OidIsValid(expr->resultcollid)) |
| 323 | { |
| 324 | /* Node's result type is collatable. */ |
| 325 | /* Pass up field's collation as an implicit choice. */ |
| 326 | collation = expr->resultcollid; |
| 327 | strength = COLLATE_IMPLICIT; |
| 328 | location = exprLocation(node); |
| 329 | } |
| 330 | else |
| 331 | { |
| 332 | /* Node's result type isn't collatable. */ |
| 333 | collation = InvalidOid; |
| 334 | strength = COLLATE_NONE; |
| 335 | location = -1; /* won't be used */ |
| 336 | } |
| 337 | } |
| 338 | break; |
| 339 | case T_RowExpr: |
| 340 | { |
| 341 | /* |
| 342 | * RowExpr is a special case because the subexpressions are |
| 343 | * independent: we don't want to complain if some of them have |
| 344 | * incompatible explicit collations. |
| 345 | */ |
| 346 | RowExpr *expr = (RowExpr *) node; |
| 347 | |
| 348 | assign_list_collations(context->pstate, expr->args); |
| 349 | |
| 350 | /* |
| 351 | * Since the result is always composite and therefore never |
| 352 | * has a collation, we can just stop here: this node has no |
| 353 | * impact on the collation of its parent. |
| 354 | */ |
| 355 | return false; /* done */ |
| 356 | } |
| 357 | case T_RowCompareExpr: |
| 358 | { |
| 359 | /* |
| 360 | * For RowCompare, we have to find the common collation of |
| 361 | * each pair of input columns and build a list. If we can't |
| 362 | * find a common collation, we just put InvalidOid into the |
| 363 | * list, which may or may not cause an error at runtime. |
| 364 | */ |
| 365 | RowCompareExpr *expr = (RowCompareExpr *) node; |
| 366 | List *colls = NIL; |
| 367 | ListCell *l; |
| 368 | ListCell *r; |
| 369 | |
| 370 | forboth(l, expr->largs, r, expr->rargs) |
| 371 | { |
| 372 | Node *le = (Node *) lfirst(l); |
| 373 | Node *re = (Node *) lfirst(r); |
| 374 | Oid coll; |
| 375 | |
| 376 | coll = select_common_collation(context->pstate, |
| 377 | list_make2(le, re), |
| 378 | true); |
| 379 | colls = lappend_oid(colls, coll); |
| 380 | } |
| 381 | expr->inputcollids = colls; |
| 382 | |
| 383 | /* |
| 384 | * Since the result is always boolean and therefore never has |
| 385 | * a collation, we can just stop here: this node has no impact |
| 386 | * on the collation of its parent. |
| 387 | */ |
| 388 | return false; /* done */ |
| 389 | } |
| 390 | case T_CoerceToDomain: |
| 391 | { |
| 392 | /* |
| 393 | * If the domain declaration included a non-default COLLATE |
| 394 | * spec, then use that collation as the output collation of |
| 395 | * the coercion. Otherwise allow the input collation to |
| 396 | * bubble up. (The input should be of the domain's base type, |
| 397 | * therefore we don't need to worry about it not being |
| 398 | * collatable when the domain is.) |
| 399 | */ |
| 400 | CoerceToDomain *expr = (CoerceToDomain *) node; |
| 401 | Oid typcollation = get_typcollation(expr->resulttype); |
| 402 | |
| 403 | /* ... but first, recurse */ |
| 404 | (void) expression_tree_walker(node, |
| 405 | assign_collations_walker, |
| 406 | (void *) &loccontext); |
| 407 | |
| 408 | if (OidIsValid(typcollation)) |
| 409 | { |
| 410 | /* Node's result type is collatable. */ |
| 411 | if (typcollation == DEFAULT_COLLATION_OID) |
| 412 | { |
| 413 | /* Collation state bubbles up from child. */ |
| 414 | collation = loccontext.collation; |
| 415 | strength = loccontext.strength; |
| 416 | location = loccontext.location; |
| 417 | } |
| 418 | else |
| 419 | { |
| 420 | /* Use domain's collation as an implicit choice. */ |
| 421 | collation = typcollation; |
| 422 | strength = COLLATE_IMPLICIT; |
| 423 | location = exprLocation(node); |
| 424 | } |
| 425 | } |
| 426 | else |
| 427 | { |
| 428 | /* Node's result type isn't collatable. */ |
| 429 | collation = InvalidOid; |
| 430 | strength = COLLATE_NONE; |
| 431 | location = -1; /* won't be used */ |
| 432 | } |
| 433 | |
| 434 | /* |
| 435 | * Save the state into the expression node. We know it |
| 436 | * doesn't care about input collation. |
| 437 | */ |
| 438 | if (strength == COLLATE_CONFLICT) |
| 439 | exprSetCollation(node, InvalidOid); |
| 440 | else |
| 441 | exprSetCollation(node, collation); |
| 442 | } |
| 443 | break; |
| 444 | case T_TargetEntry: |
| 445 | (void) expression_tree_walker(node, |
| 446 | assign_collations_walker, |
| 447 | (void *) &loccontext); |
| 448 | |
| 449 | /* |
| 450 | * TargetEntry can have only one child, and should bubble that |
| 451 | * state up to its parent. We can't use the general-case code |
| 452 | * below because exprType and friends don't work on TargetEntry. |
| 453 | */ |
| 454 | collation = loccontext.collation; |
| 455 | strength = loccontext.strength; |
| 456 | location = loccontext.location; |
| 457 | |
| 458 | /* |
| 459 | * Throw error if the collation is indeterminate for a TargetEntry |
| 460 | * that is a sort/group target. We prefer to do this now, instead |
| 461 | * of leaving the comparison functions to fail at runtime, because |
| 462 | * we can give a syntax error pointer to help locate the problem. |
| 463 | * There are some cases where there might not be a failure, for |
| 464 | * example if the planner chooses to use hash aggregation instead |
| 465 | * of sorting for grouping; but it seems better to predictably |
| 466 | * throw an error. (Compare transformSetOperationTree, which will |
| 467 | * throw error for indeterminate collation of set-op columns, even |
| 468 | * though the planner might be able to implement the set-op |
| 469 | * without sorting.) |
| 470 | */ |
| 471 | if (strength == COLLATE_CONFLICT && |
| 472 | ((TargetEntry *) node)->ressortgroupref != 0) |
| 473 | ereport(ERROR, |
| 474 | (errcode(ERRCODE_COLLATION_MISMATCH), |
| 475 | errmsg("collation mismatch between implicit collations \"%s\" and \"%s\"" , |
| 476 | get_collation_name(loccontext.collation), |
| 477 | get_collation_name(loccontext.collation2)), |
| 478 | errhint("You can choose the collation by applying the COLLATE clause to one or both expressions." ), |
| 479 | parser_errposition(context->pstate, |
| 480 | loccontext.location2))); |
| 481 | break; |
| 482 | case T_InferenceElem: |
| 483 | case T_RangeTblRef: |
| 484 | case T_JoinExpr: |
| 485 | case T_FromExpr: |
| 486 | case T_OnConflictExpr: |
| 487 | case T_SortGroupClause: |
| 488 | (void) expression_tree_walker(node, |
| 489 | assign_collations_walker, |
| 490 | (void *) &loccontext); |
| 491 | |
| 492 | /* |
| 493 | * When we're invoked on a query's jointree, we don't need to do |
| 494 | * anything with join nodes except recurse through them to process |
| 495 | * WHERE/ON expressions. So just stop here. Likewise, we don't |
| 496 | * need to do anything when invoked on sort/group lists. |
| 497 | */ |
| 498 | return false; |
| 499 | case T_Query: |
| 500 | { |
| 501 | /* |
| 502 | * We get here when we're invoked on the Query belonging to a |
| 503 | * SubLink. Act as though the Query returns its first output |
| 504 | * column, which indeed is what it does for EXPR_SUBLINK and |
| 505 | * ARRAY_SUBLINK cases. In the cases where the SubLink |
| 506 | * returns boolean, this info will be ignored. Special case: |
| 507 | * in EXISTS, the Query might return no columns, in which case |
| 508 | * we need do nothing. |
| 509 | * |
| 510 | * We needn't recurse, since the Query is already processed. |
| 511 | */ |
| 512 | Query *qtree = (Query *) node; |
| 513 | TargetEntry *tent; |
| 514 | |
| 515 | if (qtree->targetList == NIL) |
| 516 | return false; |
| 517 | tent = linitial_node(TargetEntry, qtree->targetList); |
| 518 | if (tent->resjunk) |
| 519 | return false; |
| 520 | |
| 521 | collation = exprCollation((Node *) tent->expr); |
| 522 | /* collation doesn't change if it's converted to array */ |
| 523 | strength = COLLATE_IMPLICIT; |
| 524 | location = exprLocation((Node *) tent->expr); |
| 525 | } |
| 526 | break; |
| 527 | case T_List: |
| 528 | (void) expression_tree_walker(node, |
| 529 | assign_collations_walker, |
| 530 | (void *) &loccontext); |
| 531 | |
| 532 | /* |
| 533 | * When processing a list, collation state just bubbles up from |
| 534 | * the list elements. |
| 535 | */ |
| 536 | collation = loccontext.collation; |
| 537 | strength = loccontext.strength; |
| 538 | location = loccontext.location; |
| 539 | break; |
| 540 | |
| 541 | case T_Var: |
| 542 | case T_Const: |
| 543 | case T_Param: |
| 544 | case T_CoerceToDomainValue: |
| 545 | case T_CaseTestExpr: |
| 546 | case T_SetToDefault: |
| 547 | case T_CurrentOfExpr: |
| 548 | |
| 549 | /* |
| 550 | * General case for childless expression nodes. These should |
| 551 | * already have a collation assigned; it is not this function's |
| 552 | * responsibility to look into the catalogs for base-case |
| 553 | * information. |
| 554 | */ |
| 555 | collation = exprCollation(node); |
| 556 | |
| 557 | /* |
| 558 | * Note: in most cases, there will be an assigned collation |
| 559 | * whenever type_is_collatable(exprType(node)); but an exception |
| 560 | * occurs for a Var referencing a subquery output column for which |
| 561 | * a unique collation was not determinable. That may lead to a |
| 562 | * runtime failure if a collation-sensitive function is applied to |
| 563 | * the Var. |
| 564 | */ |
| 565 | |
| 566 | if (OidIsValid(collation)) |
| 567 | strength = COLLATE_IMPLICIT; |
| 568 | else |
| 569 | strength = COLLATE_NONE; |
| 570 | location = exprLocation(node); |
| 571 | break; |
| 572 | |
| 573 | default: |
| 574 | { |
| 575 | /* |
| 576 | * General case for most expression nodes with children. First |
| 577 | * recurse, then figure out what to assign to this node. |
| 578 | */ |
| 579 | Oid typcollation; |
| 580 | |
| 581 | /* |
| 582 | * For most node types, we want to treat all the child |
| 583 | * expressions alike; but there are a few exceptions, hence |
| 584 | * this inner switch. |
| 585 | */ |
| 586 | switch (nodeTag(node)) |
| 587 | { |
| 588 | case T_Aggref: |
| 589 | { |
| 590 | /* |
| 591 | * Aggref is messy enough that we give it its own |
| 592 | * function, in fact three of them. The FILTER |
| 593 | * clause is independent of the rest of the |
| 594 | * aggregate, however, so it can be processed |
| 595 | * separately. |
| 596 | */ |
| 597 | Aggref *aggref = (Aggref *) node; |
| 598 | |
| 599 | switch (aggref->aggkind) |
| 600 | { |
| 601 | case AGGKIND_NORMAL: |
| 602 | assign_aggregate_collations(aggref, |
| 603 | &loccontext); |
| 604 | break; |
| 605 | case AGGKIND_ORDERED_SET: |
| 606 | assign_ordered_set_collations(aggref, |
| 607 | &loccontext); |
| 608 | break; |
| 609 | case AGGKIND_HYPOTHETICAL: |
| 610 | assign_hypothetical_collations(aggref, |
| 611 | &loccontext); |
| 612 | break; |
| 613 | default: |
| 614 | elog(ERROR, "unrecognized aggkind: %d" , |
| 615 | (int) aggref->aggkind); |
| 616 | } |
| 617 | |
| 618 | assign_expr_collations(context->pstate, |
| 619 | (Node *) aggref->aggfilter); |
| 620 | } |
| 621 | break; |
| 622 | case T_WindowFunc: |
| 623 | { |
| 624 | /* |
| 625 | * WindowFunc requires special processing only for |
| 626 | * its aggfilter clause, as for aggregates. |
| 627 | */ |
| 628 | WindowFunc *wfunc = (WindowFunc *) node; |
| 629 | |
| 630 | (void) assign_collations_walker((Node *) wfunc->args, |
| 631 | &loccontext); |
| 632 | |
| 633 | assign_expr_collations(context->pstate, |
| 634 | (Node *) wfunc->aggfilter); |
| 635 | } |
| 636 | break; |
| 637 | case T_CaseExpr: |
| 638 | { |
| 639 | /* |
| 640 | * CaseExpr is a special case because we do not |
| 641 | * want to recurse into the test expression (if |
| 642 | * any). It was already marked with collations |
| 643 | * during transformCaseExpr, and furthermore its |
| 644 | * collation is not relevant to the result of the |
| 645 | * CASE --- only the output expressions are. |
| 646 | */ |
| 647 | CaseExpr *expr = (CaseExpr *) node; |
| 648 | ListCell *lc; |
| 649 | |
| 650 | foreach(lc, expr->args) |
| 651 | { |
| 652 | CaseWhen *when = lfirst_node(CaseWhen, lc); |
| 653 | |
| 654 | /* |
| 655 | * The condition expressions mustn't affect |
| 656 | * the CASE's result collation either; but |
| 657 | * since they are known to yield boolean, it's |
| 658 | * safe to recurse directly on them --- they |
| 659 | * won't change loccontext. |
| 660 | */ |
| 661 | (void) assign_collations_walker((Node *) when->expr, |
| 662 | &loccontext); |
| 663 | (void) assign_collations_walker((Node *) when->result, |
| 664 | &loccontext); |
| 665 | } |
| 666 | (void) assign_collations_walker((Node *) expr->defresult, |
| 667 | &loccontext); |
| 668 | } |
| 669 | break; |
| 670 | default: |
| 671 | |
| 672 | /* |
| 673 | * Normal case: all child expressions contribute |
| 674 | * equally to loccontext. |
| 675 | */ |
| 676 | (void) expression_tree_walker(node, |
| 677 | assign_collations_walker, |
| 678 | (void *) &loccontext); |
| 679 | break; |
| 680 | } |
| 681 | |
| 682 | /* |
| 683 | * Now figure out what collation to assign to this node. |
| 684 | */ |
| 685 | typcollation = get_typcollation(exprType(node)); |
| 686 | if (OidIsValid(typcollation)) |
| 687 | { |
| 688 | /* Node's result is collatable; what about its input? */ |
| 689 | if (loccontext.strength > COLLATE_NONE) |
| 690 | { |
| 691 | /* Collation state bubbles up from children. */ |
| 692 | collation = loccontext.collation; |
| 693 | strength = loccontext.strength; |
| 694 | location = loccontext.location; |
| 695 | } |
| 696 | else |
| 697 | { |
| 698 | /* |
| 699 | * Collatable output produced without any collatable |
| 700 | * input. Use the type's collation (which is usually |
| 701 | * DEFAULT_COLLATION_OID, but might be different for a |
| 702 | * domain). |
| 703 | */ |
| 704 | collation = typcollation; |
| 705 | strength = COLLATE_IMPLICIT; |
| 706 | location = exprLocation(node); |
| 707 | } |
| 708 | } |
| 709 | else |
| 710 | { |
| 711 | /* Node's result type isn't collatable. */ |
| 712 | collation = InvalidOid; |
| 713 | strength = COLLATE_NONE; |
| 714 | location = -1; /* won't be used */ |
| 715 | } |
| 716 | |
| 717 | /* |
| 718 | * Save the result collation into the expression node. If the |
| 719 | * state is COLLATE_CONFLICT, we'll set the collation to |
| 720 | * InvalidOid, which might result in an error at runtime. |
| 721 | */ |
| 722 | if (strength == COLLATE_CONFLICT) |
| 723 | exprSetCollation(node, InvalidOid); |
| 724 | else |
| 725 | exprSetCollation(node, collation); |
| 726 | |
| 727 | /* |
| 728 | * Likewise save the input collation, which is the one that |
| 729 | * any function called by this node should use. |
| 730 | */ |
| 731 | if (loccontext.strength == COLLATE_CONFLICT) |
| 732 | exprSetInputCollation(node, InvalidOid); |
| 733 | else |
| 734 | exprSetInputCollation(node, loccontext.collation); |
| 735 | } |
| 736 | break; |
| 737 | } |
| 738 | |
| 739 | /* |
| 740 | * Now, merge my information into my parent's state. |
| 741 | */ |
| 742 | merge_collation_state(collation, |
| 743 | strength, |
| 744 | location, |
| 745 | loccontext.collation2, |
| 746 | loccontext.location2, |
| 747 | context); |
| 748 | |
| 749 | return false; |
| 750 | } |
| 751 | |
| 752 | /* |
| 753 | * Merge collation state of a subexpression into the context for its parent. |
| 754 | */ |
| 755 | static void |
| 756 | merge_collation_state(Oid collation, |
| 757 | CollateStrength strength, |
| 758 | int location, |
| 759 | Oid collation2, |
| 760 | int location2, |
| 761 | assign_collations_context *context) |
| 762 | { |
| 763 | /* |
| 764 | * If the collation strength for this node is different from what's |
| 765 | * already in *context, then this node either dominates or is dominated by |
| 766 | * earlier siblings. |
| 767 | */ |
| 768 | if (strength > context->strength) |
| 769 | { |
| 770 | /* Override previous parent state */ |
| 771 | context->collation = collation; |
| 772 | context->strength = strength; |
| 773 | context->location = location; |
| 774 | /* Bubble up error info if applicable */ |
| 775 | if (strength == COLLATE_CONFLICT) |
| 776 | { |
| 777 | context->collation2 = collation2; |
| 778 | context->location2 = location2; |
| 779 | } |
| 780 | } |
| 781 | else if (strength == context->strength) |
| 782 | { |
| 783 | /* Merge, or detect error if there's a collation conflict */ |
| 784 | switch (strength) |
| 785 | { |
| 786 | case COLLATE_NONE: |
| 787 | /* Nothing + nothing is still nothing */ |
| 788 | break; |
| 789 | case COLLATE_IMPLICIT: |
| 790 | if (collation != context->collation) |
| 791 | { |
| 792 | /* |
| 793 | * Non-default implicit collation always beats default. |
| 794 | */ |
| 795 | if (context->collation == DEFAULT_COLLATION_OID) |
| 796 | { |
| 797 | /* Override previous parent state */ |
| 798 | context->collation = collation; |
| 799 | context->strength = strength; |
| 800 | context->location = location; |
| 801 | } |
| 802 | else if (collation != DEFAULT_COLLATION_OID) |
| 803 | { |
| 804 | /* |
| 805 | * Oops, we have a conflict. We cannot throw error |
| 806 | * here, since the conflict could be resolved by a |
| 807 | * later sibling CollateExpr, or the parent might not |
| 808 | * care about collation anyway. Return enough info to |
| 809 | * throw the error later, if needed. |
| 810 | */ |
| 811 | context->strength = COLLATE_CONFLICT; |
| 812 | context->collation2 = collation; |
| 813 | context->location2 = location; |
| 814 | } |
| 815 | } |
| 816 | break; |
| 817 | case COLLATE_CONFLICT: |
| 818 | /* We're still conflicted ... */ |
| 819 | break; |
| 820 | case COLLATE_EXPLICIT: |
| 821 | if (collation != context->collation) |
| 822 | { |
| 823 | /* |
| 824 | * Oops, we have a conflict of explicit COLLATE clauses. |
| 825 | * Here we choose to throw error immediately; that is what |
| 826 | * the SQL standard says to do, and there's no good reason |
| 827 | * to be less strict. |
| 828 | */ |
| 829 | ereport(ERROR, |
| 830 | (errcode(ERRCODE_COLLATION_MISMATCH), |
| 831 | errmsg("collation mismatch between explicit collations \"%s\" and \"%s\"" , |
| 832 | get_collation_name(context->collation), |
| 833 | get_collation_name(collation)), |
| 834 | parser_errposition(context->pstate, location))); |
| 835 | } |
| 836 | break; |
| 837 | } |
| 838 | } |
| 839 | } |
| 840 | |
| 841 | /* |
| 842 | * Aggref is a special case because expressions used only for ordering |
| 843 | * shouldn't be taken to conflict with each other or with regular args, |
| 844 | * indeed shouldn't affect the aggregate's result collation at all. |
| 845 | * We handle this by applying assign_expr_collations() to them rather than |
| 846 | * passing down our loccontext. |
| 847 | * |
| 848 | * Note that we recurse to each TargetEntry, not directly to its contained |
| 849 | * expression, so that the case above for T_TargetEntry will complain if we |
| 850 | * can't resolve a collation for an ORDER BY item (whether or not it is also |
| 851 | * a normal aggregate arg). |
| 852 | * |
| 853 | * We need not recurse into the aggorder or aggdistinct lists, because those |
| 854 | * contain only SortGroupClause nodes which we need not process. |
| 855 | */ |
| 856 | static void |
| 857 | assign_aggregate_collations(Aggref *aggref, |
| 858 | assign_collations_context *loccontext) |
| 859 | { |
| 860 | ListCell *lc; |
| 861 | |
| 862 | /* Plain aggregates have no direct args */ |
| 863 | Assert(aggref->aggdirectargs == NIL); |
| 864 | |
| 865 | /* Process aggregated args, holding resjunk ones at arm's length */ |
| 866 | foreach(lc, aggref->args) |
| 867 | { |
| 868 | TargetEntry *tle = lfirst_node(TargetEntry, lc); |
| 869 | |
| 870 | if (tle->resjunk) |
| 871 | assign_expr_collations(loccontext->pstate, (Node *) tle); |
| 872 | else |
| 873 | (void) assign_collations_walker((Node *) tle, loccontext); |
| 874 | } |
| 875 | } |
| 876 | |
| 877 | /* |
| 878 | * For ordered-set aggregates, it's somewhat unclear how best to proceed. |
| 879 | * The spec-defined inverse distribution functions have only one sort column |
| 880 | * and don't return collatable types, but this is clearly too restrictive in |
| 881 | * the general case. Our solution is to consider that the aggregate's direct |
| 882 | * arguments contribute normally to determination of the aggregate's own |
| 883 | * collation, while aggregated arguments contribute only when the aggregate |
| 884 | * is designed to have exactly one aggregated argument (i.e., it has a single |
| 885 | * aggregated argument and is non-variadic). If it can have more than one |
| 886 | * aggregated argument, we process the aggregated arguments as independent |
| 887 | * sort columns. This avoids throwing error for something like |
| 888 | * agg(...) within group (order by x collate "foo", y collate "bar") |
| 889 | * while also guaranteeing that variadic aggregates don't change in behavior |
| 890 | * depending on how many sort columns a particular call happens to have. |
| 891 | * |
| 892 | * Otherwise this is much like the plain-aggregate case. |
| 893 | */ |
| 894 | static void |
| 895 | assign_ordered_set_collations(Aggref *aggref, |
| 896 | assign_collations_context *loccontext) |
| 897 | { |
| 898 | bool merge_sort_collations; |
| 899 | ListCell *lc; |
| 900 | |
| 901 | /* Merge sort collations to parent only if there can be only one */ |
| 902 | merge_sort_collations = (list_length(aggref->args) == 1 && |
| 903 | get_func_variadictype(aggref->aggfnoid) == InvalidOid); |
| 904 | |
| 905 | /* Direct args, if any, are normal children of the Aggref node */ |
| 906 | (void) assign_collations_walker((Node *) aggref->aggdirectargs, |
| 907 | loccontext); |
| 908 | |
| 909 | /* Process aggregated args appropriately */ |
| 910 | foreach(lc, aggref->args) |
| 911 | { |
| 912 | TargetEntry *tle = lfirst_node(TargetEntry, lc); |
| 913 | |
| 914 | if (merge_sort_collations) |
| 915 | (void) assign_collations_walker((Node *) tle, loccontext); |
| 916 | else |
| 917 | assign_expr_collations(loccontext->pstate, (Node *) tle); |
| 918 | } |
| 919 | } |
| 920 | |
| 921 | /* |
| 922 | * Hypothetical-set aggregates are even more special: per spec, we need to |
| 923 | * unify the collations of each pair of hypothetical and aggregated args. |
| 924 | * And we need to force the choice of collation down into the sort column |
| 925 | * to ensure that the sort happens with the chosen collation. Other than |
| 926 | * that, the behavior is like regular ordered-set aggregates. Note that |
| 927 | * hypothetical direct arguments contribute to the aggregate collation |
| 928 | * only when their partner aggregated arguments do. |
| 929 | */ |
| 930 | static void |
| 931 | assign_hypothetical_collations(Aggref *aggref, |
| 932 | assign_collations_context *loccontext) |
| 933 | { |
| 934 | ListCell *h_cell = list_head(aggref->aggdirectargs); |
| 935 | ListCell *s_cell = list_head(aggref->args); |
| 936 | bool merge_sort_collations; |
| 937 | int ; |
| 938 | |
| 939 | /* Merge sort collations to parent only if there can be only one */ |
| 940 | merge_sort_collations = (list_length(aggref->args) == 1 && |
| 941 | get_func_variadictype(aggref->aggfnoid) == InvalidOid); |
| 942 | |
| 943 | /* Process any non-hypothetical direct args */ |
| 944 | extra_args = list_length(aggref->aggdirectargs) - list_length(aggref->args); |
| 945 | Assert(extra_args >= 0); |
| 946 | while (extra_args-- > 0) |
| 947 | { |
| 948 | (void) assign_collations_walker((Node *) lfirst(h_cell), loccontext); |
| 949 | h_cell = lnext(h_cell); |
| 950 | } |
| 951 | |
| 952 | /* Scan hypothetical args and aggregated args in parallel */ |
| 953 | while (h_cell && s_cell) |
| 954 | { |
| 955 | Node *h_arg = (Node *) lfirst(h_cell); |
| 956 | TargetEntry *s_tle = (TargetEntry *) lfirst(s_cell); |
| 957 | assign_collations_context paircontext; |
| 958 | |
| 959 | /* |
| 960 | * Assign collations internally in this pair of expressions, then |
| 961 | * choose a common collation for them. This should match |
| 962 | * select_common_collation(), but we can't use that function as-is |
| 963 | * because we need access to the whole collation state so we can |
| 964 | * bubble it up to the aggregate function's level. |
| 965 | */ |
| 966 | paircontext.pstate = loccontext->pstate; |
| 967 | paircontext.collation = InvalidOid; |
| 968 | paircontext.strength = COLLATE_NONE; |
| 969 | paircontext.location = -1; |
| 970 | /* Set these fields just to suppress uninitialized-value warnings: */ |
| 971 | paircontext.collation2 = InvalidOid; |
| 972 | paircontext.location2 = -1; |
| 973 | |
| 974 | (void) assign_collations_walker(h_arg, &paircontext); |
| 975 | (void) assign_collations_walker((Node *) s_tle->expr, &paircontext); |
| 976 | |
| 977 | /* deal with collation conflict */ |
| 978 | if (paircontext.strength == COLLATE_CONFLICT) |
| 979 | ereport(ERROR, |
| 980 | (errcode(ERRCODE_COLLATION_MISMATCH), |
| 981 | errmsg("collation mismatch between implicit collations \"%s\" and \"%s\"" , |
| 982 | get_collation_name(paircontext.collation), |
| 983 | get_collation_name(paircontext.collation2)), |
| 984 | errhint("You can choose the collation by applying the COLLATE clause to one or both expressions." ), |
| 985 | parser_errposition(paircontext.pstate, |
| 986 | paircontext.location2))); |
| 987 | |
| 988 | /* |
| 989 | * At this point paircontext.collation can be InvalidOid only if the |
| 990 | * type is not collatable; no need to do anything in that case. If we |
| 991 | * do have to change the sort column's collation, do it by inserting a |
| 992 | * RelabelType node into the sort column TLE. |
| 993 | * |
| 994 | * XXX This is pretty grotty for a couple of reasons: |
| 995 | * assign_collations_walker isn't supposed to be changing the |
| 996 | * expression structure like this, and a parse-time change of |
| 997 | * collation ought to be signaled by a CollateExpr not a RelabelType |
| 998 | * (the use of RelabelType for collation marking is supposed to be a |
| 999 | * planner/executor thing only). But we have no better alternative. |
| 1000 | * In particular, injecting a CollateExpr could result in the |
| 1001 | * expression being interpreted differently after dump/reload, since |
| 1002 | * we might be effectively promoting an implicit collation to |
| 1003 | * explicit. This kluge is relying on ruleutils.c not printing a |
| 1004 | * COLLATE clause for a RelabelType, and probably on some other |
| 1005 | * fragile behaviors. |
| 1006 | */ |
| 1007 | if (OidIsValid(paircontext.collation) && |
| 1008 | paircontext.collation != exprCollation((Node *) s_tle->expr)) |
| 1009 | { |
| 1010 | s_tle->expr = (Expr *) |
| 1011 | makeRelabelType(s_tle->expr, |
| 1012 | exprType((Node *) s_tle->expr), |
| 1013 | exprTypmod((Node *) s_tle->expr), |
| 1014 | paircontext.collation, |
| 1015 | COERCE_IMPLICIT_CAST); |
| 1016 | } |
| 1017 | |
| 1018 | /* |
| 1019 | * If appropriate, merge this column's collation state up to the |
| 1020 | * aggregate function. |
| 1021 | */ |
| 1022 | if (merge_sort_collations) |
| 1023 | merge_collation_state(paircontext.collation, |
| 1024 | paircontext.strength, |
| 1025 | paircontext.location, |
| 1026 | paircontext.collation2, |
| 1027 | paircontext.location2, |
| 1028 | loccontext); |
| 1029 | |
| 1030 | h_cell = lnext(h_cell); |
| 1031 | s_cell = lnext(s_cell); |
| 1032 | } |
| 1033 | Assert(h_cell == NULL && s_cell == NULL); |
| 1034 | } |
| 1035 | |