1 | /*------------------------------------------------------------------------- |
2 | * |
3 | * parse_collate.c |
4 | * Routines for assigning collation information. |
5 | * |
6 | * We choose to handle collation analysis in a post-pass over the output |
7 | * of expression parse analysis. This is because we need more state to |
8 | * perform this processing than is needed in the finished tree. If we |
9 | * did it on-the-fly while building the tree, all that state would have |
10 | * to be kept in expression node trees permanently. This way, the extra |
11 | * storage is just local variables in this recursive routine. |
12 | * |
13 | * The info that is actually saved in the finished tree is: |
14 | * 1. The output collation of each expression node, or InvalidOid if it |
15 | * returns a noncollatable data type. This can also be InvalidOid if the |
16 | * result type is collatable but the collation is indeterminate. |
17 | * 2. The collation to be used in executing each function. InvalidOid means |
18 | * that there are no collatable inputs or their collation is indeterminate. |
19 | * This value is only stored in node types that might call collation-using |
20 | * functions. |
21 | * |
22 | * You might think we could get away with storing only one collation per |
23 | * node, but the two concepts really need to be kept distinct. Otherwise |
24 | * it's too confusing when a function produces a collatable output type but |
25 | * has no collatable inputs or produces noncollatable output from collatable |
26 | * inputs. |
27 | * |
28 | * Cases with indeterminate collation might result in an error being thrown |
29 | * at runtime. If we knew exactly which functions require collation |
30 | * information, we could throw those errors at parse time instead. |
31 | * |
32 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
33 | * Portions Copyright (c) 1994, Regents of the University of California |
34 | * |
35 | * |
36 | * IDENTIFICATION |
37 | * src/backend/parser/parse_collate.c |
38 | * |
39 | *------------------------------------------------------------------------- |
40 | */ |
41 | #include "postgres.h" |
42 | |
43 | #include "catalog/pg_aggregate.h" |
44 | #include "catalog/pg_collation.h" |
45 | #include "nodes/makefuncs.h" |
46 | #include "nodes/nodeFuncs.h" |
47 | #include "parser/parse_collate.h" |
48 | #include "utils/lsyscache.h" |
49 | |
50 | |
/*
 * Collation strength (the SQL standard calls this "derivation").
 *
 * The enumerators are listed in increasing order of precedence so that
 * ordinary integer comparisons work usefully: merge_collation_state() lets
 * a child whose strength is greater than the parent's current strength
 * override the parent's state, and ignores a child whose strength is less.
 *
 * Note: the standard doesn't seem to distinguish between NONE and CONFLICT.
 */
typedef enum
{
	COLLATE_NONE,				/* expression is of a noncollatable datatype */
	COLLATE_IMPLICIT,			/* collation was derived implicitly */
	COLLATE_CONFLICT,			/* we had a conflict of implicit collations */
	COLLATE_EXPLICIT			/* collation was derived explicitly */
} CollateStrength;
63 | |
/*
 * Working state for one level of the collation-assignment tree walk.
 *
 * Each recursion level of assign_collations_walker() owns one of these and
 * accumulates into it the merged collation state of that level's children.
 */
typedef struct
{
	ParseState *pstate;			/* parse state (for error reporting) */
	Oid			collation;		/* OID of current collation, if any */
	CollateStrength strength;	/* strength of current collation choice */
	int			location;		/* location of expr that set collation */

	/*
	 * Remaining fields are only valid when strength == COLLATE_CONFLICT.
	 * They describe the second of the two conflicting implicit collations
	 * and are what the "collation mismatch" error reports point at.
	 */
	Oid			collation2;		/* OID of conflicting collation */
	int			location2;		/* location of expr that set collation2 */
} assign_collations_context;
74 | |
/* Prototypes for functions private to this file */
static bool assign_query_collations_walker(Node *node, ParseState *pstate);
static bool assign_collations_walker(Node *node,
									 assign_collations_context *context);
static void merge_collation_state(Oid collation,
								  CollateStrength strength,
								  int location,
								  Oid collation2,
								  int location2,
								  assign_collations_context *context);
static void assign_aggregate_collations(Aggref *aggref,
										assign_collations_context *loccontext);
static void assign_ordered_set_collations(Aggref *aggref,
										  assign_collations_context *loccontext);
static void assign_hypothetical_collations(Aggref *aggref,
										   assign_collations_context *loccontext);
90 | |
91 | |
92 | /* |
93 | * assign_query_collations() |
94 | * Mark all expressions in the given Query with collation information. |
95 | * |
96 | * This should be applied to each Query after completion of parse analysis |
97 | * for expressions. Note that we do not recurse into sub-Queries, since |
98 | * those should have been processed when built. |
99 | */ |
100 | void |
101 | assign_query_collations(ParseState *pstate, Query *query) |
102 | { |
103 | /* |
104 | * We just use query_tree_walker() to visit all the contained expressions. |
105 | * We can skip the rangetable and CTE subqueries, though, since RTEs and |
106 | * subqueries had better have been processed already (else Vars referring |
107 | * to them would not get created with the right collation). |
108 | */ |
109 | (void) query_tree_walker(query, |
110 | assign_query_collations_walker, |
111 | (void *) pstate, |
112 | QTW_IGNORE_RANGE_TABLE | |
113 | QTW_IGNORE_CTE_SUBQUERIES); |
114 | } |
115 | |
116 | /* |
117 | * Walker for assign_query_collations |
118 | * |
119 | * Each expression found by query_tree_walker is processed independently. |
120 | * Note that query_tree_walker may pass us a whole List, such as the |
121 | * targetlist, in which case each subexpression must be processed |
122 | * independently --- we don't want to bleat if two different targetentries |
123 | * have different collations. |
124 | */ |
125 | static bool |
126 | assign_query_collations_walker(Node *node, ParseState *pstate) |
127 | { |
128 | /* Need do nothing for empty subexpressions */ |
129 | if (node == NULL) |
130 | return false; |
131 | |
132 | /* |
133 | * We don't want to recurse into a set-operations tree; it's already been |
134 | * fully processed in transformSetOperationStmt. |
135 | */ |
136 | if (IsA(node, SetOperationStmt)) |
137 | return false; |
138 | |
139 | if (IsA(node, List)) |
140 | assign_list_collations(pstate, (List *) node); |
141 | else |
142 | assign_expr_collations(pstate, node); |
143 | |
144 | return false; |
145 | } |
146 | |
147 | /* |
148 | * assign_list_collations() |
149 | * Mark all nodes in the list of expressions with collation information. |
150 | * |
151 | * The list member expressions are processed independently; they do not have |
152 | * to share a common collation. |
153 | */ |
154 | void |
155 | assign_list_collations(ParseState *pstate, List *exprs) |
156 | { |
157 | ListCell *lc; |
158 | |
159 | foreach(lc, exprs) |
160 | { |
161 | Node *node = (Node *) lfirst(lc); |
162 | |
163 | assign_expr_collations(pstate, node); |
164 | } |
165 | } |
166 | |
167 | /* |
168 | * assign_expr_collations() |
169 | * Mark all nodes in the given expression tree with collation information. |
170 | * |
171 | * This is exported for the benefit of various utility commands that process |
172 | * expressions without building a complete Query. It should be applied after |
173 | * calling transformExpr() plus any expression-modifying operations such as |
174 | * coerce_to_boolean(). |
175 | */ |
176 | void |
177 | assign_expr_collations(ParseState *pstate, Node *expr) |
178 | { |
179 | assign_collations_context context; |
180 | |
181 | /* initialize context for tree walk */ |
182 | context.pstate = pstate; |
183 | context.collation = InvalidOid; |
184 | context.strength = COLLATE_NONE; |
185 | context.location = -1; |
186 | |
187 | /* and away we go */ |
188 | (void) assign_collations_walker(expr, &context); |
189 | } |
190 | |
191 | /* |
192 | * select_common_collation() |
193 | * Identify a common collation for a list of expressions. |
194 | * |
195 | * The expressions should all return the same datatype, else this is not |
196 | * terribly meaningful. |
197 | * |
198 | * none_ok means that it is permitted to return InvalidOid, indicating that |
199 | * no common collation could be identified, even for collatable datatypes. |
200 | * Otherwise, an error is thrown for conflict of implicit collations. |
201 | * |
202 | * In theory, none_ok = true reflects the rules of SQL standard clause "Result |
203 | * of data type combinations", none_ok = false reflects the rules of clause |
204 | * "Collation determination" (in some cases invoked via "Grouping |
205 | * operations"). |
206 | */ |
207 | Oid |
208 | select_common_collation(ParseState *pstate, List *exprs, bool none_ok) |
209 | { |
210 | assign_collations_context context; |
211 | |
212 | /* initialize context for tree walk */ |
213 | context.pstate = pstate; |
214 | context.collation = InvalidOid; |
215 | context.strength = COLLATE_NONE; |
216 | context.location = -1; |
217 | |
218 | /* and away we go */ |
219 | (void) assign_collations_walker((Node *) exprs, &context); |
220 | |
221 | /* deal with collation conflict */ |
222 | if (context.strength == COLLATE_CONFLICT) |
223 | { |
224 | if (none_ok) |
225 | return InvalidOid; |
226 | ereport(ERROR, |
227 | (errcode(ERRCODE_COLLATION_MISMATCH), |
228 | errmsg("collation mismatch between implicit collations \"%s\" and \"%s\"" , |
229 | get_collation_name(context.collation), |
230 | get_collation_name(context.collation2)), |
231 | errhint("You can choose the collation by applying the COLLATE clause to one or both expressions." ), |
232 | parser_errposition(context.pstate, context.location2))); |
233 | } |
234 | |
235 | /* |
236 | * Note: if strength is still COLLATE_NONE, we'll return InvalidOid, but |
237 | * that's okay because it must mean none of the expressions returned |
238 | * collatable datatypes. |
239 | */ |
240 | return context.collation; |
241 | } |
242 | |
/*
 * assign_collations_walker()
 *		Recursive guts of collation processing.
 *
 * node is the expression (sub)tree to process; context is the state of the
 * parent level, into which this node's resulting collation state is merged
 * via merge_collation_state() before returning.  A few node types that can
 * never influence their parent return early without merging.
 *
 * The result is always false; callers ignore it.
 *
 * Nodes with no children (eg, Vars, Consts, Params) must have been marked
 * when built.  All upper-level nodes are marked here.
 *
 * Note: if this is invoked directly on a List, it will attempt to infer a
 * common collation for all the list members.  In particular, it will throw
 * error if there are conflicting explicit collations for different members.
 */
static bool
assign_collations_walker(Node *node, assign_collations_context *context)
{
	assign_collations_context loccontext;	/* merged state of our children */
	Oid			collation;		/* this node's own resulting collation */
	CollateStrength strength;	/* ... and its strength */
	int			location;		/* ... and the parse location that set it */

	/* Need do nothing for empty subexpressions */
	if (node == NULL)
		return false;

	/*
	 * Prepare for recursion.  For most node types, though not all, the first
	 * thing we do is recurse to process all nodes below this one. Each level
	 * of the tree has its own local context.
	 */
	loccontext.pstate = context->pstate;
	loccontext.collation = InvalidOid;
	loccontext.strength = COLLATE_NONE;
	loccontext.location = -1;
	/* Set these fields just to suppress uninitialized-value warnings: */
	loccontext.collation2 = InvalidOid;
	loccontext.location2 = -1;

	/*
	 * Recurse if appropriate, then determine the collation for this node.
	 *
	 * Each case that falls out of the switch via "break" must have set
	 * collation, strength and location, since those are merged into the
	 * parent's state below.
	 *
	 * Note: the general cases are at the bottom of the switch, after various
	 * special cases.
	 */
	switch (nodeTag(node))
	{
		case T_CollateExpr:
			{
				/*
				 * COLLATE sets an explicitly derived collation, regardless of
				 * what the child state is.  But we must recurse to set up
				 * collation info below here.
				 */
				CollateExpr *expr = (CollateExpr *) node;

				(void) expression_tree_walker(node,
											  assign_collations_walker,
											  (void *) &loccontext);

				collation = expr->collOid;
				Assert(OidIsValid(collation));
				strength = COLLATE_EXPLICIT;
				location = expr->location;
			}
			break;
		case T_FieldSelect:
			{
				/*
				 * For FieldSelect, the result has the field's declared
				 * collation, independently of what happened in the arguments.
				 * (The immediate argument must be composite and thus not
				 * collatable, anyhow.)  The field's collation was already
				 * looked up and saved in the node.
				 */
				FieldSelect *expr = (FieldSelect *) node;

				/* ... but first, recurse */
				(void) expression_tree_walker(node,
											  assign_collations_walker,
											  (void *) &loccontext);

				if (OidIsValid(expr->resultcollid))
				{
					/* Node's result type is collatable. */
					/* Pass up field's collation as an implicit choice. */
					collation = expr->resultcollid;
					strength = COLLATE_IMPLICIT;
					location = exprLocation(node);
				}
				else
				{
					/* Node's result type isn't collatable. */
					collation = InvalidOid;
					strength = COLLATE_NONE;
					location = -1;	/* won't be used */
				}
			}
			break;
		case T_RowExpr:
			{
				/*
				 * RowExpr is a special case because the subexpressions are
				 * independent: we don't want to complain if some of them have
				 * incompatible explicit collations.
				 */
				RowExpr    *expr = (RowExpr *) node;

				assign_list_collations(context->pstate, expr->args);

				/*
				 * Since the result is always composite and therefore never
				 * has a collation, we can just stop here: this node has no
				 * impact on the collation of its parent.
				 */
				return false;	/* done */
			}
		case T_RowCompareExpr:
			{
				/*
				 * For RowCompare, we have to find the common collation of
				 * each pair of input columns and build a list.  If we can't
				 * find a common collation, we just put InvalidOid into the
				 * list, which may or may not cause an error at runtime.
				 */
				RowCompareExpr *expr = (RowCompareExpr *) node;
				List	   *colls = NIL;
				ListCell   *l;
				ListCell   *r;

				forboth(l, expr->largs, r, expr->rargs)
				{
					Node	   *le = (Node *) lfirst(l);
					Node	   *re = (Node *) lfirst(r);
					Oid			coll;

					/* none_ok = true: a conflict yields InvalidOid, not error */
					coll = select_common_collation(context->pstate,
												   list_make2(le, re),
												   true);
					colls = lappend_oid(colls, coll);
				}
				expr->inputcollids = colls;

				/*
				 * Since the result is always boolean and therefore never has
				 * a collation, we can just stop here: this node has no impact
				 * on the collation of its parent.
				 */
				return false;	/* done */
			}
		case T_CoerceToDomain:
			{
				/*
				 * If the domain declaration included a non-default COLLATE
				 * spec, then use that collation as the output collation of
				 * the coercion.  Otherwise allow the input collation to
				 * bubble up.  (The input should be of the domain's base type,
				 * therefore we don't need to worry about it not being
				 * collatable when the domain is.)
				 */
				CoerceToDomain *expr = (CoerceToDomain *) node;
				Oid			typcollation = get_typcollation(expr->resulttype);

				/* ... but first, recurse */
				(void) expression_tree_walker(node,
											  assign_collations_walker,
											  (void *) &loccontext);

				if (OidIsValid(typcollation))
				{
					/* Node's result type is collatable. */
					if (typcollation == DEFAULT_COLLATION_OID)
					{
						/* Collation state bubbles up from child. */
						collation = loccontext.collation;
						strength = loccontext.strength;
						location = loccontext.location;
					}
					else
					{
						/* Use domain's collation as an implicit choice. */
						collation = typcollation;
						strength = COLLATE_IMPLICIT;
						location = exprLocation(node);
					}
				}
				else
				{
					/* Node's result type isn't collatable. */
					collation = InvalidOid;
					strength = COLLATE_NONE;
					location = -1;	/* won't be used */
				}

				/*
				 * Save the state into the expression node.  We know it
				 * doesn't care about input collation.
				 */
				if (strength == COLLATE_CONFLICT)
					exprSetCollation(node, InvalidOid);
				else
					exprSetCollation(node, collation);
			}
			break;
		case T_TargetEntry:
			(void) expression_tree_walker(node,
										  assign_collations_walker,
										  (void *) &loccontext);

			/*
			 * TargetEntry can have only one child, and should bubble that
			 * state up to its parent.  We can't use the general-case code
			 * below because exprType and friends don't work on TargetEntry.
			 */
			collation = loccontext.collation;
			strength = loccontext.strength;
			location = loccontext.location;

			/*
			 * Throw error if the collation is indeterminate for a TargetEntry
			 * that is a sort/group target.  We prefer to do this now, instead
			 * of leaving the comparison functions to fail at runtime, because
			 * we can give a syntax error pointer to help locate the problem.
			 * There are some cases where there might not be a failure, for
			 * example if the planner chooses to use hash aggregation instead
			 * of sorting for grouping; but it seems better to predictably
			 * throw an error.  (Compare transformSetOperationTree, which will
			 * throw error for indeterminate collation of set-op columns, even
			 * though the planner might be able to implement the set-op
			 * without sorting.)
			 */
			if (strength == COLLATE_CONFLICT &&
				((TargetEntry *) node)->ressortgroupref != 0)
				ereport(ERROR,
						(errcode(ERRCODE_COLLATION_MISMATCH),
						 errmsg("collation mismatch between implicit collations \"%s\" and \"%s\"",
								get_collation_name(loccontext.collation),
								get_collation_name(loccontext.collation2)),
						 errhint("You can choose the collation by applying the COLLATE clause to one or both expressions."),
						 parser_errposition(context->pstate,
											loccontext.location2)));
			break;
		case T_InferenceElem:
		case T_RangeTblRef:
		case T_JoinExpr:
		case T_FromExpr:
		case T_OnConflictExpr:
		case T_SortGroupClause:
			(void) expression_tree_walker(node,
										  assign_collations_walker,
										  (void *) &loccontext);

			/*
			 * When we're invoked on a query's jointree, we don't need to do
			 * anything with join nodes except recurse through them to process
			 * WHERE/ON expressions.  So just stop here.  Likewise, we don't
			 * need to do anything when invoked on sort/group lists.
			 */
			return false;
		case T_Query:
			{
				/*
				 * We get here when we're invoked on the Query belonging to a
				 * SubLink.  Act as though the Query returns its first output
				 * column, which indeed is what it does for EXPR_SUBLINK and
				 * ARRAY_SUBLINK cases.  In the cases where the SubLink
				 * returns boolean, this info will be ignored.  Special case:
				 * in EXISTS, the Query might return no columns, in which case
				 * we need do nothing.
				 *
				 * We needn't recurse, since the Query is already processed.
				 */
				Query	   *qtree = (Query *) node;
				TargetEntry *tent;

				if (qtree->targetList == NIL)
					return false;
				tent = linitial_node(TargetEntry, qtree->targetList);
				/* A junk first column is not an output column; ignore it */
				if (tent->resjunk)
					return false;

				collation = exprCollation((Node *) tent->expr);
				/* collation doesn't change if it's converted to array */
				strength = COLLATE_IMPLICIT;
				location = exprLocation((Node *) tent->expr);
			}
			break;
		case T_List:
			(void) expression_tree_walker(node,
										  assign_collations_walker,
										  (void *) &loccontext);

			/*
			 * When processing a list, collation state just bubbles up from
			 * the list elements.
			 */
			collation = loccontext.collation;
			strength = loccontext.strength;
			location = loccontext.location;
			break;

		case T_Var:
		case T_Const:
		case T_Param:
		case T_CoerceToDomainValue:
		case T_CaseTestExpr:
		case T_SetToDefault:
		case T_CurrentOfExpr:

			/*
			 * General case for childless expression nodes.  These should
			 * already have a collation assigned; it is not this function's
			 * responsibility to look into the catalogs for base-case
			 * information.
			 */
			collation = exprCollation(node);

			/*
			 * Note: in most cases, there will be an assigned collation
			 * whenever type_is_collatable(exprType(node)); but an exception
			 * occurs for a Var referencing a subquery output column for which
			 * a unique collation was not determinable.  That may lead to a
			 * runtime failure if a collation-sensitive function is applied to
			 * the Var.
			 */

			if (OidIsValid(collation))
				strength = COLLATE_IMPLICIT;
			else
				strength = COLLATE_NONE;
			location = exprLocation(node);
			break;

		default:
			{
				/*
				 * General case for most expression nodes with children. First
				 * recurse, then figure out what to assign to this node.
				 */
				Oid			typcollation;

				/*
				 * For most node types, we want to treat all the child
				 * expressions alike; but there are a few exceptions, hence
				 * this inner switch.
				 */
				switch (nodeTag(node))
				{
					case T_Aggref:
						{
							/*
							 * Aggref is messy enough that we give it its own
							 * function, in fact three of them.  The FILTER
							 * clause is independent of the rest of the
							 * aggregate, however, so it can be processed
							 * separately.
							 */
							Aggref	   *aggref = (Aggref *) node;

							switch (aggref->aggkind)
							{
								case AGGKIND_NORMAL:
									assign_aggregate_collations(aggref,
																&loccontext);
									break;
								case AGGKIND_ORDERED_SET:
									assign_ordered_set_collations(aggref,
																  &loccontext);
									break;
								case AGGKIND_HYPOTHETICAL:
									assign_hypothetical_collations(aggref,
																   &loccontext);
									break;
								default:
									elog(ERROR, "unrecognized aggkind: %d",
										 (int) aggref->aggkind);
							}

							/* FILTER gets a fresh context of its own */
							assign_expr_collations(context->pstate,
												   (Node *) aggref->aggfilter);
						}
						break;
					case T_WindowFunc:
						{
							/*
							 * WindowFunc requires special processing only for
							 * its aggfilter clause, as for aggregates.
							 */
							WindowFunc *wfunc = (WindowFunc *) node;

							(void) assign_collations_walker((Node *) wfunc->args,
															&loccontext);

							/* FILTER gets a fresh context of its own */
							assign_expr_collations(context->pstate,
												   (Node *) wfunc->aggfilter);
						}
						break;
					case T_CaseExpr:
						{
							/*
							 * CaseExpr is a special case because we do not
							 * want to recurse into the test expression (if
							 * any).  It was already marked with collations
							 * during transformCaseExpr, and furthermore its
							 * collation is not relevant to the result of the
							 * CASE --- only the output expressions are.
							 */
							CaseExpr   *expr = (CaseExpr *) node;
							ListCell   *lc;

							foreach(lc, expr->args)
							{
								CaseWhen   *when = lfirst_node(CaseWhen, lc);

								/*
								 * The condition expressions mustn't affect
								 * the CASE's result collation either; but
								 * since they are known to yield boolean, it's
								 * safe to recurse directly on them --- they
								 * won't change loccontext.
								 */
								(void) assign_collations_walker((Node *) when->expr,
																&loccontext);
								(void) assign_collations_walker((Node *) when->result,
																&loccontext);
							}
							(void) assign_collations_walker((Node *) expr->defresult,
															&loccontext);
						}
						break;
					default:

						/*
						 * Normal case: all child expressions contribute
						 * equally to loccontext.
						 */
						(void) expression_tree_walker(node,
													  assign_collations_walker,
													  (void *) &loccontext);
						break;
				}

				/*
				 * Now figure out what collation to assign to this node.
				 */
				typcollation = get_typcollation(exprType(node));
				if (OidIsValid(typcollation))
				{
					/* Node's result is collatable; what about its input? */
					if (loccontext.strength > COLLATE_NONE)
					{
						/* Collation state bubbles up from children. */
						collation = loccontext.collation;
						strength = loccontext.strength;
						location = loccontext.location;
					}
					else
					{
						/*
						 * Collatable output produced without any collatable
						 * input.  Use the type's collation (which is usually
						 * DEFAULT_COLLATION_OID, but might be different for a
						 * domain).
						 */
						collation = typcollation;
						strength = COLLATE_IMPLICIT;
						location = exprLocation(node);
					}
				}
				else
				{
					/* Node's result type isn't collatable. */
					collation = InvalidOid;
					strength = COLLATE_NONE;
					location = -1;	/* won't be used */
				}

				/*
				 * Save the result collation into the expression node. If the
				 * state is COLLATE_CONFLICT, we'll set the collation to
				 * InvalidOid, which might result in an error at runtime.
				 */
				if (strength == COLLATE_CONFLICT)
					exprSetCollation(node, InvalidOid);
				else
					exprSetCollation(node, collation);

				/*
				 * Likewise save the input collation, which is the one that
				 * any function called by this node should use.  Note this is
				 * taken from the children's merged state (loccontext), not
				 * from the output collation computed just above.
				 */
				if (loccontext.strength == COLLATE_CONFLICT)
					exprSetInputCollation(node, InvalidOid);
				else
					exprSetInputCollation(node, loccontext.collation);
			}
			break;
	}

	/*
	 * Now, merge my information into my parent's state.  The collation2 and
	 * location2 values are copied into the parent only when strength is
	 * COLLATE_CONFLICT and outranks the parent's current strength.
	 */
	merge_collation_state(collation,
						  strength,
						  location,
						  loccontext.collation2,
						  loccontext.location2,
						  context);

	return false;
}
751 | |
752 | /* |
753 | * Merge collation state of a subexpression into the context for its parent. |
754 | */ |
755 | static void |
756 | merge_collation_state(Oid collation, |
757 | CollateStrength strength, |
758 | int location, |
759 | Oid collation2, |
760 | int location2, |
761 | assign_collations_context *context) |
762 | { |
763 | /* |
764 | * If the collation strength for this node is different from what's |
765 | * already in *context, then this node either dominates or is dominated by |
766 | * earlier siblings. |
767 | */ |
768 | if (strength > context->strength) |
769 | { |
770 | /* Override previous parent state */ |
771 | context->collation = collation; |
772 | context->strength = strength; |
773 | context->location = location; |
774 | /* Bubble up error info if applicable */ |
775 | if (strength == COLLATE_CONFLICT) |
776 | { |
777 | context->collation2 = collation2; |
778 | context->location2 = location2; |
779 | } |
780 | } |
781 | else if (strength == context->strength) |
782 | { |
783 | /* Merge, or detect error if there's a collation conflict */ |
784 | switch (strength) |
785 | { |
786 | case COLLATE_NONE: |
787 | /* Nothing + nothing is still nothing */ |
788 | break; |
789 | case COLLATE_IMPLICIT: |
790 | if (collation != context->collation) |
791 | { |
792 | /* |
793 | * Non-default implicit collation always beats default. |
794 | */ |
795 | if (context->collation == DEFAULT_COLLATION_OID) |
796 | { |
797 | /* Override previous parent state */ |
798 | context->collation = collation; |
799 | context->strength = strength; |
800 | context->location = location; |
801 | } |
802 | else if (collation != DEFAULT_COLLATION_OID) |
803 | { |
804 | /* |
805 | * Oops, we have a conflict. We cannot throw error |
806 | * here, since the conflict could be resolved by a |
807 | * later sibling CollateExpr, or the parent might not |
808 | * care about collation anyway. Return enough info to |
809 | * throw the error later, if needed. |
810 | */ |
811 | context->strength = COLLATE_CONFLICT; |
812 | context->collation2 = collation; |
813 | context->location2 = location; |
814 | } |
815 | } |
816 | break; |
817 | case COLLATE_CONFLICT: |
818 | /* We're still conflicted ... */ |
819 | break; |
820 | case COLLATE_EXPLICIT: |
821 | if (collation != context->collation) |
822 | { |
823 | /* |
824 | * Oops, we have a conflict of explicit COLLATE clauses. |
825 | * Here we choose to throw error immediately; that is what |
826 | * the SQL standard says to do, and there's no good reason |
827 | * to be less strict. |
828 | */ |
829 | ereport(ERROR, |
830 | (errcode(ERRCODE_COLLATION_MISMATCH), |
831 | errmsg("collation mismatch between explicit collations \"%s\" and \"%s\"" , |
832 | get_collation_name(context->collation), |
833 | get_collation_name(collation)), |
834 | parser_errposition(context->pstate, location))); |
835 | } |
836 | break; |
837 | } |
838 | } |
839 | } |
840 | |
841 | /* |
842 | * Aggref is a special case because expressions used only for ordering |
843 | * shouldn't be taken to conflict with each other or with regular args, |
844 | * indeed shouldn't affect the aggregate's result collation at all. |
845 | * We handle this by applying assign_expr_collations() to them rather than |
846 | * passing down our loccontext. |
847 | * |
848 | * Note that we recurse to each TargetEntry, not directly to its contained |
849 | * expression, so that the case above for T_TargetEntry will complain if we |
850 | * can't resolve a collation for an ORDER BY item (whether or not it is also |
851 | * a normal aggregate arg). |
852 | * |
853 | * We need not recurse into the aggorder or aggdistinct lists, because those |
854 | * contain only SortGroupClause nodes which we need not process. |
855 | */ |
856 | static void |
857 | assign_aggregate_collations(Aggref *aggref, |
858 | assign_collations_context *loccontext) |
859 | { |
860 | ListCell *lc; |
861 | |
862 | /* Plain aggregates have no direct args */ |
863 | Assert(aggref->aggdirectargs == NIL); |
864 | |
865 | /* Process aggregated args, holding resjunk ones at arm's length */ |
866 | foreach(lc, aggref->args) |
867 | { |
868 | TargetEntry *tle = lfirst_node(TargetEntry, lc); |
869 | |
870 | if (tle->resjunk) |
871 | assign_expr_collations(loccontext->pstate, (Node *) tle); |
872 | else |
873 | (void) assign_collations_walker((Node *) tle, loccontext); |
874 | } |
875 | } |
876 | |
877 | /* |
878 | * For ordered-set aggregates, it's somewhat unclear how best to proceed. |
879 | * The spec-defined inverse distribution functions have only one sort column |
880 | * and don't return collatable types, but this is clearly too restrictive in |
881 | * the general case. Our solution is to consider that the aggregate's direct |
882 | * arguments contribute normally to determination of the aggregate's own |
883 | * collation, while aggregated arguments contribute only when the aggregate |
884 | * is designed to have exactly one aggregated argument (i.e., it has a single |
885 | * aggregated argument and is non-variadic). If it can have more than one |
886 | * aggregated argument, we process the aggregated arguments as independent |
887 | * sort columns. This avoids throwing error for something like |
888 | * agg(...) within group (order by x collate "foo", y collate "bar") |
889 | * while also guaranteeing that variadic aggregates don't change in behavior |
890 | * depending on how many sort columns a particular call happens to have. |
891 | * |
892 | * Otherwise this is much like the plain-aggregate case. |
893 | */ |
894 | static void |
895 | assign_ordered_set_collations(Aggref *aggref, |
896 | assign_collations_context *loccontext) |
897 | { |
898 | bool merge_sort_collations; |
899 | ListCell *lc; |
900 | |
901 | /* Merge sort collations to parent only if there can be only one */ |
902 | merge_sort_collations = (list_length(aggref->args) == 1 && |
903 | get_func_variadictype(aggref->aggfnoid) == InvalidOid); |
904 | |
905 | /* Direct args, if any, are normal children of the Aggref node */ |
906 | (void) assign_collations_walker((Node *) aggref->aggdirectargs, |
907 | loccontext); |
908 | |
909 | /* Process aggregated args appropriately */ |
910 | foreach(lc, aggref->args) |
911 | { |
912 | TargetEntry *tle = lfirst_node(TargetEntry, lc); |
913 | |
914 | if (merge_sort_collations) |
915 | (void) assign_collations_walker((Node *) tle, loccontext); |
916 | else |
917 | assign_expr_collations(loccontext->pstate, (Node *) tle); |
918 | } |
919 | } |
920 | |
921 | /* |
922 | * Hypothetical-set aggregates are even more special: per spec, we need to |
923 | * unify the collations of each pair of hypothetical and aggregated args. |
924 | * And we need to force the choice of collation down into the sort column |
925 | * to ensure that the sort happens with the chosen collation. Other than |
926 | * that, the behavior is like regular ordered-set aggregates. Note that |
927 | * hypothetical direct arguments contribute to the aggregate collation |
928 | * only when their partner aggregated arguments do. |
929 | */ |
930 | static void |
931 | assign_hypothetical_collations(Aggref *aggref, |
932 | assign_collations_context *loccontext) |
933 | { |
934 | ListCell *h_cell = list_head(aggref->aggdirectargs); |
935 | ListCell *s_cell = list_head(aggref->args); |
936 | bool merge_sort_collations; |
937 | int ; |
938 | |
939 | /* Merge sort collations to parent only if there can be only one */ |
940 | merge_sort_collations = (list_length(aggref->args) == 1 && |
941 | get_func_variadictype(aggref->aggfnoid) == InvalidOid); |
942 | |
943 | /* Process any non-hypothetical direct args */ |
944 | extra_args = list_length(aggref->aggdirectargs) - list_length(aggref->args); |
945 | Assert(extra_args >= 0); |
946 | while (extra_args-- > 0) |
947 | { |
948 | (void) assign_collations_walker((Node *) lfirst(h_cell), loccontext); |
949 | h_cell = lnext(h_cell); |
950 | } |
951 | |
952 | /* Scan hypothetical args and aggregated args in parallel */ |
953 | while (h_cell && s_cell) |
954 | { |
955 | Node *h_arg = (Node *) lfirst(h_cell); |
956 | TargetEntry *s_tle = (TargetEntry *) lfirst(s_cell); |
957 | assign_collations_context paircontext; |
958 | |
959 | /* |
960 | * Assign collations internally in this pair of expressions, then |
961 | * choose a common collation for them. This should match |
962 | * select_common_collation(), but we can't use that function as-is |
963 | * because we need access to the whole collation state so we can |
964 | * bubble it up to the aggregate function's level. |
965 | */ |
966 | paircontext.pstate = loccontext->pstate; |
967 | paircontext.collation = InvalidOid; |
968 | paircontext.strength = COLLATE_NONE; |
969 | paircontext.location = -1; |
970 | /* Set these fields just to suppress uninitialized-value warnings: */ |
971 | paircontext.collation2 = InvalidOid; |
972 | paircontext.location2 = -1; |
973 | |
974 | (void) assign_collations_walker(h_arg, &paircontext); |
975 | (void) assign_collations_walker((Node *) s_tle->expr, &paircontext); |
976 | |
977 | /* deal with collation conflict */ |
978 | if (paircontext.strength == COLLATE_CONFLICT) |
979 | ereport(ERROR, |
980 | (errcode(ERRCODE_COLLATION_MISMATCH), |
981 | errmsg("collation mismatch between implicit collations \"%s\" and \"%s\"" , |
982 | get_collation_name(paircontext.collation), |
983 | get_collation_name(paircontext.collation2)), |
984 | errhint("You can choose the collation by applying the COLLATE clause to one or both expressions." ), |
985 | parser_errposition(paircontext.pstate, |
986 | paircontext.location2))); |
987 | |
988 | /* |
989 | * At this point paircontext.collation can be InvalidOid only if the |
990 | * type is not collatable; no need to do anything in that case. If we |
991 | * do have to change the sort column's collation, do it by inserting a |
992 | * RelabelType node into the sort column TLE. |
993 | * |
994 | * XXX This is pretty grotty for a couple of reasons: |
995 | * assign_collations_walker isn't supposed to be changing the |
996 | * expression structure like this, and a parse-time change of |
997 | * collation ought to be signaled by a CollateExpr not a RelabelType |
998 | * (the use of RelabelType for collation marking is supposed to be a |
999 | * planner/executor thing only). But we have no better alternative. |
1000 | * In particular, injecting a CollateExpr could result in the |
1001 | * expression being interpreted differently after dump/reload, since |
1002 | * we might be effectively promoting an implicit collation to |
1003 | * explicit. This kluge is relying on ruleutils.c not printing a |
1004 | * COLLATE clause for a RelabelType, and probably on some other |
1005 | * fragile behaviors. |
1006 | */ |
1007 | if (OidIsValid(paircontext.collation) && |
1008 | paircontext.collation != exprCollation((Node *) s_tle->expr)) |
1009 | { |
1010 | s_tle->expr = (Expr *) |
1011 | makeRelabelType(s_tle->expr, |
1012 | exprType((Node *) s_tle->expr), |
1013 | exprTypmod((Node *) s_tle->expr), |
1014 | paircontext.collation, |
1015 | COERCE_IMPLICIT_CAST); |
1016 | } |
1017 | |
1018 | /* |
1019 | * If appropriate, merge this column's collation state up to the |
1020 | * aggregate function. |
1021 | */ |
1022 | if (merge_sort_collations) |
1023 | merge_collation_state(paircontext.collation, |
1024 | paircontext.strength, |
1025 | paircontext.location, |
1026 | paircontext.collation2, |
1027 | paircontext.location2, |
1028 | loccontext); |
1029 | |
1030 | h_cell = lnext(h_cell); |
1031 | s_cell = lnext(s_cell); |
1032 | } |
1033 | Assert(h_cell == NULL && s_cell == NULL); |
1034 | } |
1035 | |