parse_collate.c source code [PostgreSQL/src/backend/parser/parse_collate.c]

/*-------------------------------------------------------------------------
2	*
3	* parse_collate.c
4	* Routines for assigning collation information.
5	*
6	* We choose to handle collation analysis in a post-pass over the output
7	* of expression parse analysis. This is because we need more state to
8	* perform this processing than is needed in the finished tree. If we
9	* did it on-the-fly while building the tree, all that state would have
10	* to be kept in expression node trees permanently. This way, the extra
11	* storage is just local variables in this recursive routine.
12	*
13	* The info that is actually saved in the finished tree is:
14	* 1. The output collation of each expression node, or InvalidOid if it
15	* returns a noncollatable data type. This can also be InvalidOid if the
16	* result type is collatable but the collation is indeterminate.
17	* 2. The collation to be used in executing each function. InvalidOid means
18	* that there are no collatable inputs or their collation is indeterminate.
19	* This value is only stored in node types that might call collation-using
20	* functions.
21	*
22	* You might think we could get away with storing only one collation per
23	* node, but the two concepts really need to be kept distinct. Otherwise
24	* it's too confusing when a function produces a collatable output type but
25	* has no collatable inputs or produces noncollatable output from collatable
26	* inputs.
27	*
28	* Cases with indeterminate collation might result in an error being thrown
29	* at runtime. If we knew exactly which functions require collation
30	* information, we could throw those errors at parse time instead.
31	*
32	* Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group
33	* Portions Copyright (c) 1994, Regents of the University of California
34	*
35	*
36	* IDENTIFICATION
37	* src/backend/parser/parse_collate.c
38	*
39	*-------------------------------------------------------------------------
40	*/
41	#include "postgres.h"
42
43	#include "catalog/pg_aggregate.h"
44	#include "catalog/pg_collation.h"
45	#include "nodes/makefuncs.h"
46	#include "nodes/nodeFuncs.h"
47	#include "parser/parse_collate.h"
48	#include "utils/lsyscache.h"
49
50
/*
 * Collation strength (the SQL standard calls this "derivation").  Order is
 * chosen to allow comparisons to work usefully.  Note: the standard doesn't
 * seem to distinguish between NONE and CONFLICT.
 *
 * The numeric ordering matters: merge_collation_state() compares strengths
 * with ">" to decide whether a child's state overrides its parent's, so a
 * later enumerator always dominates an earlier one.
 */
typedef enum
{
	COLLATE_NONE,				/* expression is of a noncollatable datatype */
	COLLATE_IMPLICIT,			/* collation was derived implicitly */
	COLLATE_CONFLICT,			/* we had a conflict of implicit collations */
	COLLATE_EXPLICIT			/* collation was derived explicitly */
} CollateStrength;
63
64	typedef struct
65	{
66	ParseState pstate; /* parse state (for error reporting) /
67	Oid collation; / OID of current collation, if any /
68	CollateStrength strength; / strength of current collation choice /
69	int location; / location of expr that set collation /
70	/ Remaining fields are only valid when strength == COLLATE_CONFLICT /
71	Oid collation2; / OID of conflicting collation /
72	int location2; / location of expr that set collation2 /
73	} assign_collations_context;
74
75	static bool assign_query_collations_walker(Node node, ParseState pstate);
76	static bool assign_collations_walker(Node *node,
77	assign_collations_context *context);
78	static void merge_collation_state(Oid collation,
79	CollateStrength strength,
80	int location,
81	Oid collation2,
82	int location2,
83	assign_collations_context *context);
84	static void assign_aggregate_collations(Aggref *aggref,
85	assign_collations_context *loccontext);
86	static void assign_ordered_set_collations(Aggref *aggref,
87	assign_collations_context *loccontext);
88	static void assign_hypothetical_collations(Aggref *aggref,
89	assign_collations_context *loccontext);
90
91
92	/*
93	* assign_query_collations()
94	* Mark all expressions in the given Query with collation information.
95	*
96	* This should be applied to each Query after completion of parse analysis
97	* for expressions. Note that we do not recurse into sub-Queries, since
98	* those should have been processed when built.
99	*/
100	void
101	assign_query_collations(ParseState pstate, Query query)
102	{
103	/*
104	* We just use query_tree_walker() to visit all the contained expressions.
105	* We can skip the rangetable and CTE subqueries, though, since RTEs and
106	* subqueries had better have been processed already (else Vars referring
107	* to them would not get created with the right collation).
108	*/
109	(void) query_tree_walker(query,
110	assign_query_collations_walker,
111	(void *) pstate,
112	QTW_IGNORE_RANGE_TABLE \|
113	QTW_IGNORE_CTE_SUBQUERIES);
114	}
115
116	/*
117	* Walker for assign_query_collations
118	*
119	* Each expression found by query_tree_walker is processed independently.
120	* Note that query_tree_walker may pass us a whole List, such as the
121	* targetlist, in which case each subexpression must be processed
122	* independently --- we don't want to bleat if two different targetentries
123	* have different collations.
124	*/
125	static bool
126	assign_query_collations_walker(Node node, ParseState pstate)
127	{
128	/ Need do nothing for empty subexpressions /
129	if (node == NULL)
130	return false;
131
132	/*
133	* We don't want to recurse into a set-operations tree; it's already been
134	* fully processed in transformSetOperationStmt.
135	*/
136	if (IsA(node, SetOperationStmt))
137	return false;
138
139	if (IsA(node, List))
140	assign_list_collations(pstate, (List *) node);
141	else
142	assign_expr_collations(pstate, node);
143
144	return false;
145	}
146
147	/*
148	* assign_list_collations()
149	* Mark all nodes in the list of expressions with collation information.
150	*
151	* The list member expressions are processed independently; they do not have
152	* to share a common collation.
153	*/
154	void
155	assign_list_collations(ParseState pstate, List exprs)
156	{
157	ListCell *lc;
158
159	foreach(lc, exprs)
160	{
161	Node node = (Node ) lfirst(lc);
162
163	assign_expr_collations(pstate, node);
164	}
165	}
166
167	/*
168	* assign_expr_collations()
169	* Mark all nodes in the given expression tree with collation information.
170	*
171	* This is exported for the benefit of various utility commands that process
172	* expressions without building a complete Query. It should be applied after
173	* calling transformExpr() plus any expression-modifying operations such as
174	* coerce_to_boolean().
175	*/
176	void
177	assign_expr_collations(ParseState pstate, Node expr)
178	{
179	assign_collations_context context;
180
181	/ initialize context for tree walk /
182	context.pstate = pstate;
183	context.collation = InvalidOid;
184	context.strength = COLLATE_NONE;
185	context.location = -`1`;
186
187	/ and away we go /
188	(void) assign_collations_walker(expr, &context);
189	}
190
191	/*
192	* select_common_collation()
193	* Identify a common collation for a list of expressions.
194	*
195	* The expressions should all return the same datatype, else this is not
196	* terribly meaningful.
197	*
198	* none_ok means that it is permitted to return InvalidOid, indicating that
199	* no common collation could be identified, even for collatable datatypes.
200	* Otherwise, an error is thrown for conflict of implicit collations.
201	*
202	* In theory, none_ok = true reflects the rules of SQL standard clause "Result
203	* of data type combinations", none_ok = false reflects the rules of clause
204	* "Collation determination" (in some cases invoked via "Grouping
205	* operations").
206	*/
207	Oid
208	select_common_collation(ParseState pstate, List exprs, bool none_ok)
209	{
210	assign_collations_context context;
211
212	/ initialize context for tree walk /
213	context.pstate = pstate;
214	context.collation = InvalidOid;
215	context.strength = COLLATE_NONE;
216	context.location = -`1`;
217
218	/ and away we go /
219	(void) assign_collations_walker((Node *) exprs, &context);
220
221	/ deal with collation conflict /
222	if (context.strength == COLLATE_CONFLICT)
223	{
224	if (none_ok)
225	return InvalidOid;
226	ereport(ERROR,
227	(errcode(ERRCODE_COLLATION_MISMATCH),
228	errmsg("collation mismatch between implicit collations \"%s\" and \"%s\"",
229	get_collation_name(context.collation),
230	get_collation_name(context.collation2)),
231	errhint("You can choose the collation by applying the COLLATE clause to one or both expressions."),
232	parser_errposition(context.pstate, context.location2)));
233	}
234
235	/*
236	* Note: if strength is still COLLATE_NONE, we'll return InvalidOid, but
237	* that's okay because it must mean none of the expressions returned
238	* collatable datatypes.
239	*/
240	return context.collation;
241	}
242
243	/*
244	* assign_collations_walker()
245	* Recursive guts of collation processing.
246	*
247	* Nodes with no children (eg, Vars, Consts, Params) must have been marked
248	* when built. All upper-level nodes are marked here.
249	*
250	* Note: if this is invoked directly on a List, it will attempt to infer a
251	* common collation for all the list members. In particular, it will throw
252	* error if there are conflicting explicit collations for different members.
253	*/
254	static bool
255	assign_collations_walker(Node node, assign_collations_context context)
256	{
257	assign_collations_context loccontext;
258	Oid collation;
259	CollateStrength strength;
260	int location;
261
262	/ Need do nothing for empty subexpressions /
263	if (node == NULL)
264	return false;
265
266	/*
267	* Prepare for recursion. For most node types, though not all, the first
268	* thing we do is recurse to process all nodes below this one. Each level
269	* of the tree has its own local context.
270	*/
271	loccontext.pstate = context->pstate;
272	loccontext.collation = InvalidOid;
273	loccontext.strength = COLLATE_NONE;
274	loccontext.location = -`1`;
275	/ Set these fields just to suppress uninitialized-value warnings: /
276	loccontext.collation2 = InvalidOid;
277	loccontext.location2 = -`1`;
278
279	/*
280	* Recurse if appropriate, then determine the collation for this node.
281	*
282	* Note: the general cases are at the bottom of the switch, after various
283	* special cases.
284	*/
285	switch (nodeTag(node))
286	{
287	case T_CollateExpr:
288	{
289	/*
290	* COLLATE sets an explicitly derived collation, regardless of
291	* what the child state is. But we must recurse to set up
292	* collation info below here.
293	*/
294	CollateExpr expr = (CollateExpr ) node;
295
296	(void) expression_tree_walker(node,
297	assign_collations_walker,
298	(void *) &loccontext);
299
300	collation = expr->collOid;
301	Assert(OidIsValid(collation));
302	strength = COLLATE_EXPLICIT;
303	location = expr->location;
304	}
305	break;
306	case T_FieldSelect:
307	{
308	/*
309	* For FieldSelect, the result has the field's declared
310	* collation, independently of what happened in the arguments.
311	* (The immediate argument must be composite and thus not
312	* collatable, anyhow.) The field's collation was already
313	* looked up and saved in the node.
314	*/
315	FieldSelect expr = (FieldSelect ) node;
316
317	/ ... but first, recurse /
318	(void) expression_tree_walker(node,
319	assign_collations_walker,
320	(void *) &loccontext);
321
322	if (OidIsValid(expr->resultcollid))
323	{
324	/ Node's result type is collatable. /
325	/ Pass up field's collation as an implicit choice. /
326	collation = expr->resultcollid;
327	strength = COLLATE_IMPLICIT;
328	location = exprLocation(node);
329	}
330	else
331	{
332	/ Node's result type isn't collatable. /
333	collation = InvalidOid;
334	strength = COLLATE_NONE;
335	location = -`1`; / won't be used /
336	}
337	}
338	break;
339	case T_RowExpr:
340	{
341	/*
342	* RowExpr is a special case because the subexpressions are
343	* independent: we don't want to complain if some of them have
344	* incompatible explicit collations.
345	*/
346	RowExpr expr = (RowExpr ) node;
347
348	assign_list_collations(context->pstate, expr->args);
349
350	/*
351	* Since the result is always composite and therefore never
352	* has a collation, we can just stop here: this node has no
353	* impact on the collation of its parent.
354	*/
355	return false; / done /
356	}
357	case T_RowCompareExpr:
358	{
359	/*
360	* For RowCompare, we have to find the common collation of
361	* each pair of input columns and build a list. If we can't
362	* find a common collation, we just put InvalidOid into the
363	* list, which may or may not cause an error at runtime.
364	*/
365	RowCompareExpr expr = (RowCompareExpr ) node;
366	List *colls = NIL;
367	ListCell *l;
368	ListCell *r;
369
370	forboth(l, expr->largs, r, expr->rargs)
371	{
372	Node le = (Node ) lfirst(l);
373	Node re = (Node ) lfirst(r);
374	Oid coll;
375
376	coll = select_common_collation(context->pstate,
377	list_make2(le, re),
378	true);
379	colls = lappend_oid(colls, coll);
380	}
381	expr->inputcollids = colls;
382
383	/*
384	* Since the result is always boolean and therefore never has
385	* a collation, we can just stop here: this node has no impact
386	* on the collation of its parent.
387	*/
388	return false; / done /
389	}
390	case T_CoerceToDomain:
391	{
392	/*
393	* If the domain declaration included a non-default COLLATE
394	* spec, then use that collation as the output collation of
395	* the coercion. Otherwise allow the input collation to
396	* bubble up. (The input should be of the domain's base type,
397	* therefore we don't need to worry about it not being
398	* collatable when the domain is.)
399	*/
400	CoerceToDomain expr = (CoerceToDomain ) node;
401	Oid typcollation = get_typcollation(expr->resulttype);
402
403	/ ... but first, recurse /
404	(void) expression_tree_walker(node,
405	assign_collations_walker,
406	(void *) &loccontext);
407
408	if (OidIsValid(typcollation))
409	{
410	/ Node's result type is collatable. /
411	if (typcollation == DEFAULT_COLLATION_OID)
412	{
413	/ Collation state bubbles up from child. /
414	collation = loccontext.collation;
415	strength = loccontext.strength;
416	location = loccontext.location;
417	}
418	else
419	{
420	/ Use domain's collation as an implicit choice. /
421	collation = typcollation;
422	strength = COLLATE_IMPLICIT;
423	location = exprLocation(node);
424	}
425	}
426	else
427	{
428	/ Node's result type isn't collatable. /
429	collation = InvalidOid;
430	strength = COLLATE_NONE;
431	location = -`1`; / won't be used /
432	}
433
434	/*
435	* Save the state into the expression node. We know it
436	* doesn't care about input collation.
437	*/
438	if (strength == COLLATE_CONFLICT)
439	exprSetCollation(node, InvalidOid);
440	else
441	exprSetCollation(node, collation);
442	}
443	break;
444	case T_TargetEntry:
445	(void) expression_tree_walker(node,
446	assign_collations_walker,
447	(void *) &loccontext);
448
449	/*
450	* TargetEntry can have only one child, and should bubble that
451	* state up to its parent. We can't use the general-case code
452	* below because exprType and friends don't work on TargetEntry.
453	*/
454	collation = loccontext.collation;
455	strength = loccontext.strength;
456	location = loccontext.location;
457
458	/*
459	* Throw error if the collation is indeterminate for a TargetEntry
460	* that is a sort/group target. We prefer to do this now, instead
461	* of leaving the comparison functions to fail at runtime, because
462	* we can give a syntax error pointer to help locate the problem.
463	* There are some cases where there might not be a failure, for
464	* example if the planner chooses to use hash aggregation instead
465	* of sorting for grouping; but it seems better to predictably
466	* throw an error. (Compare transformSetOperationTree, which will
467	* throw error for indeterminate collation of set-op columns, even
468	* though the planner might be able to implement the set-op
469	* without sorting.)
470	*/
471	if (strength == COLLATE_CONFLICT &&
472	((TargetEntry *) node)->ressortgroupref != `0`)
473	ereport(ERROR,
474	(errcode(ERRCODE_COLLATION_MISMATCH),
475	errmsg("collation mismatch between implicit collations \"%s\" and \"%s\"",
476	get_collation_name(loccontext.collation),
477	get_collation_name(loccontext.collation2)),
478	errhint("You can choose the collation by applying the COLLATE clause to one or both expressions."),
479	parser_errposition(context->pstate,
480	loccontext.location2)));
481	break;
482	case T_InferenceElem:
483	case T_RangeTblRef:
484	case T_JoinExpr:
485	case T_FromExpr:
486	case T_OnConflictExpr:
487	case T_SortGroupClause:
488	(void) expression_tree_walker(node,
489	assign_collations_walker,
490	(void *) &loccontext);
491
492	/*
493	* When we're invoked on a query's jointree, we don't need to do
494	* anything with join nodes except recurse through them to process
495	* WHERE/ON expressions. So just stop here. Likewise, we don't
496	* need to do anything when invoked on sort/group lists.
497	*/
498	return false;
499	case T_Query:
500	{
501	/*
502	* We get here when we're invoked on the Query belonging to a
503	* SubLink. Act as though the Query returns its first output
504	* column, which indeed is what it does for EXPR_SUBLINK and
505	* ARRAY_SUBLINK cases. In the cases where the SubLink
506	* returns boolean, this info will be ignored. Special case:
507	* in EXISTS, the Query might return no columns, in which case
508	* we need do nothing.
509	*
510	* We needn't recurse, since the Query is already processed.
511	*/
512	Query qtree = (Query ) node;
513	TargetEntry *tent;
514
515	if (qtree->targetList == NIL)
516	return false;
517	tent = linitial_node(TargetEntry, qtree->targetList);
518	if (tent->resjunk)
519	return false;
520
521	collation = exprCollation((Node *) tent->expr);
522	/ collation doesn't change if it's converted to array /
523	strength = COLLATE_IMPLICIT;
524	location = exprLocation((Node *) tent->expr);
525	}
526	break;
527	case T_List:
528	(void) expression_tree_walker(node,
529	assign_collations_walker,
530	(void *) &loccontext);
531
532	/*
533	* When processing a list, collation state just bubbles up from
534	* the list elements.
535	*/
536	collation = loccontext.collation;
537	strength = loccontext.strength;
538	location = loccontext.location;
539	break;
540
541	case T_Var:
542	case T_Const:
543	case T_Param:
544	case T_CoerceToDomainValue:
545	case T_CaseTestExpr:
546	case T_SetToDefault:
547	case T_CurrentOfExpr:
548
549	/*
550	* General case for childless expression nodes. These should
551	* already have a collation assigned; it is not this function's
552	* responsibility to look into the catalogs for base-case
553	* information.
554	*/
555	collation = exprCollation(node);
556
557	/*
558	* Note: in most cases, there will be an assigned collation
559	* whenever type_is_collatable(exprType(node)); but an exception
560	* occurs for a Var referencing a subquery output column for which
561	* a unique collation was not determinable. That may lead to a
562	* runtime failure if a collation-sensitive function is applied to
563	* the Var.
564	*/
565
566	if (OidIsValid(collation))
567	strength = COLLATE_IMPLICIT;
568	else
569	strength = COLLATE_NONE;
570	location = exprLocation(node);
571	break;
572
573	default:
574	{
575	/*
576	* General case for most expression nodes with children. First
577	* recurse, then figure out what to assign to this node.
578	*/
579	Oid typcollation;
580
581	/*
582	* For most node types, we want to treat all the child
583	* expressions alike; but there are a few exceptions, hence
584	* this inner switch.
585	*/
586	switch (nodeTag(node))
587	{
588	case T_Aggref:
589	{
590	/*
591	* Aggref is messy enough that we give it its own
592	* function, in fact three of them. The FILTER
593	* clause is independent of the rest of the
594	* aggregate, however, so it can be processed
595	* separately.
596	*/
597	Aggref aggref = (Aggref ) node;
598
599	switch (aggref->aggkind)
600	{
601	case AGGKIND_NORMAL:
602	assign_aggregate_collations(aggref,
603	&loccontext);
604	break;
605	case AGGKIND_ORDERED_SET:
606	assign_ordered_set_collations(aggref,
607	&loccontext);
608	break;
609	case AGGKIND_HYPOTHETICAL:
610	assign_hypothetical_collations(aggref,
611	&loccontext);
612	break;
613	default:
614	elog(ERROR, "unrecognized aggkind: %d",
615	(int) aggref->aggkind);
616	}
617
618	assign_expr_collations(context->pstate,
619	(Node *) aggref->aggfilter);
620	}
621	break;
622	case T_WindowFunc:
623	{
624	/*
625	* WindowFunc requires special processing only for
626	* its aggfilter clause, as for aggregates.
627	*/
628	WindowFunc wfunc = (WindowFunc ) node;
629
630	(void) assign_collations_walker((Node *) wfunc->args,
631	&loccontext);
632
633	assign_expr_collations(context->pstate,
634	(Node *) wfunc->aggfilter);
635	}
636	break;
637	case T_CaseExpr:
638	{
639	/*
640	* CaseExpr is a special case because we do not
641	* want to recurse into the test expression (if
642	* any). It was already marked with collations
643	* during transformCaseExpr, and furthermore its
644	* collation is not relevant to the result of the
645	* CASE --- only the output expressions are.
646	*/
647	CaseExpr expr = (CaseExpr ) node;
648	ListCell *lc;
649
650	foreach(lc, expr->args)
651	{
652	CaseWhen *when = lfirst_node(CaseWhen, lc);
653
654	/*
655	* The condition expressions mustn't affect
656	* the CASE's result collation either; but
657	* since they are known to yield boolean, it's
658	* safe to recurse directly on them --- they
659	* won't change loccontext.
660	*/
661	(void) assign_collations_walker((Node *) when->expr,
662	&loccontext);
663	(void) assign_collations_walker((Node *) when->result,
664	&loccontext);
665	}
666	(void) assign_collations_walker((Node *) expr->defresult,
667	&loccontext);
668	}
669	break;
670	default:
671
672	/*
673	* Normal case: all child expressions contribute
674	* equally to loccontext.
675	*/
676	(void) expression_tree_walker(node,
677	assign_collations_walker,
678	(void *) &loccontext);
679	break;
680	}
681
682	/*
683	* Now figure out what collation to assign to this node.
684	*/
685	typcollation = get_typcollation(exprType(node));
686	if (OidIsValid(typcollation))
687	{
688	/ Node's result is collatable; what about its input? /
689	if (loccontext.strength > COLLATE_NONE)
690	{
691	/ Collation state bubbles up from children. /
692	collation = loccontext.collation;
693	strength = loccontext.strength;
694	location = loccontext.location;
695	}
696	else
697	{
698	/*
699	* Collatable output produced without any collatable
700	* input. Use the type's collation (which is usually
701	* DEFAULT_COLLATION_OID, but might be different for a
702	* domain).
703	*/
704	collation = typcollation;
705	strength = COLLATE_IMPLICIT;
706	location = exprLocation(node);
707	}
708	}
709	else
710	{
711	/ Node's result type isn't collatable. /
712	collation = InvalidOid;
713	strength = COLLATE_NONE;
714	location = -`1`; / won't be used /
715	}
716
717	/*
718	* Save the result collation into the expression node. If the
719	* state is COLLATE_CONFLICT, we'll set the collation to
720	* InvalidOid, which might result in an error at runtime.
721	*/
722	if (strength == COLLATE_CONFLICT)
723	exprSetCollation(node, InvalidOid);
724	else
725	exprSetCollation(node, collation);
726
727	/*
728	* Likewise save the input collation, which is the one that
729	* any function called by this node should use.
730	*/
731	if (loccontext.strength == COLLATE_CONFLICT)
732	exprSetInputCollation(node, InvalidOid);
733	else
734	exprSetInputCollation(node, loccontext.collation);
735	}
736	break;
737	}
738
739	/*
740	* Now, merge my information into my parent's state.
741	*/
742	merge_collation_state(collation,
743	strength,
744	location,
745	loccontext.collation2,
746	loccontext.location2,
747	context);
748
749	return false;
750	}
751
752	/*
753	* Merge collation state of a subexpression into the context for its parent.
754	*/
755	static void
756	merge_collation_state(Oid collation,
757	CollateStrength strength,
758	int location,
759	Oid collation2,
760	int location2,
761	assign_collations_context *context)
762	{
763	/*
764	* If the collation strength for this node is different from what's
765	* already in *context, then this node either dominates or is dominated by
766	* earlier siblings.
767	*/
768	if (strength > context->strength)
769	{
770	/ Override previous parent state /
771	context->collation = collation;
772	context->strength = strength;
773	context->location = location;
774	/ Bubble up error info if applicable /
775	if (strength == COLLATE_CONFLICT)
776	{
777	context->collation2 = collation2;
778	context->location2 = location2;
779	}
780	}
781	else if (strength == context->strength)
782	{
783	/ Merge, or detect error if there's a collation conflict /
784	switch (strength)
785	{
786	case COLLATE_NONE:
787	/ Nothing + nothing is still nothing /
788	break;
789	case COLLATE_IMPLICIT:
790	if (collation != context->collation)
791	{
792	/*
793	* Non-default implicit collation always beats default.
794	*/
795	if (context->collation == DEFAULT_COLLATION_OID)
796	{
797	/ Override previous parent state /
798	context->collation = collation;
799	context->strength = strength;
800	context->location = location;
801	}
802	else if (collation != DEFAULT_COLLATION_OID)
803	{
804	/*
805	* Oops, we have a conflict. We cannot throw error
806	* here, since the conflict could be resolved by a
807	* later sibling CollateExpr, or the parent might not
808	* care about collation anyway. Return enough info to
809	* throw the error later, if needed.
810	*/
811	context->strength = COLLATE_CONFLICT;
812	context->collation2 = collation;
813	context->location2 = location;
814	}
815	}
816	break;
817	case COLLATE_CONFLICT:
818	/ We're still conflicted ... /
819	break;
820	case COLLATE_EXPLICIT:
821	if (collation != context->collation)
822	{
823	/*
824	* Oops, we have a conflict of explicit COLLATE clauses.
825	* Here we choose to throw error immediately; that is what
826	* the SQL standard says to do, and there's no good reason
827	* to be less strict.
828	*/
829	ereport(ERROR,
830	(errcode(ERRCODE_COLLATION_MISMATCH),
831	errmsg("collation mismatch between explicit collations \"%s\" and \"%s\"",
832	get_collation_name(context->collation),
833	get_collation_name(collation)),
834	parser_errposition(context->pstate, location)));
835	}
836	break;
837	}
838	}
839	}
840
841	/*
842	* Aggref is a special case because expressions used only for ordering
843	* shouldn't be taken to conflict with each other or with regular args,
844	* indeed shouldn't affect the aggregate's result collation at all.
845	* We handle this by applying assign_expr_collations() to them rather than
846	* passing down our loccontext.
847	*
848	* Note that we recurse to each TargetEntry, not directly to its contained
849	* expression, so that the case above for T_TargetEntry will complain if we
850	* can't resolve a collation for an ORDER BY item (whether or not it is also
851	* a normal aggregate arg).
852	*
853	* We need not recurse into the aggorder or aggdistinct lists, because those
854	* contain only SortGroupClause nodes which we need not process.
855	*/
856	static void
857	assign_aggregate_collations(Aggref *aggref,
858	assign_collations_context *loccontext)
859	{
860	ListCell *lc;
861
862	/ Plain aggregates have no direct args /
863	Assert(aggref->aggdirectargs == NIL);
864
865	/ Process aggregated args, holding resjunk ones at arm's length /
866	foreach(lc, aggref->args)
867	{
868	TargetEntry *tle = lfirst_node(TargetEntry, lc);
869
870	if (tle->resjunk)
871	assign_expr_collations(loccontext->pstate, (Node *) tle);
872	else
873	(void) assign_collations_walker((Node *) tle, loccontext);
874	}
875	}
876
877	/*
878	* For ordered-set aggregates, it's somewhat unclear how best to proceed.
879	* The spec-defined inverse distribution functions have only one sort column
880	* and don't return collatable types, but this is clearly too restrictive in
881	* the general case. Our solution is to consider that the aggregate's direct
882	* arguments contribute normally to determination of the aggregate's own
883	* collation, while aggregated arguments contribute only when the aggregate
884	* is designed to have exactly one aggregated argument (i.e., it has a single
885	* aggregated argument and is non-variadic). If it can have more than one
886	* aggregated argument, we process the aggregated arguments as independent
887	* sort columns. This avoids throwing error for something like
888	* agg(...) within group (order by x collate "foo", y collate "bar")
889	* while also guaranteeing that variadic aggregates don't change in behavior
890	* depending on how many sort columns a particular call happens to have.
891	*
892	* Otherwise this is much like the plain-aggregate case.
893	*/
894	static void
895	assign_ordered_set_collations(Aggref *aggref,
896	assign_collations_context *loccontext)
897	{
898	bool merge_sort_collations;
899	ListCell *lc;
900
901	/ Merge sort collations to parent only if there can be only one /
902	merge_sort_collations = (list_length(aggref->args) == `1` &&
903	get_func_variadictype(aggref->aggfnoid) == InvalidOid);
904
905	/ Direct args, if any, are normal children of the Aggref node /
906	(void) assign_collations_walker((Node *) aggref->aggdirectargs,
907	loccontext);
908
909	/ Process aggregated args appropriately /
910	foreach(lc, aggref->args)
911	{
912	TargetEntry *tle = lfirst_node(TargetEntry, lc);
913
914	if (merge_sort_collations)
915	(void) assign_collations_walker((Node *) tle, loccontext);
916	else
917	assign_expr_collations(loccontext->pstate, (Node *) tle);
918	}
919	}
920
921	/*
922	* Hypothetical-set aggregates are even more special: per spec, we need to
923	* unify the collations of each pair of hypothetical and aggregated args.
924	* And we need to force the choice of collation down into the sort column
925	* to ensure that the sort happens with the chosen collation. Other than
926	* that, the behavior is like regular ordered-set aggregates. Note that
927	* hypothetical direct arguments contribute to the aggregate collation
928	* only when their partner aggregated arguments do.
929	*/
930	static void
931	assign_hypothetical_collations(Aggref *aggref,
932	assign_collations_context *loccontext)
933	{
934	ListCell *h_cell = list_head(aggref->aggdirectargs);
935	ListCell *s_cell = list_head(aggref->args);
936	bool merge_sort_collations;
937	int extra_args;
938
939	/ Merge sort collations to parent only if there can be only one /
940	merge_sort_collations = (list_length(aggref->args) == `1` &&
941	get_func_variadictype(aggref->aggfnoid) == InvalidOid);
942
943	/ Process any non-hypothetical direct args /
944	extra_args = list_length(aggref->aggdirectargs) - list_length(aggref->args);
945	Assert(extra_args >= `0`);
946	while (extra_args-- > `0`)
947	{
948	(void) assign_collations_walker((Node *) lfirst(h_cell), loccontext);
949	h_cell = lnext(h_cell);
950	}
951
952	/ Scan hypothetical args and aggregated args in parallel /
953	while (h_cell && s_cell)
954	{
955	Node h_arg = (Node ) lfirst(h_cell);
956	TargetEntry s_tle = (TargetEntry ) lfirst(s_cell);
957	assign_collations_context paircontext;
958
959	/*
960	* Assign collations internally in this pair of expressions, then
961	* choose a common collation for them. This should match
962	* select_common_collation(), but we can't use that function as-is
963	* because we need access to the whole collation state so we can
964	* bubble it up to the aggregate function's level.
965	*/
966	paircontext.pstate = loccontext->pstate;
967	paircontext.collation = InvalidOid;
968	paircontext.strength = COLLATE_NONE;
969	paircontext.location = -`1`;
970	/ Set these fields just to suppress uninitialized-value warnings: /
971	paircontext.collation2 = InvalidOid;
972	paircontext.location2 = -`1`;
973
974	(void) assign_collations_walker(h_arg, &paircontext);
975	(void) assign_collations_walker((Node *) s_tle->expr, &paircontext);
976
977	/ deal with collation conflict /
978	if (paircontext.strength == COLLATE_CONFLICT)
979	ereport(ERROR,
980	(errcode(ERRCODE_COLLATION_MISMATCH),
981	errmsg("collation mismatch between implicit collations \"%s\" and \"%s\"",
982	get_collation_name(paircontext.collation),
983	get_collation_name(paircontext.collation2)),
984	errhint("You can choose the collation by applying the COLLATE clause to one or both expressions."),
985	parser_errposition(paircontext.pstate,
986	paircontext.location2)));
987
988	/*
989	* At this point paircontext.collation can be InvalidOid only if the
990	* type is not collatable; no need to do anything in that case. If we
991	* do have to change the sort column's collation, do it by inserting a
992	* RelabelType node into the sort column TLE.
993	*
994	* XXX This is pretty grotty for a couple of reasons:
995	* assign_collations_walker isn't supposed to be changing the
996	* expression structure like this, and a parse-time change of
997	* collation ought to be signaled by a CollateExpr not a RelabelType
998	* (the use of RelabelType for collation marking is supposed to be a
999	* planner/executor thing only). But we have no better alternative.
1000	* In particular, injecting a CollateExpr could result in the
1001	* expression being interpreted differently after dump/reload, since
1002	* we might be effectively promoting an implicit collation to
1003	* explicit. This kluge is relying on ruleutils.c not printing a
1004	* COLLATE clause for a RelabelType, and probably on some other
1005	* fragile behaviors.
1006	*/
1007	if (OidIsValid(paircontext.collation) &&
1008	paircontext.collation != exprCollation((Node *) s_tle->expr))
1009	{
1010	s_tle->expr = (Expr *)
1011	makeRelabelType(s_tle->expr,
1012	exprType((Node *) s_tle->expr),
1013	exprTypmod((Node *) s_tle->expr),
1014	paircontext.collation,
1015	COERCE_IMPLICIT_CAST);
1016	}
1017
1018	/*
1019	* If appropriate, merge this column's collation state up to the
1020	* aggregate function.
1021	*/
1022	if (merge_sort_collations)
1023	merge_collation_state(paircontext.collation,
1024	paircontext.strength,
1025	paircontext.location,
1026	paircontext.collation2,
1027	paircontext.location2,
1028	loccontext);
1029
1030	h_cell = lnext(h_cell);
1031	s_cell = lnext(s_cell);
1032	}
1033	Assert(h_cell == NULL && s_cell == NULL);
1034	}
1035

Browse the source code of PostgreSQL/src/backend/parser/parse_collate.c