pg_locale.c source code [PostgreSQL/src/backend/utils/adt/pg_locale.c]

/*-----------------------------------------------------------------------
 *
 * PostgreSQL locale utilities
 *
 * Portions Copyright (c) 2002-2019, PostgreSQL Global Development Group
 *
 * src/backend/utils/adt/pg_locale.c
 *
 *-----------------------------------------------------------------------
 */
11
/*----------
 * Here is how the locale stuff is handled: LC_COLLATE and LC_CTYPE
 * are fixed at CREATE DATABASE time, stored in pg_database, and cannot
 * be changed. Thus, the effects of strcoll(), strxfrm(), isupper(),
 * toupper(), etc. are always in the same fixed locale.
 *
 * LC_MESSAGES is settable at run time and will take effect
 * immediately.
 *
 * The other categories, LC_MONETARY, LC_NUMERIC, and LC_TIME are also
 * settable at run-time.  However, we don't actually set those locale
 * categories permanently.  This would have bizarre effects like no
 * longer accepting standard floating-point literals in some locales.
 * Instead, we only set these locale categories briefly when needed,
 * cache the required information obtained from localeconv() or
 * strftime(), and then set the locale categories back to "C".
 * The cached information is only used by the formatting functions
 * (to_char, etc.) and the money type.  For the user, this should all be
 * transparent.
 *
 * !!! NOW HEAR THIS !!!
 *
 * We've been bitten repeatedly by this bug, so let's try to keep it in
 * mind in future: on some platforms, the locale functions return pointers
 * to static data that will be overwritten by any later locale function.
 * Thus, for example, the obvious-looking sequence
 *			save = setlocale(category, NULL);
 *			if (!setlocale(category, value))
 *				fail = true;
 *			setlocale(category, save);
 * DOES NOT WORK RELIABLY: on some platforms the second setlocale() call
 * will change the memory save is pointing at.  To do this sort of thing
 * safely, you *must* pstrdup what setlocale returns the first time.
 *
 * The POSIX locale standard is available here:
 *
 *	http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html
 *----------
 */
51
52
53	#include "postgres.h"
54
55	#include <time.h>
56
57	#include "access/htup_details.h"
58	#include "catalog/pg_collation.h"
59	#include "catalog/pg_control.h"
60	#include "mb/pg_wchar.h"
61	#include "utils/builtins.h"
62	#include "utils/formatting.h"
63	#include "utils/hsearch.h"
64	#include "utils/lsyscache.h"
65	#include "utils/memutils.h"
66	#include "utils/pg_locale.h"
67	#include "utils/syscache.h"
68
69	#ifdef USE_ICU
70	#include <unicode/ucnv.h>
71	#endif
72
#ifdef WIN32
/*
 * This Windows file defines StrNCpy. We don't need it here, so we undefine
 * it to keep the compiler quiet, and undefine it again after the file is
 * included, so we don't accidentally use theirs.
 *
 * The macro name is case-sensitive: the second #undef must spell StrNCpy
 * exactly, otherwise the definition from <shlwapi.h> stays in effect.
 */
#undef StrNCpy
#include <shlwapi.h>
#ifdef StrNCpy
#undef StrNCpy
#endif
#endif
85
/*
 * Size, in bytes, of the buffer slot reserved for each localized day or
 * month name produced by strftime() in cache_locale_time().
 */
#define MAX_L10N_DATA		80


/* GUC settings */
char	   *locale_messages;
char	   *locale_monetary;
char	   *locale_numeric;
char	   *locale_time;

/* lc_time localization cache (7 weekday names, 12 month names) */
char	   *localized_abbrev_days[7];
char	   *localized_full_days[7];
char	   *localized_abbrev_months[12];
char	   *localized_full_months[12];

/*
 * Indicates whether locale information cache is valid.  The GUC assign
 * hooks reset these flags; PGLC_localeconv() and cache_locale_time() set
 * them again once the corresponding cache has been rebuilt.
 */
static bool CurrentLocaleConvValid = false;
static bool CurrentLCTimeValid = false;
104
105	/ Environment variable storage area /
106
107	#define LC_ENV_BUFSIZE (NAMEDATALEN + 20)
108
109	static char lc_collate_envbuf[LC_ENV_BUFSIZE];
110	static char lc_ctype_envbuf[LC_ENV_BUFSIZE];
111
112	#ifdef LC_MESSAGES
113	static char lc_messages_envbuf[LC_ENV_BUFSIZE];
114	#endif
115	static char lc_monetary_envbuf[LC_ENV_BUFSIZE];
116	static char lc_numeric_envbuf[LC_ENV_BUFSIZE];
117	static char lc_time_envbuf[LC_ENV_BUFSIZE];
118
119	/ Cache for collation-related knowledge /
120
121	typedef struct
122	{
123	Oid collid; / hash key: pg_collation OID /
124	bool collate_is_c; / is collation's LC_COLLATE C? /
125	bool ctype_is_c; / is collation's LC_CTYPE C? /
126	bool flags_valid; / true if above flags are valid /
127	pg_locale_t locale; / locale_t struct, or 0 if not valid /
128	} collation_cache_entry;
129
130	static HTAB *collation_cache = NULL;
131
132
/* Forward declarations of platform- and build-specific static helpers */
#if defined(WIN32) && defined(LC_MESSAGES)
static char *IsoLocaleName(const char *winlocname);	/* MSVC specific */
#endif

#ifdef USE_ICU
static void icu_set_collation_attributes(UCollator *collator, const char *loc);
#endif
140
141	/*
142	* pg_perm_setlocale
143	*
144	* This wraps the libc function setlocale(), with two additions. First, when
145	* changing LC_CTYPE, update gettext's encoding for the current message
146	* domain. GNU gettext automatically tracks LC_CTYPE on most platforms, but
147	* not on Windows. Second, if the operation is successful, the corresponding
148	* LC_XXX environment variable is set to match. By setting the environment
149	* variable, we ensure that any subsequent use of setlocale(..., "") will
150	* preserve the settings made through this routine. Of course, LC_ALL must
151	* also be unset to fully ensure that, but that has to be done elsewhere after
152	* all the individual LC_XXX variables have been set correctly. (Thank you
153	* Perl for making this kluge necessary.)
154	*/
155	char *
156	pg_perm_setlocale(int category, const char *locale)
157	{
158	char *result;
159	const char *envvar;
160	char *envbuf;
161
162	#ifndef WIN32
163	result = setlocale(category, locale);
164	#else
165
166	/*
167	* On Windows, setlocale(LC_MESSAGES) does not work, so just assume that
168	* the given value is good and set it in the environment variables. We
169	* must ignore attempts to set to "", which means "keep using the old
170	* environment value".
171	*/
172	#ifdef LC_MESSAGES
173	if (category == LC_MESSAGES)
174	{
175	result = (char *) locale;
176	if (locale == NULL \|\| locale[`0`] == `'\0'`)
177	return result;
178	}
179	else
180	#endif
181	result = setlocale(category, locale);
182	#endif /* WIN32 */
183
184	if (result == NULL)
185	return result; / fall out immediately on failure /
186
187	/*
188	* Use the right encoding in translated messages. Under ENABLE_NLS, let
189	* pg_bind_textdomain_codeset() figure it out. Under !ENABLE_NLS, message
190	* format strings are ASCII, but database-encoding strings may enter the
191	* message via %s. This makes the overall message encoding equal to the
192	* database encoding.
193	*/
194	if (category == LC_CTYPE)
195	{
196	static char save_lc_ctype[LC_ENV_BUFSIZE];
197
198	/ copy setlocale() return value before callee invokes it again /
199	strlcpy(save_lc_ctype, result, sizeof(save_lc_ctype));
200	result = save_lc_ctype;
201
202	#ifdef ENABLE_NLS
203	SetMessageEncoding(pg_bind_textdomain_codeset(textdomain(NULL)));
204	#else
205	SetMessageEncoding(GetDatabaseEncoding());
206	#endif
207	}
208
209	switch (category)
210	{
211	case LC_COLLATE:
212	envvar = "LC_COLLATE";
213	envbuf = lc_collate_envbuf;
214	break;
215	case LC_CTYPE:
216	envvar = "LC_CTYPE";
217	envbuf = lc_ctype_envbuf;
218	break;
219	#ifdef LC_MESSAGES
220	case LC_MESSAGES:
221	envvar = "LC_MESSAGES";
222	envbuf = lc_messages_envbuf;
223	#ifdef WIN32
224	result = IsoLocaleName(locale);
225	if (result == NULL)
226	result = (char *) locale;
227	#endif /* WIN32 */
228	break;
229	#endif /* LC_MESSAGES */
230	case LC_MONETARY:
231	envvar = "LC_MONETARY";
232	envbuf = lc_monetary_envbuf;
233	break;
234	case LC_NUMERIC:
235	envvar = "LC_NUMERIC";
236	envbuf = lc_numeric_envbuf;
237	break;
238	case LC_TIME:
239	envvar = "LC_TIME";
240	envbuf = lc_time_envbuf;
241	break;
242	default:
243	elog(FATAL, "unrecognized LC category: %d", category);
244	envvar = NULL; / keep compiler quiet /
245	envbuf = NULL;
246	return NULL;
247	}
248
249	snprintf(envbuf, LC_ENV_BUFSIZE - `1`, "%s=%s", envvar, result);
250
251	if (putenv(envbuf))
252	return NULL;
253
254	return result;
255	}
256
257
258	/*
259	* Is the locale name valid for the locale category?
260	*
261	* If successful, and canonname isn't NULL, a palloc'd copy of the locale's
262	* canonical name is stored there. This is especially useful for figuring out
263	* what locale name "" means (ie, the server environment value). (Actually,
264	* it seems that on most implementations that's the only thing it's good for;
265	* we could wish that setlocale gave back a canonically spelled version of
266	* the locale name, but typically it doesn't.)
267	*/
268	bool
269	check_locale(int category, const char locale, char* **canonname)
270	{
271	char *save;
272	char *res;
273
274	if (canonname)
275	canonname = NULL; /* in case of failure /
276
277	save = setlocale(category, NULL);
278	if (!save)
279	return false; / won't happen, we hope /
280
281	/ save may be pointing at a modifiable scratch variable, see above. /
282	save = pstrdup(save);
283
284	/ set the locale with setlocale, to see if it accepts it. /
285	res = setlocale(category, locale);
286
287	/ save canonical name if requested. /
288	if (res && canonname)
289	*canonname = pstrdup(res);
290
291	/ restore old value. /
292	if (!setlocale(category, save))
293	elog(WARNING, "failed to restore old locale \"%s\"", save);
294	pfree(save);
295
296	return (res != NULL);
297	}
298
299
300	/*
301	* GUC check/assign hooks
302	*
303	* For most locale categories, the assign hook doesn't actually set the locale
304	* permanently, just reset flags so that the next use will cache the
305	* appropriate values. (See explanation at the top of this file.)
306	*
307	* Note: we accept value = "" as selecting the postmaster's environment
308	* value, whatever it was (so long as the environment setting is legal).
309	* This will have been locked down by an earlier call to pg_perm_setlocale.
310	*/
311	bool
312	check_locale_monetary(char *newval, void* **extra, GucSource source)
313	{
314	return check_locale(LC_MONETARY, *newval, NULL);
315	}
316
317	void
318	assign_locale_monetary(const char newval, void* *extra)
319	{
320	CurrentLocaleConvValid = false;
321	}
322
323	bool
324	check_locale_numeric(char *newval, void* **extra, GucSource source)
325	{
326	return check_locale(LC_NUMERIC, *newval, NULL);
327	}
328
329	void
330	assign_locale_numeric(const char newval, void* *extra)
331	{
332	CurrentLocaleConvValid = false;
333	}
334
335	bool
336	check_locale_time(char *newval, void* **extra, GucSource source)
337	{
338	return check_locale(LC_TIME, *newval, NULL);
339	}
340
341	void
342	assign_locale_time(const char newval, void* *extra)
343	{
344	CurrentLCTimeValid = false;
345	}
346
347	/*
348	* We allow LC_MESSAGES to actually be set globally.
349	*
350	* Note: we normally disallow value = "" because it wouldn't have consistent
351	* semantics (it'd effectively just use the previous value). However, this
352	* is the value passed for PGC_S_DEFAULT, so don't complain in that case,
353	* not even if the attempted setting fails due to invalid environment value.
354	* The idea there is just to accept the environment setting if possible
355	* during startup, until we can read the proper value from postgresql.conf.
356	*/
357	bool
358	check_locale_messages(char *newval, void* **extra, GucSource source)
359	{
360	if (**newval == `'\0'`)
361	{
362	if (source == PGC_S_DEFAULT)
363	return true;
364	else
365	return false;
366	}
367
368	/*
369	* LC_MESSAGES category does not exist everywhere, but accept it anyway
370	*
371	* On Windows, we can't even check the value, so accept blindly
372	*/
373	#if defined(LC_MESSAGES) && !defined(WIN32)
374	return check_locale(LC_MESSAGES, *newval, NULL);
375	#else
376	return true;
377	#endif
378	}
379
/*
 * GUC assign hook for lc_messages: install the new value permanently via
 * pg_perm_setlocale() where the platform defines LC_MESSAGES.
 */
void
assign_locale_messages(const char *newval, void *extra)
{
	/*
	 * LC_MESSAGES category does not exist everywhere, but accept it anyway.
	 * We ignore failure, as per comment above.
	 */
#ifdef LC_MESSAGES
	(void) pg_perm_setlocale(LC_MESSAGES, newval);
#endif
}
391
392
/*
 * Frees the malloced content of a struct lconv.  (But not the struct
 * itself.)  It's important that this not throw elog(ERROR).
 *
 * Fields that are NULL are simply skipped: free(NULL) is defined by the C
 * standard to do nothing, so no explicit guards are needed.  The field list
 * must match struct_lconv_is_valid().
 */
static void
free_struct_lconv(struct lconv *s)
{
	free(s->decimal_point);
	free(s->thousands_sep);
	free(s->grouping);
	free(s->int_curr_symbol);
	free(s->currency_symbol);
	free(s->mon_decimal_point);
	free(s->mon_thousands_sep);
	free(s->mon_grouping);
	free(s->positive_sign);
	free(s->negative_sign);
}
421
/*
 * Report whether every string field of a struct lconv that this file cares
 * about is populated (non-NULL).  Returns false as soon as one is missing.
 * Keep the field list in sync with free_struct_lconv().
 */
static bool
struct_lconv_is_valid(struct lconv *s)
{
	return (s->decimal_point != NULL &&
			s->thousands_sep != NULL &&
			s->grouping != NULL &&
			s->int_curr_symbol != NULL &&
			s->currency_symbol != NULL &&
			s->mon_decimal_point != NULL &&
			s->mon_thousands_sep != NULL &&
			s->mon_grouping != NULL &&
			s->positive_sign != NULL &&
			s->negative_sign != NULL);
}
451
452
453	/*
454	* Convert the strdup'd string at *str from the specified encoding to the
455	* database encoding.
456	*/
457	static void
458	db_encoding_convert(int encoding, char **str)
459	{
460	char *pstr;
461	char *mstr;
462
463	/ convert the string to the database encoding /
464	pstr = pg_any_to_server(str, strlen(str), encoding);
465	if (pstr == *str)
466	return; / no conversion happened /
467
468	/ need it malloc'd not palloc'd /
469	mstr = strdup(pstr);
470	if (mstr == NULL)
471	ereport(ERROR,
472	(errcode(ERRCODE_OUT_OF_MEMORY),
473	errmsg("out of memory")));
474
475	/ replace old string /
476	free(*str);
477	*str = mstr;
478
479	pfree(pstr);
480	}
481
482
483	/*
484	* Return the POSIX lconv struct (contains number/money formatting
485	* information) with locale information for all categories.
486	*/
487	struct lconv *
488	PGLC_localeconv(void)
489	{
490	static struct lconv CurrentLocaleConv;
491	static bool CurrentLocaleConvAllocated = false;
492	struct lconv *extlconv;
493	struct lconv worklconv;
494	char *save_lc_monetary;
495	char *save_lc_numeric;
496	#ifdef WIN32
497	char *save_lc_ctype;
498	#endif
499
500	/ Did we do it already? /
501	if (CurrentLocaleConvValid)
502	return &CurrentLocaleConv;
503
504	/ Free any already-allocated storage /
505	if (CurrentLocaleConvAllocated)
506	{
507	free_struct_lconv(&CurrentLocaleConv);
508	CurrentLocaleConvAllocated = false;
509	}
510
511	/*
512	* This is tricky because we really don't want to risk throwing error
513	* while the locale is set to other than our usual settings. Therefore,
514	* the process is: collect the usual settings, set locale to special
515	* setting, copy relevant data into worklconv using strdup(), restore
516	* normal settings, convert data to desired encoding, and finally stash
517	* the collected data in CurrentLocaleConv. This makes it safe if we
518	* throw an error during encoding conversion or run out of memory anywhere
519	* in the process. All data pointed to by struct lconv members is
520	* allocated with strdup, to avoid premature elog(ERROR) and to allow
521	* using a single cleanup routine.
522	*/
523	memset(&worklconv, `0`, sizeof(worklconv));
524
525	/ Save prevailing values of monetary and numeric locales /
526	save_lc_monetary = setlocale(LC_MONETARY, NULL);
527	if (!save_lc_monetary)
528	elog(ERROR, "setlocale(NULL) failed");
529	save_lc_monetary = pstrdup(save_lc_monetary);
530
531	save_lc_numeric = setlocale(LC_NUMERIC, NULL);
532	if (!save_lc_numeric)
533	elog(ERROR, "setlocale(NULL) failed");
534	save_lc_numeric = pstrdup(save_lc_numeric);
535
536	#ifdef WIN32
537
538	/*
539	* The POSIX standard explicitly says that it is undefined what happens if
540	* LC_MONETARY or LC_NUMERIC imply an encoding (codeset) different from
541	* that implied by LC_CTYPE. In practice, all Unix-ish platforms seem to
542	* believe that localeconv() should return strings that are encoded in the
543	* codeset implied by the LC_MONETARY or LC_NUMERIC locale name. Hence,
544	* once we have successfully collected the localeconv() results, we will
545	* convert them from that codeset to the desired server encoding.
546	*
547	* Windows, of course, resolutely does things its own way; on that
548	* platform LC_CTYPE has to match LC_MONETARY/LC_NUMERIC to get sane
549	* results. Hence, we must temporarily set that category as well.
550	*/
551
552	/ Save prevailing value of ctype locale /
553	save_lc_ctype = setlocale(LC_CTYPE, NULL);
554	if (!save_lc_ctype)
555	elog(ERROR, "setlocale(NULL) failed");
556	save_lc_ctype = pstrdup(save_lc_ctype);
557
558	/ Here begins the critical section where we must not throw error /
559
560	/ use numeric to set the ctype /
561	setlocale(LC_CTYPE, locale_numeric);
562	#endif
563
564	/ Get formatting information for numeric /
565	setlocale(LC_NUMERIC, locale_numeric);
566	extlconv = localeconv();
567
568	/ Must copy data now in case setlocale() overwrites it /
569	worklconv.decimal_point = strdup(extlconv->decimal_point);
570	worklconv.thousands_sep = strdup(extlconv->thousands_sep);
571	worklconv.grouping = strdup(extlconv->grouping);
572
573	#ifdef WIN32
574	/ use monetary to set the ctype /
575	setlocale(LC_CTYPE, locale_monetary);
576	#endif
577
578	/ Get formatting information for monetary /
579	setlocale(LC_MONETARY, locale_monetary);
580	extlconv = localeconv();
581
582	/ Must copy data now in case setlocale() overwrites it /
583	worklconv.int_curr_symbol = strdup(extlconv->int_curr_symbol);
584	worklconv.currency_symbol = strdup(extlconv->currency_symbol);
585	worklconv.mon_decimal_point = strdup(extlconv->mon_decimal_point);
586	worklconv.mon_thousands_sep = strdup(extlconv->mon_thousands_sep);
587	worklconv.mon_grouping = strdup(extlconv->mon_grouping);
588	worklconv.positive_sign = strdup(extlconv->positive_sign);
589	worklconv.negative_sign = strdup(extlconv->negative_sign);
590	/ Copy scalar fields as well /
591	worklconv.int_frac_digits = extlconv->int_frac_digits;
592	worklconv.frac_digits = extlconv->frac_digits;
593	worklconv.p_cs_precedes = extlconv->p_cs_precedes;
594	worklconv.p_sep_by_space = extlconv->p_sep_by_space;
595	worklconv.n_cs_precedes = extlconv->n_cs_precedes;
596	worklconv.n_sep_by_space = extlconv->n_sep_by_space;
597	worklconv.p_sign_posn = extlconv->p_sign_posn;
598	worklconv.n_sign_posn = extlconv->n_sign_posn;
599
600	/*
601	* Restore the prevailing locale settings; failure to do so is fatal.
602	* Possibly we could limp along with nondefault LC_MONETARY or LC_NUMERIC,
603	* but proceeding with the wrong value of LC_CTYPE would certainly be bad
604	* news; and considering that the prevailing LC_MONETARY and LC_NUMERIC
605	* are almost certainly "C", there's really no reason that restoring those
606	* should fail.
607	*/
608	#ifdef WIN32
609	if (!setlocale(LC_CTYPE, save_lc_ctype))
610	elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype);
611	#endif
612	if (!setlocale(LC_MONETARY, save_lc_monetary))
613	elog(FATAL, "failed to restore LC_MONETARY to \"%s\"", save_lc_monetary);
614	if (!setlocale(LC_NUMERIC, save_lc_numeric))
615	elog(FATAL, "failed to restore LC_NUMERIC to \"%s\"", save_lc_numeric);
616
617	/*
618	* At this point we've done our best to clean up, and can call functions
619	* that might possibly throw errors with a clean conscience. But let's
620	* make sure we don't leak any already-strdup'd fields in worklconv.
621	*/
622	PG_TRY();
623	{
624	int encoding;
625
626	/ Release the pstrdup'd locale names /
627	pfree(save_lc_monetary);
628	pfree(save_lc_numeric);
629	#ifdef WIN32
630	pfree(save_lc_ctype);
631	#endif
632
633	/ If any of the preceding strdup calls failed, complain now. /
634	if (!struct_lconv_is_valid(&worklconv))
635	ereport(ERROR,
636	(errcode(ERRCODE_OUT_OF_MEMORY),
637	errmsg("out of memory")));
638
639	/*
640	* Now we must perform encoding conversion from whatever's associated
641	* with the locales into the database encoding. If we can't identify
642	* the encoding implied by LC_NUMERIC or LC_MONETARY (ie we get -1),
643	* use PG_SQL_ASCII, which will result in just validating that the
644	* strings are OK in the database encoding.
645	*/
646	encoding = pg_get_encoding_from_locale(locale_numeric, true);
647	if (encoding < `0`)
648	encoding = PG_SQL_ASCII;
649
650	db_encoding_convert(encoding, &worklconv.decimal_point);
651	db_encoding_convert(encoding, &worklconv.thousands_sep);
652	/ grouping is not text and does not require conversion /
653
654	encoding = pg_get_encoding_from_locale(locale_monetary, true);
655	if (encoding < `0`)
656	encoding = PG_SQL_ASCII;
657
658	db_encoding_convert(encoding, &worklconv.int_curr_symbol);
659	db_encoding_convert(encoding, &worklconv.currency_symbol);
660	db_encoding_convert(encoding, &worklconv.mon_decimal_point);
661	db_encoding_convert(encoding, &worklconv.mon_thousands_sep);
662	/ mon_grouping is not text and does not require conversion /
663	db_encoding_convert(encoding, &worklconv.positive_sign);
664	db_encoding_convert(encoding, &worklconv.negative_sign);
665	}
666	PG_CATCH();
667	{
668	free_struct_lconv(&worklconv);
669	PG_RE_THROW();
670	}
671	PG_END_TRY();
672
673	/*
674	* Everything is good, so save the results.
675	*/
676	CurrentLocaleConv = worklconv;
677	CurrentLocaleConvAllocated = true;
678	CurrentLocaleConvValid = true;
679	return &CurrentLocaleConv;
680	}
681
#ifdef WIN32
/*
 * On Windows, strftime() returns its output in encoding CP_ACP (the default
 * operating system codepage for the computer), which is likely different
 * from SERVER_ENCODING.  This is especially important in Japanese versions
 * of Windows which will use SJIS encoding, which we don't support as a
 * server encoding.
 *
 * So, instead of using strftime(), use wcsftime() to return the value in
 * wide characters (internally UTF16) and then convert to UTF8, which we
 * know how to handle directly.
 *
 * Note that this only affects the calls to strftime() in this file, which are
 * used to get the locale-aware strings.  Other parts of the backend use
 * pg_strftime(), which isn't locale-aware and does not need to be replaced.
 *
 * Returns the number of bytes stored in dst (not counting the terminating
 * NUL), or 0 if wcsftime() failed, in which case dst is unspecified.
 */
static size_t
strftime_win32(char *dst, size_t dstlen,
			   const char *format, const struct tm *tm)
{
	size_t		len;
	wchar_t		wformat[8];		/* formats used below need 3 chars */
	wchar_t		wbuf[MAX_L10N_DATA];

	/*
	 * Get a wchar_t version of the format string.  We only actually use
	 * plain-ASCII formats in this file, so we can say that they're UTF8.
	 */
	len = MultiByteToWideChar(CP_UTF8, 0, format, -1,
							  wformat, lengthof(wformat));
	if (len == 0)
		elog(ERROR, "could not convert format string from UTF-8: error code %lu",
			 GetLastError());

	len = wcsftime(wbuf, MAX_L10N_DATA, wformat, tm);
	if (len == 0)
	{
		/*
		 * wcsftime failed, possibly because the result would not fit in
		 * MAX_L10N_DATA.  Return 0 with the contents of dst unspecified.
		 */
		return 0;
	}

	/* Leave room in dst for the terminator added below */
	len = WideCharToMultiByte(CP_UTF8, 0, wbuf, len, dst, dstlen - 1,
							  NULL, NULL);
	if (len == 0)
		elog(ERROR, "could not convert string to UTF-8: error code %lu",
			 GetLastError());

	dst[len] = '\0';

	return len;
}

/* redefine strftime() */
#define strftime(a,b,c,d) strftime_win32(a,b,c,d)
#endif							/* WIN32 */
740
741	/*
742	* Subroutine for cache_locale_time().
743	* Convert the given string from encoding "encoding" to the database
744	* encoding, and store the result at *dst, replacing any previous value.
745	*/
746	static void
747	cache_single_string(char *dst, const* char src, int* encoding)
748	{
749	char *ptr;
750	char *olddst;
751
752	/ Convert the string to the database encoding, or validate it's OK /
753	ptr = pg_any_to_server(src, strlen(src), encoding);
754
755	/ Store the string in long-lived storage, replacing any previous value /
756	olddst = *dst;
757	*dst = MemoryContextStrdup(TopMemoryContext, ptr);
758	if (olddst)
759	pfree(olddst);
760
761	/ Might as well clean up any palloc'd conversion result, too /
762	if (ptr != src)
763	pfree(ptr);
764	}
765
766	/*
767	* Update the lc_time localization cache variables if needed.
768	*/
769	void
770	cache_locale_time(void)
771	{
772	char buf[(`2` * `7` + `2` * `12`) * MAX_L10N_DATA];
773	char *bufptr;
774	time_t timenow;
775	struct tm *timeinfo;
776	bool strftimefail = false;
777	int encoding;
778	int i;
779	char *save_lc_time;
780	#ifdef WIN32
781	char *save_lc_ctype;
782	#endif
783
784	/ did we do this already? /
785	if (CurrentLCTimeValid)
786	return;
787
788	elog(DEBUG3, "cache_locale_time() executed; locale: \"%s\"", locale_time);
789
790	/*
791	* As in PGLC_localeconv(), it's critical that we not throw error while
792	* libc's locale settings have nondefault values. Hence, we just call
793	* strftime() within the critical section, and then convert and save its
794	* results afterwards.
795	*/
796
797	/ Save prevailing value of time locale /
798	save_lc_time = setlocale(LC_TIME, NULL);
799	if (!save_lc_time)
800	elog(ERROR, "setlocale(NULL) failed");
801	save_lc_time = pstrdup(save_lc_time);
802
803	#ifdef WIN32
804
805	/*
806	* On Windows, it appears that wcsftime() internally uses LC_CTYPE, so we
807	* must set it here. This code looks the same as what PGLC_localeconv()
808	* does, but the underlying reason is different: this does NOT determine
809	* the encoding we'll get back from strftime_win32().
810	*/
811
812	/ Save prevailing value of ctype locale /
813	save_lc_ctype = setlocale(LC_CTYPE, NULL);
814	if (!save_lc_ctype)
815	elog(ERROR, "setlocale(NULL) failed");
816	save_lc_ctype = pstrdup(save_lc_ctype);
817
818	/ use lc_time to set the ctype /
819	setlocale(LC_CTYPE, locale_time);
820	#endif
821
822	setlocale(LC_TIME, locale_time);
823
824	/ We use times close to current time as data for strftime(). /
825	timenow = time(NULL);
826	timeinfo = localtime(&timenow);
827
828	/ Store the strftime results in MAX_L10N_DATA-sized portions of buf[] /
829	bufptr = buf;
830
831	/*
832	* MAX_L10N_DATA is sufficient buffer space for every known locale, and
833	* POSIX defines no strftime() errors. (Buffer space exhaustion is not an
834	* error.) An implementation might report errors (e.g. ENOMEM) by
835	* returning 0 (or, less plausibly, a negative value) and setting errno.
836	* Report errno just in case the implementation did that, but clear it in
837	* advance of the calls so we don't emit a stale, unrelated errno.
838	*/
839	errno = `0`;
840
841	/ localized days /
842	for (i = `0`; i < `7`; i++)
843	{
844	timeinfo->tm_wday = i;
845	if (strftime(bufptr, MAX_L10N_DATA, "%a", timeinfo) <= `0`)
846	strftimefail = true;
847	bufptr += MAX_L10N_DATA;
848	if (strftime(bufptr, MAX_L10N_DATA, "%A", timeinfo) <= `0`)
849	strftimefail = true;
850	bufptr += MAX_L10N_DATA;
851	}
852
853	/ localized months /
854	for (i = `0`; i < `12`; i++)
855	{
856	timeinfo->tm_mon = i;
857	timeinfo->tm_mday = `1`; / make sure we don't have invalid date /
858	if (strftime(bufptr, MAX_L10N_DATA, "%b", timeinfo) <= `0`)
859	strftimefail = true;
860	bufptr += MAX_L10N_DATA;
861	if (strftime(bufptr, MAX_L10N_DATA, "%B", timeinfo) <= `0`)
862	strftimefail = true;
863	bufptr += MAX_L10N_DATA;
864	}
865
866	/*
867	* Restore the prevailing locale settings; as in PGLC_localeconv(),
868	* failure to do so is fatal.
869	*/
870	#ifdef WIN32
871	if (!setlocale(LC_CTYPE, save_lc_ctype))
872	elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype);
873	#endif
874	if (!setlocale(LC_TIME, save_lc_time))
875	elog(FATAL, "failed to restore LC_TIME to \"%s\"", save_lc_time);
876
877	/*
878	* At this point we've done our best to clean up, and can throw errors, or
879	* call functions that might throw errors, with a clean conscience.
880	*/
881	if (strftimefail)
882	elog(ERROR, "strftime() failed: %m");
883
884	/ Release the pstrdup'd locale names /
885	pfree(save_lc_time);
886	#ifdef WIN32
887	pfree(save_lc_ctype);
888	#endif
889
890	#ifndef WIN32
891
892	/*
893	* As in PGLC_localeconv(), we must convert strftime()'s output from the
894	* encoding implied by LC_TIME to the database encoding. If we can't
895	* identify the LC_TIME encoding, just perform encoding validation.
896	*/
897	encoding = pg_get_encoding_from_locale(locale_time, true);
898	if (encoding < `0`)
899	encoding = PG_SQL_ASCII;
900
901	#else
902
903	/*
904	* On Windows, strftime_win32() always returns UTF8 data, so convert from
905	* that if necessary.
906	*/
907	encoding = PG_UTF8;
908
909	#endif /* WIN32 */
910
911	bufptr = buf;
912
913	/ localized days /
914	for (i = `0`; i < `7`; i++)
915	{
916	cache_single_string(&localized_abbrev_days[i], bufptr, encoding);
917	bufptr += MAX_L10N_DATA;
918	cache_single_string(&localized_full_days[i], bufptr, encoding);
919	bufptr += MAX_L10N_DATA;
920	}
921
922	/ localized months /
923	for (i = `0`; i < `12`; i++)
924	{
925	cache_single_string(&localized_abbrev_months[i], bufptr, encoding);
926	bufptr += MAX_L10N_DATA;
927	cache_single_string(&localized_full_months[i], bufptr, encoding);
928	bufptr += MAX_L10N_DATA;
929	}
930
931	CurrentLCTimeValid = true;
932	}
933
934
935	#if defined(WIN32) && defined(LC_MESSAGES)
936	/*
937	* Convert a Windows setlocale() argument to a Unix-style one.
938	*
939	* Regardless of platform, we install message catalogs under a Unix-style
940	* LL[_CC][.ENCODING][@VARIANT] naming convention. Only LC_MESSAGES settings
941	* following that style will elicit localized interface strings.
942	*
943	* Before Visual Studio 2012 (msvcr110.dll), Windows setlocale() accepted "C"
944	* (but not "c") and strings of the form <Language>[_<Country>][.<CodePage>],
945	* case-insensitive. setlocale() returns the fully-qualified form; for
946	* example, setlocale("thaI") returns "Thai_Thailand.874". Internally,
947	* setlocale() and _create_locale() select a "locale identifier"[1] and store
948	* it in an undocumented _locale_t field. From that LCID, we can retrieve the
949	* ISO 639 language and the ISO 3166 country. Character encoding does not
950	* matter, because the server and client encodings govern that.
951	*
952	* Windows Vista introduced the "locale name" concept[2], closely following
953	* RFC 4646. Locale identifiers are now deprecated. Starting with Visual
954	* Studio 2012, setlocale() accepts locale names in addition to the strings it
955	* accepted historically. It does not standardize them; setlocale("Th-tH")
956	* returns "Th-tH". setlocale(category, "") still returns a traditional
957	* string. Furthermore, msvcr110.dll changed the undocumented _locale_t
958	* content to carry locale names instead of locale identifiers.
959	*
960	* MinGW headers declare _create_locale(), but msvcrt.dll lacks that symbol.
961	* IsoLocaleName() always fails in a MinGW-built postgres.exe, so only
962	* Unix-style values of the lc_messages GUC can elicit localized messages. In
963	* particular, every lc_messages setting that initdb can select automatically
964	* will yield only C-locale messages. XXX This could be fixed by running the
965	* fully-qualified locale name through a lookup table.
966	*
967	* This function returns a pointer to a static buffer bearing the converted
968	* name or NULL if conversion fails.
969	*
970	* [1] http://msdn.microsoft.com/en-us/library/windows/desktop/dd373763.aspx
971	* [2] http://msdn.microsoft.com/en-us/library/windows/desktop/dd373814.aspx
972	*/
static char *
IsoLocaleName(const char *winlocname)
{
#if (_MSC_VER >= 1400)			/* VC8.0 or later */
	static char iso_lc_messages[32];
	_locale_t	loct;

	/* "C" and "POSIX", in any letter case, both map to plain "C". */
	if (pg_strcasecmp("c", winlocname) == 0 ||
		pg_strcasecmp("posix", winlocname) == 0)
	{
		strcpy(iso_lc_messages, "C");
		return iso_lc_messages;
	}

	/* Let the C runtime resolve the Windows-style name; give up if it can't. */
	loct = _create_locale(LC_CTYPE, winlocname);
	if (loct == NULL)
		return NULL;

#if (_MSC_VER >= 1700)			/* Visual Studio 2012 or later */
	{
		size_t		rc;
		char	   *hyphen;

		/* Locale names use only ASCII, any conversion locale suffices. */
		rc = wchar2char(iso_lc_messages, loct->locinfo->locale_name[LC_CTYPE],
						sizeof(iso_lc_messages), NULL);
		_free_locale(loct);

		/* Conversion failure, or no room left for the terminator. */
		if (rc == -1 || rc == sizeof(iso_lc_messages))
			return NULL;

		/*
		 * Since the message catalogs sit on a case-insensitive filesystem,
		 * we need not standardize letter case here.  So long as we do not
		 * ship message catalogs for which it would matter, we also need not
		 * translate the script/variant portion, e.g. uz-Cyrl-UZ to
		 * uz_UZ@cyrillic.  Simply replace the hyphen with an underscore.
		 *
		 * Note that the locale name can be less-specific than the value we
		 * would derive under earlier Visual Studio releases.  For example,
		 * French_France.1252 yields just "fr".  This does not affect any of
		 * the country-specific message catalogs available as of this writing
		 * (pt_BR, zh_CN, zh_TW).
		 */
		hyphen = strchr(iso_lc_messages, '-');
		if (hyphen)
			*hyphen = '_';
	}
#else
	{
		char		isolang[32],
					isocrty[32];
		LCID		lcid;

		/* A zero LCID is replaced by US English before the lookups below. */
		lcid = loct->locinfo->lc_handle[LC_CTYPE];
		if (lcid == 0)
			lcid = MAKELCID(MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US), SORT_DEFAULT);
		_free_locale(loct);

		/* Fetch the ISO 639 language and ISO 3166 country codes. */
		if (!GetLocaleInfoA(lcid, LOCALE_SISO639LANGNAME, isolang, sizeof(isolang)))
			return NULL;
		if (!GetLocaleInfoA(lcid, LOCALE_SISO3166CTRYNAME, isocrty, sizeof(isocrty)))
			return NULL;
		snprintf(iso_lc_messages, sizeof(iso_lc_messages) - 1, "%s_%s", isolang, isocrty);
	}
#endif

	return iso_lc_messages;
#else
	return NULL;				/* Not supported on this version of msvc/mingw */
#endif							/* _MSC_VER >= 1400 */
}
1040	#endif /* WIN32 && LC_MESSAGES */
1041
1042
1043	/*
1044	* Detect aging strxfrm() implementations that, in a subset of locales, write
1045	* past the specified buffer length. Affected users must update OS packages
1046	* before using PostgreSQL 9.5 or later.
1047	*
1048	* Assume that the bug can come and go from one postmaster startup to another
1049	* due to physical replication among diverse machines. Assume that the bug's
1050	* presence will not change during the life of a particular postmaster. Given
1051	* those assumptions, call this no less than once per postmaster startup per
1052	* LC_COLLATE setting used. No known-affected system offers strxfrm_l(), so
1053	* there is no need to consider pg_collation locales.
1054	*/
1055	void
1056	check_strxfrm_bug(void)
1057	{
1058	char buf[`32`];
1059	const int canary = `0x7F`;
1060	bool ok = true;
1061
1062	/*
1063	* Given a two-byte ASCII string and length limit 7, 8 or 9, Solaris 10
1064	* 05/08 returns 18 and modifies 10 bytes. It respects limits above or
1065	* below that range.
1066	*
1067	* The bug is present in Solaris 8 as well; it is absent in Solaris 10
1068	* 01/13 and Solaris 11.2. Affected locales include is_IS.ISO8859-1,
1069	* en_US.UTF-8, en_US.ISO8859-1, and ru_RU.KOI8-R. Unaffected locales
1070	* include de_DE.UTF-8, de_DE.ISO8859-1, zh_TW.UTF-8, and C.
1071	*/
1072	buf[`7`] = canary;
1073	(void) strxfrm(buf, "ab", `7`);
1074	if (buf[`7`] != canary)
1075	ok = false;
1076
1077	/*
1078	* illumos bug #1594 was present in the source tree from 2010-10-11 to
1079	* 2012-02-01. Given an ASCII string of any length and length limit 1,
1080	* affected systems ignore the length limit and modify a number of bytes
1081	* one less than the return value. The problem inputs for this bug do not
1082	* overlap those for the Solaris bug, hence a distinct test.
1083	*
1084	* Affected systems include smartos-20110926T021612Z. Affected locales
1085	* include en_US.ISO8859-1 and en_US.UTF-8. Unaffected locales include C.
1086	*/
1087	buf[`1`] = canary;
1088	(void) strxfrm(buf, "a", `1`);
1089	if (buf[`1`] != canary)
1090	ok = false;
1091
1092	if (!ok)
1093	ereport(ERROR,
1094	(errcode(ERRCODE_SYSTEM_ERROR),
1095	errmsg_internal("strxfrm(), in locale \"%s\", writes past the specified array length",
1096	setlocale(LC_COLLATE, NULL)),
1097	errhint("Apply system library package updates.")));
1098	}
1099
1100
1101	/*
1102	* Cache mechanism for collation information.
1103	*
1104	* We cache two flags: whether the collation's LC_COLLATE or LC_CTYPE is C
1105	* (or POSIX), so we can optimize a few code paths in various places.
1106	* For the built-in C and POSIX collations, we can know that without even
1107	* doing a cache lookup, but we want to support aliases for C/POSIX too.
1108	* For the "default" collation, there are separate static cache variables,
1109	* since consulting the pg_collation catalog doesn't tell us what we need.
1110	*
1111	* Also, if a pg_locale_t has been requested for a collation, we cache that
1112	* for the life of a backend.
1113	*
1114	* Note that some code relies on the flags not reporting false negatives
1115	* (that is, saying it's not C when it is). For example, char2wchar()
1116	* could fail if the locale is C, so str_tolower() shouldn't call it
1117	* in that case.
1118	*
1119	* Note that we currently lack any way to flush the cache. Since we don't
1120	* support ALTER COLLATION, this is OK. The worst case is that someone
1121	* drops a collation, and a useless cache entry hangs around in existing
1122	* backends.
1123	*/
1124
1125	static collation_cache_entry *
1126	lookup_collation_cache(Oid collation, bool set_flags)
1127	{
1128	collation_cache_entry *cache_entry;
1129	bool found;
1130
1131	Assert(OidIsValid(collation));
1132	Assert(collation != DEFAULT_COLLATION_OID);
1133
1134	if (collation_cache == NULL)
1135	{
1136	/ First time through, initialize the hash table /
1137	HASHCTL ctl;
1138
1139	memset(&ctl, `0`, sizeof(ctl));
1140	ctl.keysize = sizeof(Oid);
1141	ctl.entrysize = sizeof(collation_cache_entry);
1142	collation_cache = hash_create("Collation cache", `100`, &ctl,
1143	HASH_ELEM \| HASH_BLOBS);
1144	}
1145
1146	cache_entry = hash_search(collation_cache, &collation, HASH_ENTER, &found);
1147	if (!found)
1148	{
1149	/*
1150	* Make sure cache entry is marked invalid, in case we fail before
1151	* setting things.
1152	*/
1153	cache_entry->flags_valid = false;
1154	cache_entry->locale = `0`;
1155	}
1156
1157	if (set_flags && !cache_entry->flags_valid)
1158	{
1159	/ Attempt to set the flags /
1160	HeapTuple tp;
1161	Form_pg_collation collform;
1162	const char *collcollate;
1163	const char *collctype;
1164
1165	tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collation));
1166	if (!HeapTupleIsValid(tp))
1167	elog(ERROR, "cache lookup failed for collation %u", collation);
1168	collform = (Form_pg_collation) GETSTRUCT(tp);
1169
1170	collcollate = NameStr(collform->collcollate);
1171	collctype = NameStr(collform->collctype);
1172
1173	cache_entry->collate_is_c = ((strcmp(collcollate, "C") == `0`) \|\|
1174	(strcmp(collcollate, "POSIX") == `0`));
1175	cache_entry->ctype_is_c = ((strcmp(collctype, "C") == `0`) \|\|
1176	(strcmp(collctype, "POSIX") == `0`));
1177
1178	cache_entry->flags_valid = true;
1179
1180	ReleaseSysCache(tp);
1181	}
1182
1183	return cache_entry;
1184	}
1185
1186
1187	/*
1188	* Detect whether collation's LC_COLLATE property is C
1189	*/
1190	bool
1191	lc_collate_is_c(Oid collation)
1192	{
1193	/*
1194	* If we're asked about "collation 0", return false, so that the code will
1195	* go into the non-C path and report that the collation is bogus.
1196	*/
1197	if (!OidIsValid(collation))
1198	return false;
1199
1200	/*
1201	* If we're asked about the default collation, we have to inquire of the C
1202	* library. Cache the result so we only have to compute it once.
1203	*/
1204	if (collation == DEFAULT_COLLATION_OID)
1205	{
1206	static int result = -`1`;
1207	char *localeptr;
1208
1209	if (result >= `0`)
1210	return (bool) result;
1211	localeptr = setlocale(LC_COLLATE, NULL);
1212	if (!localeptr)
1213	elog(ERROR, "invalid LC_COLLATE setting");
1214
1215	if (strcmp(localeptr, "C") == `0`)
1216	result = true;
1217	else if (strcmp(localeptr, "POSIX") == `0`)
1218	result = true;
1219	else
1220	result = false;
1221	return (bool) result;
1222	}
1223
1224	/*
1225	* If we're asked about the built-in C/POSIX collations, we know that.
1226	*/
1227	if (collation == C_COLLATION_OID \|\|
1228	collation == POSIX_COLLATION_OID)
1229	return true;
1230
1231	/*
1232	* Otherwise, we have to consult pg_collation, but we cache that.
1233	*/
1234	return (lookup_collation_cache(collation, true))->collate_is_c;
1235	}
1236
1237	/*
1238	* Detect whether collation's LC_CTYPE property is C
1239	*/
1240	bool
1241	lc_ctype_is_c(Oid collation)
1242	{
1243	/*
1244	* If we're asked about "collation 0", return false, so that the code will
1245	* go into the non-C path and report that the collation is bogus.
1246	*/
1247	if (!OidIsValid(collation))
1248	return false;
1249
1250	/*
1251	* If we're asked about the default collation, we have to inquire of the C
1252	* library. Cache the result so we only have to compute it once.
1253	*/
1254	if (collation == DEFAULT_COLLATION_OID)
1255	{
1256	static int result = -`1`;
1257	char *localeptr;
1258
1259	if (result >= `0`)
1260	return (bool) result;
1261	localeptr = setlocale(LC_CTYPE, NULL);
1262	if (!localeptr)
1263	elog(ERROR, "invalid LC_CTYPE setting");
1264
1265	if (strcmp(localeptr, "C") == `0`)
1266	result = true;
1267	else if (strcmp(localeptr, "POSIX") == `0`)
1268	result = true;
1269	else
1270	result = false;
1271	return (bool) result;
1272	}
1273
1274	/*
1275	* If we're asked about the built-in C/POSIX collations, we know that.
1276	*/
1277	if (collation == C_COLLATION_OID \|\|
1278	collation == POSIX_COLLATION_OID)
1279	return true;
1280
1281	/*
1282	* Otherwise, we have to consult pg_collation, but we cache that.
1283	*/
1284	return (lookup_collation_cache(collation, true))->ctype_is_c;
1285	}
1286
1287
/* simple subroutine for reporting errors from newlocale() */
#ifdef HAVE_LOCALE_T
static void
report_newlocale_failure(const char *localename)
{
	int			saved_errno = errno;

	/*
	 * Windows doesn't provide any useful error indication from
	 * _create_locale(), and BSD-derived platforms don't seem to feel they
	 * need to set errno either (even though POSIX is pretty clear that
	 * newlocale should do so).  So, if errno hasn't been set, assume ENOENT
	 * is what to report.
	 */
	if (saved_errno == 0)
		saved_errno = ENOENT;

	/* %m below reads errno, so make sure it holds the value we settled on. */
	errno = saved_errno;

	/*
	 * ENOENT means "no such locale", not "no such file", so clarify that
	 * errno with an errdetail message.  We test the saved copy because
	 * auxiliary functions might change errno.
	 */
	ereport(ERROR,
			(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
			 errmsg("could not create locale \"%s\": %m",
					localename),
			 (saved_errno == ENOENT ?
			  errdetail("The operating system could not find any locale data for the locale name \"%s\".",
						localename) : 0)));
}
#endif							/* HAVE_LOCALE_T */
1319
1320
1321	/*
1322	* Create a locale_t from a collation OID. Results are cached for the
1323	* lifetime of the backend. Thus, do not free the result with freelocale().
1324	*
1325	* As a special optimization, the default/database collation returns 0.
1326	* Callers should then revert to the non-locale_t-enabled code path.
1327	* In fact, they shouldn't call this function at all when they are dealing
1328	* with the default locale. That can save quite a bit in hotspots.
1329	* Also, callers should avoid calling this before going down a C/POSIX
1330	* fastpath, because such a fastpath should work even on platforms without
1331	* locale_t support in the C library.
1332	*
1333	* For simplicity, we always generate COLLATE + CTYPE even though we
1334	* might only need one of them. Since this is called only once per session,
1335	* it shouldn't cost much.
1336	*/
1337	pg_locale_t
1338	pg_newlocale_from_collation(Oid collid)
1339	{
1340	collation_cache_entry *cache_entry;
1341
1342	/ Callers must pass a valid OID /
1343	Assert(OidIsValid(collid));
1344
1345	/ Return 0 for "default" collation, just in case caller forgets /
1346	if (collid == DEFAULT_COLLATION_OID)
1347	return (pg_locale_t) `0`;
1348
1349	cache_entry = lookup_collation_cache(collid, false);
1350
1351	if (cache_entry->locale == `0`)
1352	{
1353	/ We haven't computed this yet in this session, so do it /
1354	HeapTuple tp;
1355	Form_pg_collation collform;
1356	const char *collcollate;
1357	const char *collctype pg_attribute_unused();
1358	struct pg_locale_struct result;
1359	pg_locale_t resultp;
1360	Datum collversion;
1361	bool isnull;
1362
1363	tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
1364	if (!HeapTupleIsValid(tp))
1365	elog(ERROR, "cache lookup failed for collation %u", collid);
1366	collform = (Form_pg_collation) GETSTRUCT(tp);
1367
1368	collcollate = NameStr(collform->collcollate);
1369	collctype = NameStr(collform->collctype);
1370
1371	/ We'll fill in the result struct locally before allocating memory /
1372	memset(&result, `0`, sizeof(result));
1373	result.provider = collform->collprovider;
1374	result.deterministic = collform->collisdeterministic;
1375
1376	if (collform->collprovider == COLLPROVIDER_LIBC)
1377	{
1378	#ifdef HAVE_LOCALE_T
1379	locale_t loc;
1380
1381	if (strcmp(collcollate, collctype) == `0`)
1382	{
1383	/ Normal case where they're the same /
1384	errno = `0`;
1385	#ifndef WIN32
1386	loc = newlocale(LC_COLLATE_MASK \| LC_CTYPE_MASK, collcollate,
1387	NULL);
1388	#else
1389	loc = _create_locale(LC_ALL, collcollate);
1390	#endif
1391	if (!loc)
1392	report_newlocale_failure(collcollate);
1393	}
1394	else
1395	{
1396	#ifndef WIN32
1397	/ We need two newlocale() steps /
1398	locale_t loc1;
1399
1400	errno = `0`;
1401	loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL);
1402	if (!loc1)
1403	report_newlocale_failure(collcollate);
1404	errno = `0`;
1405	loc = newlocale(LC_CTYPE_MASK, collctype, loc1);
1406	if (!loc)
1407	report_newlocale_failure(collctype);
1408	#else
1409
1410	/*
1411	* XXX The _create_locale() API doesn't appear to support
1412	* this. Could perhaps be worked around by changing
1413	* pg_locale_t to contain two separate fields.
1414	*/
1415	ereport(ERROR,
1416	(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1417	errmsg("collations with different collate and ctype values are not supported on this platform")));
1418	#endif
1419	}
1420
1421	result.info.lt = loc;
1422	#else /* not HAVE_LOCALE_T */
1423	/ platform that doesn't support locale_t /
1424	ereport(ERROR,
1425	(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1426	errmsg("collation provider LIBC is not supported on this platform")));
1427	#endif /* not HAVE_LOCALE_T */
1428	}
1429	else if (collform->collprovider == COLLPROVIDER_ICU)
1430	{
1431	#ifdef USE_ICU
1432	UCollator *collator;
1433	UErrorCode status;
1434
1435	if (strcmp(collcollate, collctype) != `0`)
1436	ereport(ERROR,
1437	(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1438	errmsg("collations with different collate and ctype values are not supported by ICU")));
1439
1440	status = U_ZERO_ERROR;
1441	collator = ucol_open(collcollate, &status);
1442	if (U_FAILURE(status))
1443	ereport(ERROR,
1444	(errmsg("could not open collator for locale \"%s\": %s",
1445	collcollate, u_errorName(status))));
1446
1447	if (U_ICU_VERSION_MAJOR_NUM < `54`)
1448	icu_set_collation_attributes(collator, collcollate);
1449
1450	/ We will leak this string if we get an error below :-( /
1451	result.info.icu.locale = MemoryContextStrdup(TopMemoryContext,
1452	collcollate);
1453	result.info.icu.ucol = collator;
1454	#else /* not USE_ICU */
1455	/ could get here if a collation was created by a build with ICU /
1456	ereport(ERROR,
1457	(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1458	errmsg("ICU is not supported in this build"), \
1459	errhint("You need to rebuild PostgreSQL using --with-icu.")));
1460	#endif /* not USE_ICU */
1461	}
1462
1463	collversion = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion,
1464	&isnull);
1465	if (!isnull)
1466	{
1467	char *actual_versionstr;
1468	char *collversionstr;
1469
1470	actual_versionstr = get_collation_actual_version(collform->collprovider, collcollate);
1471	if (!actual_versionstr)
1472	{
1473	/*
1474	* This could happen when specifying a version in CREATE
1475	* COLLATION for a libc locale, or manually creating a mess in
1476	* the catalogs.
1477	*/
1478	ereport(ERROR,
1479	(errmsg("collation \"%s\" has no actual version, but a version was specified",
1480	NameStr(collform->collname))));
1481	}
1482	collversionstr = TextDatumGetCString(collversion);
1483
1484	if (strcmp(actual_versionstr, collversionstr) != `0`)
1485	ereport(WARNING,
1486	(errmsg("collation \"%s\" has version mismatch",
1487	NameStr(collform->collname)),
1488	errdetail("The collation in the database was created using version %s, "
1489	"but the operating system provides version %s.",
1490	collversionstr, actual_versionstr),
1491	errhint("Rebuild all objects affected by this collation and run "
1492	"ALTER COLLATION %s REFRESH VERSION, "
1493	"or build PostgreSQL with the right library version.",
1494	quote_qualified_identifier(get_namespace_name(collform->collnamespace),
1495	NameStr(collform->collname)))));
1496	}
1497
1498	ReleaseSysCache(tp);
1499
1500	/ We'll keep the pg_locale_t structures in TopMemoryContext /
1501	resultp = MemoryContextAlloc(TopMemoryContext, sizeof(*resultp));
1502	*resultp = result;
1503
1504	cache_entry->locale = resultp;
1505	}
1506
1507	return cache_entry->locale;
1508	}
1509
/*
 * Get provider-specific collation version string for the given collation from
 * the operating system/library.
 *
 * A particular provider must always either return a non-NULL string or return
 * NULL (if it doesn't support versions).  It must not return NULL for some
 * collcollate and not NULL for others.
 *
 * In this implementation only the ICU provider (in a build with USE_ICU)
 * yields a version string; everything else gets NULL.
 */
char *
get_collation_actual_version(char collprovider, const char *collcollate)
{
#ifdef USE_ICU
	if (collprovider == COLLPROVIDER_ICU)
	{
		UCollator  *ucol;
		UErrorCode	err = U_ZERO_ERROR;
		UVersionInfo version;
		char		verbuf[U_MAX_VERSION_STRING_LENGTH];

		/* Open a throwaway collator just to ask it for its version. */
		ucol = ucol_open(collcollate, &err);
		if (U_FAILURE(err))
			ereport(ERROR,
					(errmsg("could not open collator for locale \"%s\": %s",
							collcollate, u_errorName(err))));
		ucol_getVersion(ucol, version);
		ucol_close(ucol);

		u_versionToString(version, verbuf);
		return pstrdup(verbuf);
	}
#endif

	/* No version information available for this provider. */
	return NULL;
}
1549
1550
1551	#ifdef USE_ICU
1552	/*
1553	* Converter object for converting between ICU's UChar strings and C strings
1554	* in database encoding. Since the database encoding doesn't change, we only
1555	* need one of these per session.
1556	*/
1557	static UConverter *icu_converter = NULL;
1558
1559	static void
1560	init_icu_converter(void)
1561	{
1562	const char *icu_encoding_name;
1563	UErrorCode status;
1564	UConverter *conv;
1565
1566	if (icu_converter)
1567	return;
1568
1569	icu_encoding_name = get_encoding_name_for_icu(GetDatabaseEncoding());
1570
1571	status = U_ZERO_ERROR;
1572	conv = ucnv_open(icu_encoding_name, &status);
1573	if (U_FAILURE(status))
1574	ereport(ERROR,
1575	(errmsg("could not open ICU converter for encoding \"%s\": %s",
1576	icu_encoding_name, u_errorName(status))));
1577
1578	icu_converter = conv;
1579	}
1580
1581	/*
1582	* Convert a string in the database encoding into a string of UChars.
1583	*
1584	* The source string at buff is of length nbytes
1585	* (it needn't be nul-terminated)
1586	*
1587	* *buff_uchar receives a pointer to the palloc'd result string, and
1588	* the function's result is the number of UChars generated.
1589	*
1590	* The result string is nul-terminated, though most callers rely on the
1591	* result length instead.
1592	*/
1593	int32_t
1594	icu_to_uchar(UChar *buff_uchar, const* char *buff, size_t nbytes)
1595	{
1596	UErrorCode status;
1597	int32_t len_uchar;
1598
1599	init_icu_converter();
1600
1601	status = U_ZERO_ERROR;
1602	len_uchar = ucnv_toUChars(icu_converter, NULL, `0`,
1603	buff, nbytes, &status);
1604	if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
1605	ereport(ERROR,
1606	(errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
1607
1608	buff_uchar = palloc((len_uchar + `1`) sizeof(**buff_uchar));
1609
1610	status = U_ZERO_ERROR;
1611	len_uchar = ucnv_toUChars(icu_converter, *buff_uchar, len_uchar + `1`,
1612	buff, nbytes, &status);
1613	if (U_FAILURE(status))
1614	ereport(ERROR,
1615	(errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
1616
1617	return len_uchar;
1618	}
1619
1620	/*
1621	* Convert a string of UChars into the database encoding.
1622	*
1623	* The source string at buff_uchar is of length len_uchar
1624	* (it needn't be nul-terminated)
1625	*
1626	* *result receives a pointer to the palloc'd result string, and the
1627	* function's result is the number of bytes generated (not counting nul).
1628	*
1629	* The result string is nul-terminated.
1630	*/
1631	int32_t
1632	icu_from_uchar(char *result, const* UChar *buff_uchar, int32_t len_uchar)
1633	{
1634	UErrorCode status;
1635	int32_t len_result;
1636
1637	init_icu_converter();
1638
1639	status = U_ZERO_ERROR;
1640	len_result = ucnv_fromUChars(icu_converter, NULL, `0`,
1641	buff_uchar, len_uchar, &status);
1642	if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
1643	ereport(ERROR,
1644	(errmsg("%s failed: %s", "ucnv_fromUChars",
1645	u_errorName(status))));
1646
1647	*result = palloc(len_result + `1`);
1648
1649	status = U_ZERO_ERROR;
1650	len_result = ucnv_fromUChars(icu_converter, *result, len_result + `1`,
1651	buff_uchar, len_uchar, &status);
1652	if (U_FAILURE(status))
1653	ereport(ERROR,
1654	(errmsg("%s failed: %s", "ucnv_fromUChars",
1655	u_errorName(status))));
1656
1657	return len_result;
1658	}
1659
1660	/*
1661	* Parse collation attributes and apply them to the open collator. This takes
1662	* a string like "und@colStrength=primary;colCaseLevel=yes" and parses and
1663	* applies the key-value arguments.
1664	*
1665	* Starting with ICU version 54, the attributes are processed automatically by
1666	* ucol_open(), so this is only necessary for emulating this behavior on older
1667	* versions.
1668	*/
1669	pg_attribute_unused()
1670	static void
1671	icu_set_collation_attributes(UCollator collator, const* char *loc)
1672	{
1673	char *str = asc_tolower(loc, strlen(loc));
1674
1675	str = strchr(str, `'@'`);
1676	if (!str)
1677	return;
1678	str++;
1679
1680	for (char *token = strtok(str, ";"); token; token = strtok(NULL, ";"))
1681	{
1682	char *e = strchr(token, `'='`);
1683
1684	if (e)
1685	{
1686	char *name;
1687	char *value;
1688	UColAttribute uattr;
1689	UColAttributeValue uvalue;
1690	UErrorCode status;
1691
1692	status = U_ZERO_ERROR;
1693
1694	*e = `'\0'`;
1695	name = token;
1696	value = e + `1`;
1697
1698	/*
1699	* See attribute name and value lists in ICU i18n/coll.cpp
1700	*/
1701	if (strcmp(name, "colstrength") == `0`)
1702	uattr = UCOL_STRENGTH;
1703	else if (strcmp(name, "colbackwards") == `0`)
1704	uattr = UCOL_FRENCH_COLLATION;
1705	else if (strcmp(name, "colcaselevel") == `0`)
1706	uattr = UCOL_CASE_LEVEL;
1707	else if (strcmp(name, "colcasefirst") == `0`)
1708	uattr = UCOL_CASE_FIRST;
1709	else if (strcmp(name, "colalternate") == `0`)
1710	uattr = UCOL_ALTERNATE_HANDLING;
1711	else if (strcmp(name, "colnormalization") == `0`)
1712	uattr = UCOL_NORMALIZATION_MODE;
1713	else if (strcmp(name, "colnumeric") == `0`)
1714	uattr = UCOL_NUMERIC_COLLATION;
1715	else
1716	/ ignore if unknown /
1717	continue;
1718
1719	if (strcmp(value, "primary") == `0`)
1720	uvalue = UCOL_PRIMARY;
1721	else if (strcmp(value, "secondary") == `0`)
1722	uvalue = UCOL_SECONDARY;
1723	else if (strcmp(value, "tertiary") == `0`)
1724	uvalue = UCOL_TERTIARY;
1725	else if (strcmp(value, "quaternary") == `0`)
1726	uvalue = UCOL_QUATERNARY;
1727	else if (strcmp(value, "identical") == `0`)
1728	uvalue = UCOL_IDENTICAL;
1729	else if (strcmp(value, "no") == `0`)
1730	uvalue = UCOL_OFF;
1731	else if (strcmp(value, "yes") == `0`)
1732	uvalue = UCOL_ON;
1733	else if (strcmp(value, "shifted") == `0`)
1734	uvalue = UCOL_SHIFTED;
1735	else if (strcmp(value, "non-ignorable") == `0`)
1736	uvalue = UCOL_NON_IGNORABLE;
1737	else if (strcmp(value, "lower") == `0`)
1738	uvalue = UCOL_LOWER_FIRST;
1739	else if (strcmp(value, "upper") == `0`)
1740	uvalue = UCOL_UPPER_FIRST;
1741	else
1742	status = U_ILLEGAL_ARGUMENT_ERROR;
1743
1744	if (status == U_ZERO_ERROR)
1745	ucol_setAttribute(collator, uattr, uvalue, &status);
1746
1747	/*
1748	* Pretend the error came from ucol_open(), for consistent error
1749	* message across ICU versions.
1750	*/
1751	if (U_FAILURE(status))
1752	ereport(ERROR,
1753	(errmsg("could not open collator for locale \"%s\": %s",
1754	loc, u_errorName(status))));
1755	}
1756	}
1757	}
1758
1759	#endif /* USE_ICU */
1760
1761	/*
1762	* These functions convert from/to libc's wchar_t, not pg_wchar_t.
1763	* Therefore we keep them here rather than with the mbutils code.
1764	*/
1765
1766	/*
1767	* wchar2char --- convert wide characters to multibyte format
1768	*
1769	* This has the same API as the standard wcstombs_l() function; in particular,
1770	* tolen is the maximum number of bytes to store at to, and from must be
1771	* zero-terminated. The output will be zero-terminated iff there is room.
1772	*/
1773	size_t
1774	wchar2char(char to, const* wchar_t *from, size_t tolen, pg_locale_t locale)
1775	{
1776	size_t result;
1777
1778	Assert(!locale \|\| locale->provider == COLLPROVIDER_LIBC);
1779
1780	if (tolen == `0`)
1781	return `0`;
1782
1783	#ifdef WIN32
1784
1785	/*
1786	* On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
1787	* for some reason mbstowcs and wcstombs won't do this for us, so we use
1788	* MultiByteToWideChar().
1789	*/
1790	if (GetDatabaseEncoding() == PG_UTF8)
1791	{
1792	result = WideCharToMultiByte(CP_UTF8, `0`, from, -`1`, to, tolen,
1793	NULL, NULL);
1794	/ A zero return is failure /
1795	if (result <= `0`)
1796	result = -`1`;
1797	else
1798	{
1799	Assert(result <= tolen);
1800	/ Microsoft counts the zero terminator in the result /
1801	result--;
1802	}
1803	}
1804	else
1805	#endif /* WIN32 */
1806	if (locale == (pg_locale_t) `0`)
1807	{
1808	/ Use wcstombs directly for the default locale /
1809	result = wcstombs(to, from, tolen);
1810	}
1811	else
1812	{
1813	#ifdef HAVE_LOCALE_T
1814	#ifdef HAVE_WCSTOMBS_L
1815	/ Use wcstombs_l for nondefault locales /
1816	result = wcstombs_l(to, from, tolen, locale->info.lt);
1817	#else /* !HAVE_WCSTOMBS_L */
1818	/ We have to temporarily set the locale as current ... ugh /
1819	locale_t save_locale = uselocale(locale->info.lt);
1820
1821	result = wcstombs(to, from, tolen);
1822
1823	uselocale(save_locale);
1824	#endif /* HAVE_WCSTOMBS_L */
1825	#else /* !HAVE_LOCALE_T */
1826	/ Can't have locale != 0 without HAVE_LOCALE_T /
1827	elog(ERROR, "wcstombs_l is not available");
1828	result = `0`; / keep compiler quiet /
1829	#endif /* HAVE_LOCALE_T */
1830	}
1831
1832	return result;
1833	}
1834
1835	/*
1836	* char2wchar --- convert multibyte characters to wide characters
1837	*
1838	* This has almost the API of mbstowcs_l(), except that *from need not be
1839	* null-terminated; instead, the number of input bytes is specified as
1840	* fromlen. Also, we ereport() rather than returning -1 for invalid
1841	* input encoding. tolen is the maximum number of wchar_t's to store at *to.
1842	* The output will be zero-terminated iff there is room.
1843	*/
1844	size_t
1845	char2wchar(wchar_t to, size_t tolen, const* char *from, size_t fromlen,
1846	pg_locale_t locale)
1847	{
1848	size_t result;
1849
1850	Assert(!locale \|\| locale->provider == COLLPROVIDER_LIBC);
1851
1852	if (tolen == `0`)
1853	return `0`;
1854
1855	#ifdef WIN32
1856	/ See WIN32 "Unicode" comment above /
1857	if (GetDatabaseEncoding() == PG_UTF8)
1858	{
1859	/ Win32 API does not work for zero-length input /
1860	if (fromlen == `0`)
1861	result = `0`;
1862	else
1863	{
1864	result = MultiByteToWideChar(CP_UTF8, `0`, from, fromlen, to, tolen - `1`);
1865	/ A zero return is failure /
1866	if (result == `0`)
1867	result = -`1`;
1868	}
1869
1870	if (result != -`1`)
1871	{
1872	Assert(result < tolen);
1873	/ Append trailing null wchar (MultiByteToWideChar() does not) /
1874	to[result] = `0`;
1875	}
1876	}
1877	else
1878	#endif /* WIN32 */
1879	{
1880	/ mbstowcs requires ending '\0' /
1881	char *str = pnstrdup(from, fromlen);
1882
1883	if (locale == (pg_locale_t) `0`)
1884	{
1885	/ Use mbstowcs directly for the default locale /
1886	result = mbstowcs(to, str, tolen);
1887	}
1888	else
1889	{
1890	#ifdef HAVE_LOCALE_T
1891	#ifdef HAVE_MBSTOWCS_L
1892	/ Use mbstowcs_l for nondefault locales /
1893	result = mbstowcs_l(to, str, tolen, locale->info.lt);
1894	#else /* !HAVE_MBSTOWCS_L */
1895	/ We have to temporarily set the locale as current ... ugh /
1896	locale_t save_locale = uselocale(locale->info.lt);
1897
1898	result = mbstowcs(to, str, tolen);
1899
1900	uselocale(save_locale);
1901	#endif /* HAVE_MBSTOWCS_L */
1902	#else /* !HAVE_LOCALE_T */
1903	/ Can't have locale != 0 without HAVE_LOCALE_T /
1904	elog(ERROR, "mbstowcs_l is not available");
1905	result = `0`; / keep compiler quiet /
1906	#endif /* HAVE_LOCALE_T */
1907	}
1908
1909	pfree(str);
1910	}
1911
1912	if (result == -`1`)
1913	{
1914	/*
1915	* Invalid multibyte character encountered. We try to give a useful
1916	* error message by letting pg_verifymbstr check the string. But it's
1917	* possible that the string is OK to us, and not OK to mbstowcs ---
1918	* this suggests that the LC_CTYPE locale is different from the
1919	* database encoding. Give a generic error message if verifymbstr
1920	* can't find anything wrong.
1921	*/
1922	pg_verifymbstr(from, fromlen, false); / might not return /
1923	/ but if it does ... /
1924	ereport(ERROR,
1925	(errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
1926	errmsg("invalid multibyte character for locale"),
1927	errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
1928	}
1929
1930	return result;
1931	}
1932

Browse the source code of PostgreSQL/src/backend/utils/adt/pg_locale.c