1/*-----------------------------------------------------------------------
2 *
3 * PostgreSQL locale utilities
4 *
5 * Portions Copyright (c) 2002-2019, PostgreSQL Global Development Group
6 *
7 * src/backend/utils/adt/pg_locale.c
8 *
9 *-----------------------------------------------------------------------
10 */
11
12/*----------
13 * Here is how the locale stuff is handled: LC_COLLATE and LC_CTYPE
14 * are fixed at CREATE DATABASE time, stored in pg_database, and cannot
15 * be changed. Thus, the effects of strcoll(), strxfrm(), isupper(),
16 * toupper(), etc. are always in the same fixed locale.
17 *
18 * LC_MESSAGES is settable at run time and will take effect
19 * immediately.
20 *
21 * The other categories, LC_MONETARY, LC_NUMERIC, and LC_TIME are also
22 * settable at run-time. However, we don't actually set those locale
23 * categories permanently. This would have bizarre effects like no
24 * longer accepting standard floating-point literals in some locales.
25 * Instead, we only set these locale categories briefly when needed,
26 * cache the required information obtained from localeconv() or
27 * strftime(), and then set the locale categories back to "C".
28 * The cached information is only used by the formatting functions
29 * (to_char, etc.) and the money type. For the user, this should all be
30 * transparent.
31 *
32 * !!! NOW HEAR THIS !!!
33 *
34 * We've been bitten repeatedly by this bug, so let's try to keep it in
35 * mind in future: on some platforms, the locale functions return pointers
36 * to static data that will be overwritten by any later locale function.
37 * Thus, for example, the obvious-looking sequence
38 * save = setlocale(category, NULL);
39 * if (!setlocale(category, value))
40 * fail = true;
41 * setlocale(category, save);
42 * DOES NOT WORK RELIABLY: on some platforms the second setlocale() call
43 * will change the memory save is pointing at. To do this sort of thing
44 * safely, you *must* pstrdup what setlocale returns the first time.
45 *
46 * The POSIX locale standard is available here:
47 *
48 * http://www.opengroup.org/onlinepubs/009695399/basedefs/xbd_chap07.html
49 *----------
50 */
51
52
53#include "postgres.h"
54
55#include <time.h>
56
57#include "access/htup_details.h"
58#include "catalog/pg_collation.h"
59#include "catalog/pg_control.h"
60#include "mb/pg_wchar.h"
61#include "utils/builtins.h"
62#include "utils/formatting.h"
63#include "utils/hsearch.h"
64#include "utils/lsyscache.h"
65#include "utils/memutils.h"
66#include "utils/pg_locale.h"
67#include "utils/syscache.h"
68
69#ifdef USE_ICU
70#include <unicode/ucnv.h>
71#endif
72
#ifdef WIN32
/*
 * This Windows header defines StrNCpy.  We don't need it here, so we
 * undefine our own definition first to keep the compiler quiet about the
 * redefinition, and then undefine the Windows one again after the header is
 * included, so we don't accidentally use theirs.
 */
#undef StrNCpy
#include <shlwapi.h>
#ifdef StrNCpy
#undef StrNCpy
#endif
#endif
85
/*
 * Size of each per-string slot used when collecting strftime() output in
 * cache_locale_time(), and of the wide-character work buffer in
 * strftime_win32().
 */
#define		MAX_L10N_DATA		80


/* GUC settings */
char	   *locale_messages;
char	   *locale_monetary;
char	   *locale_numeric;
char	   *locale_time;

/* lc_time localization cache; filled in by cache_locale_time() */
char	   *localized_abbrev_days[7];
char	   *localized_full_days[7];
char	   *localized_abbrev_months[12];
char	   *localized_full_months[12];

/*
 * indicates whether locale information cache is valid
 *
 * CurrentLocaleConvValid guards the result of PGLC_localeconv();
 * CurrentLCTimeValid guards the localized_* arrays above.  The GUC assign
 * hooks clear these flags to force a rebuild on next use.
 */
static bool CurrentLocaleConvValid = false;
static bool CurrentLCTimeValid = false;

/*
 * Environment variable storage area
 *
 * pg_perm_setlocale() formats "LC_XXX=value" into these buffers and hands
 * them to putenv(), one buffer per locale category.
 */

#define LC_ENV_BUFSIZE (NAMEDATALEN + 20)

static char lc_collate_envbuf[LC_ENV_BUFSIZE];
static char lc_ctype_envbuf[LC_ENV_BUFSIZE];

#ifdef LC_MESSAGES
static char lc_messages_envbuf[LC_ENV_BUFSIZE];
#endif
static char lc_monetary_envbuf[LC_ENV_BUFSIZE];
static char lc_numeric_envbuf[LC_ENV_BUFSIZE];
static char lc_time_envbuf[LC_ENV_BUFSIZE];
118
/*
 * Cache for collation-related knowledge
 *
 * One entry per collation, keyed by the collation's pg_collation OID.
 */

typedef struct
{
	Oid			collid;			/* hash key: pg_collation OID */
	bool		collate_is_c;	/* is collation's LC_COLLATE C? */
	bool		ctype_is_c;		/* is collation's LC_CTYPE C? */
	bool		flags_valid;	/* true if above flags are valid */
	pg_locale_t locale;			/* locale_t struct, or 0 if not valid */
} collation_cache_entry;

/* The cache itself; starts out NULL (presumably created on first use) */
static HTAB *collation_cache = NULL;


/* Forward declarations for platform- and build-specific helpers */
#if defined(WIN32) && defined(LC_MESSAGES)
static char *IsoLocaleName(const char *);	/* MSVC specific */
#endif

#ifdef USE_ICU
static void icu_set_collation_attributes(UCollator *collator, const char *loc);
#endif
140
141/*
142 * pg_perm_setlocale
143 *
144 * This wraps the libc function setlocale(), with two additions. First, when
145 * changing LC_CTYPE, update gettext's encoding for the current message
146 * domain. GNU gettext automatically tracks LC_CTYPE on most platforms, but
147 * not on Windows. Second, if the operation is successful, the corresponding
148 * LC_XXX environment variable is set to match. By setting the environment
149 * variable, we ensure that any subsequent use of setlocale(..., "") will
150 * preserve the settings made through this routine. Of course, LC_ALL must
151 * also be unset to fully ensure that, but that has to be done elsewhere after
152 * all the individual LC_XXX variables have been set correctly. (Thank you
153 * Perl for making this kluge necessary.)
154 */
155char *
156pg_perm_setlocale(int category, const char *locale)
157{
158 char *result;
159 const char *envvar;
160 char *envbuf;
161
162#ifndef WIN32
163 result = setlocale(category, locale);
164#else
165
166 /*
167 * On Windows, setlocale(LC_MESSAGES) does not work, so just assume that
168 * the given value is good and set it in the environment variables. We
169 * must ignore attempts to set to "", which means "keep using the old
170 * environment value".
171 */
172#ifdef LC_MESSAGES
173 if (category == LC_MESSAGES)
174 {
175 result = (char *) locale;
176 if (locale == NULL || locale[0] == '\0')
177 return result;
178 }
179 else
180#endif
181 result = setlocale(category, locale);
182#endif /* WIN32 */
183
184 if (result == NULL)
185 return result; /* fall out immediately on failure */
186
187 /*
188 * Use the right encoding in translated messages. Under ENABLE_NLS, let
189 * pg_bind_textdomain_codeset() figure it out. Under !ENABLE_NLS, message
190 * format strings are ASCII, but database-encoding strings may enter the
191 * message via %s. This makes the overall message encoding equal to the
192 * database encoding.
193 */
194 if (category == LC_CTYPE)
195 {
196 static char save_lc_ctype[LC_ENV_BUFSIZE];
197
198 /* copy setlocale() return value before callee invokes it again */
199 strlcpy(save_lc_ctype, result, sizeof(save_lc_ctype));
200 result = save_lc_ctype;
201
202#ifdef ENABLE_NLS
203 SetMessageEncoding(pg_bind_textdomain_codeset(textdomain(NULL)));
204#else
205 SetMessageEncoding(GetDatabaseEncoding());
206#endif
207 }
208
209 switch (category)
210 {
211 case LC_COLLATE:
212 envvar = "LC_COLLATE";
213 envbuf = lc_collate_envbuf;
214 break;
215 case LC_CTYPE:
216 envvar = "LC_CTYPE";
217 envbuf = lc_ctype_envbuf;
218 break;
219#ifdef LC_MESSAGES
220 case LC_MESSAGES:
221 envvar = "LC_MESSAGES";
222 envbuf = lc_messages_envbuf;
223#ifdef WIN32
224 result = IsoLocaleName(locale);
225 if (result == NULL)
226 result = (char *) locale;
227#endif /* WIN32 */
228 break;
229#endif /* LC_MESSAGES */
230 case LC_MONETARY:
231 envvar = "LC_MONETARY";
232 envbuf = lc_monetary_envbuf;
233 break;
234 case LC_NUMERIC:
235 envvar = "LC_NUMERIC";
236 envbuf = lc_numeric_envbuf;
237 break;
238 case LC_TIME:
239 envvar = "LC_TIME";
240 envbuf = lc_time_envbuf;
241 break;
242 default:
243 elog(FATAL, "unrecognized LC category: %d", category);
244 envvar = NULL; /* keep compiler quiet */
245 envbuf = NULL;
246 return NULL;
247 }
248
249 snprintf(envbuf, LC_ENV_BUFSIZE - 1, "%s=%s", envvar, result);
250
251 if (putenv(envbuf))
252 return NULL;
253
254 return result;
255}
256
257
258/*
259 * Is the locale name valid for the locale category?
260 *
261 * If successful, and canonname isn't NULL, a palloc'd copy of the locale's
262 * canonical name is stored there. This is especially useful for figuring out
263 * what locale name "" means (ie, the server environment value). (Actually,
264 * it seems that on most implementations that's the only thing it's good for;
265 * we could wish that setlocale gave back a canonically spelled version of
266 * the locale name, but typically it doesn't.)
267 */
268bool
269check_locale(int category, const char *locale, char **canonname)
270{
271 char *save;
272 char *res;
273
274 if (canonname)
275 *canonname = NULL; /* in case of failure */
276
277 save = setlocale(category, NULL);
278 if (!save)
279 return false; /* won't happen, we hope */
280
281 /* save may be pointing at a modifiable scratch variable, see above. */
282 save = pstrdup(save);
283
284 /* set the locale with setlocale, to see if it accepts it. */
285 res = setlocale(category, locale);
286
287 /* save canonical name if requested. */
288 if (res && canonname)
289 *canonname = pstrdup(res);
290
291 /* restore old value. */
292 if (!setlocale(category, save))
293 elog(WARNING, "failed to restore old locale \"%s\"", save);
294 pfree(save);
295
296 return (res != NULL);
297}
298
299
300/*
301 * GUC check/assign hooks
302 *
303 * For most locale categories, the assign hook doesn't actually set the locale
304 * permanently, just reset flags so that the next use will cache the
305 * appropriate values. (See explanation at the top of this file.)
306 *
307 * Note: we accept value = "" as selecting the postmaster's environment
308 * value, whatever it was (so long as the environment setting is legal).
309 * This will have been locked down by an earlier call to pg_perm_setlocale.
310 */
311bool
312check_locale_monetary(char **newval, void **extra, GucSource source)
313{
314 return check_locale(LC_MONETARY, *newval, NULL);
315}
316
/*
 * Assign hook for LC_MONETARY.  Nothing is applied to libc here; we only
 * mark the cached localeconv() data stale so that PGLC_localeconv() rebuilds
 * it on next use.
 */
void
assign_locale_monetary(const char *newval, void *extra)
{
	CurrentLocaleConvValid = false;
}
322
323bool
324check_locale_numeric(char **newval, void **extra, GucSource source)
325{
326 return check_locale(LC_NUMERIC, *newval, NULL);
327}
328
/*
 * Assign hook for LC_NUMERIC.  Shares the localeconv() cache with
 * LC_MONETARY, so it invalidates the same flag; PGLC_localeconv() rebuilds
 * the data on next use.
 */
void
assign_locale_numeric(const char *newval, void *extra)
{
	CurrentLocaleConvValid = false;
}
334
335bool
336check_locale_time(char **newval, void **extra, GucSource source)
337{
338 return check_locale(LC_TIME, *newval, NULL);
339}
340
/*
 * Assign hook for LC_TIME.  Only marks the localized day/month name cache
 * stale; cache_locale_time() refills it on next use.
 */
void
assign_locale_time(const char *newval, void *extra)
{
	CurrentLCTimeValid = false;
}
346
347/*
348 * We allow LC_MESSAGES to actually be set globally.
349 *
350 * Note: we normally disallow value = "" because it wouldn't have consistent
351 * semantics (it'd effectively just use the previous value). However, this
352 * is the value passed for PGC_S_DEFAULT, so don't complain in that case,
353 * not even if the attempted setting fails due to invalid environment value.
354 * The idea there is just to accept the environment setting *if possible*
355 * during startup, until we can read the proper value from postgresql.conf.
356 */
357bool
358check_locale_messages(char **newval, void **extra, GucSource source)
359{
360 if (**newval == '\0')
361 {
362 if (source == PGC_S_DEFAULT)
363 return true;
364 else
365 return false;
366 }
367
368 /*
369 * LC_MESSAGES category does not exist everywhere, but accept it anyway
370 *
371 * On Windows, we can't even check the value, so accept blindly
372 */
373#if defined(LC_MESSAGES) && !defined(WIN32)
374 return check_locale(LC_MESSAGES, *newval, NULL);
375#else
376 return true;
377#endif
378}
379
/*
 * Assign hook for LC_MESSAGES.  Unlike the other categories, the new value
 * is installed for real, via pg_perm_setlocale().
 */
void
assign_locale_messages(const char *newval, void *extra)
{
	/*
	 * LC_MESSAGES category does not exist everywhere, but accept it anyway.
	 * We ignore failure, as per comment above.
	 */
#ifdef LC_MESSAGES
	(void) pg_perm_setlocale(LC_MESSAGES, newval);
#endif
}
391
392
/*
 * Release the malloc'd strings hanging off a struct lconv.  The struct
 * itself is not freed, and its pointer fields are left as they were.
 *
 * Fields that are NULL are fine: free(NULL) is defined to do nothing.
 * It's important that this not throw elog(ERROR).
 *
 * The field list must match struct_lconv_is_valid().
 */
static void
free_struct_lconv(struct lconv *s)
{
	/* numeric formatting */
	free(s->decimal_point);
	free(s->thousands_sep);
	free(s->grouping);

	/* monetary formatting */
	free(s->int_curr_symbol);
	free(s->currency_symbol);
	free(s->mon_decimal_point);
	free(s->mon_thousands_sep);
	free(s->mon_grouping);
	free(s->positive_sign);
	free(s->negative_sign);
}
421
/*
 * Report whether every string field of a struct lconv that we care about is
 * non-NULL.  Returns false as soon as one of them turns out to be NULL.
 *
 * The field list must match free_struct_lconv().
 */
static bool
struct_lconv_is_valid(struct lconv *s)
{
	return s->decimal_point != NULL &&
		s->thousands_sep != NULL &&
		s->grouping != NULL &&
		s->int_curr_symbol != NULL &&
		s->currency_symbol != NULL &&
		s->mon_decimal_point != NULL &&
		s->mon_thousands_sep != NULL &&
		s->mon_grouping != NULL &&
		s->positive_sign != NULL &&
		s->negative_sign != NULL;
}
451
452
453/*
454 * Convert the strdup'd string at *str from the specified encoding to the
455 * database encoding.
456 */
457static void
458db_encoding_convert(int encoding, char **str)
459{
460 char *pstr;
461 char *mstr;
462
463 /* convert the string to the database encoding */
464 pstr = pg_any_to_server(*str, strlen(*str), encoding);
465 if (pstr == *str)
466 return; /* no conversion happened */
467
468 /* need it malloc'd not palloc'd */
469 mstr = strdup(pstr);
470 if (mstr == NULL)
471 ereport(ERROR,
472 (errcode(ERRCODE_OUT_OF_MEMORY),
473 errmsg("out of memory")));
474
475 /* replace old string */
476 free(*str);
477 *str = mstr;
478
479 pfree(pstr);
480}
481
482
/*
 * Return the POSIX lconv struct (contains number/money formatting
 * information) with locale information for all categories.
 *
 * The numeric fields are collected under the locale named by locale_numeric
 * and the monetary fields under the one named by locale_monetary; the text
 * fields are then converted to the database encoding.
 *
 * The result lives in a function-static struct and is cached: as long as
 * CurrentLocaleConvValid is set, the same pointer is returned without any
 * further work.  The assign hooks for LC_MONETARY and LC_NUMERIC reset that
 * flag, which causes the data to be rebuilt on the next call.
 */
struct lconv *
PGLC_localeconv(void)
{
	static struct lconv CurrentLocaleConv;
	static bool CurrentLocaleConvAllocated = false;
	struct lconv *extlconv;
	struct lconv worklconv;
	char	   *save_lc_monetary;
	char	   *save_lc_numeric;
#ifdef WIN32
	char	   *save_lc_ctype;
#endif

	/* Did we do it already? */
	if (CurrentLocaleConvValid)
		return &CurrentLocaleConv;

	/* Free any already-allocated storage */
	if (CurrentLocaleConvAllocated)
	{
		free_struct_lconv(&CurrentLocaleConv);
		CurrentLocaleConvAllocated = false;
	}

	/*
	 * This is tricky because we really don't want to risk throwing error
	 * while the locale is set to other than our usual settings.  Therefore,
	 * the process is: collect the usual settings, set locale to special
	 * setting, copy relevant data into worklconv using strdup(), restore
	 * normal settings, convert data to desired encoding, and finally stash
	 * the collected data in CurrentLocaleConv.  This makes it safe if we
	 * throw an error during encoding conversion or run out of memory anywhere
	 * in the process.  All data pointed to by struct lconv members is
	 * allocated with strdup, to avoid premature elog(ERROR) and to allow
	 * using a single cleanup routine.
	 */
	memset(&worklconv, 0, sizeof(worklconv));

	/*
	 * Save prevailing values of monetary and numeric locales.  The names are
	 * pstrdup'd because later setlocale() calls may overwrite the storage
	 * setlocale() returned.
	 */
	save_lc_monetary = setlocale(LC_MONETARY, NULL);
	if (!save_lc_monetary)
		elog(ERROR, "setlocale(NULL) failed");
	save_lc_monetary = pstrdup(save_lc_monetary);

	save_lc_numeric = setlocale(LC_NUMERIC, NULL);
	if (!save_lc_numeric)
		elog(ERROR, "setlocale(NULL) failed");
	save_lc_numeric = pstrdup(save_lc_numeric);

#ifdef WIN32

	/*
	 * The POSIX standard explicitly says that it is undefined what happens if
	 * LC_MONETARY or LC_NUMERIC imply an encoding (codeset) different from
	 * that implied by LC_CTYPE.  In practice, all Unix-ish platforms seem to
	 * believe that localeconv() should return strings that are encoded in the
	 * codeset implied by the LC_MONETARY or LC_NUMERIC locale name.  Hence,
	 * once we have successfully collected the localeconv() results, we will
	 * convert them from that codeset to the desired server encoding.
	 *
	 * Windows, of course, resolutely does things its own way; on that
	 * platform LC_CTYPE has to match LC_MONETARY/LC_NUMERIC to get sane
	 * results.  Hence, we must temporarily set that category as well.
	 */

	/* Save prevailing value of ctype locale */
	save_lc_ctype = setlocale(LC_CTYPE, NULL);
	if (!save_lc_ctype)
		elog(ERROR, "setlocale(NULL) failed");
	save_lc_ctype = pstrdup(save_lc_ctype);

	/* Here begins the critical section where we must not throw error */

	/* use numeric to set the ctype */
	setlocale(LC_CTYPE, locale_numeric);
#endif

	/*
	 * Get formatting information for numeric.  The result of this setlocale()
	 * call is not checked.
	 */
	setlocale(LC_NUMERIC, locale_numeric);
	extlconv = localeconv();

	/*
	 * Must copy data now in case setlocale() overwrites it.  A failed
	 * strdup() leaves a NULL field behind, which is detected later by
	 * struct_lconv_is_valid(), once it is safe to throw an error.
	 */
	worklconv.decimal_point = strdup(extlconv->decimal_point);
	worklconv.thousands_sep = strdup(extlconv->thousands_sep);
	worklconv.grouping = strdup(extlconv->grouping);

#ifdef WIN32
	/* use monetary to set the ctype */
	setlocale(LC_CTYPE, locale_monetary);
#endif

	/* Get formatting information for monetary */
	setlocale(LC_MONETARY, locale_monetary);
	extlconv = localeconv();

	/* Must copy data now in case setlocale() overwrites it */
	worklconv.int_curr_symbol = strdup(extlconv->int_curr_symbol);
	worklconv.currency_symbol = strdup(extlconv->currency_symbol);
	worklconv.mon_decimal_point = strdup(extlconv->mon_decimal_point);
	worklconv.mon_thousands_sep = strdup(extlconv->mon_thousands_sep);
	worklconv.mon_grouping = strdup(extlconv->mon_grouping);
	worklconv.positive_sign = strdup(extlconv->positive_sign);
	worklconv.negative_sign = strdup(extlconv->negative_sign);
	/* Copy scalar fields as well */
	worklconv.int_frac_digits = extlconv->int_frac_digits;
	worklconv.frac_digits = extlconv->frac_digits;
	worklconv.p_cs_precedes = extlconv->p_cs_precedes;
	worklconv.p_sep_by_space = extlconv->p_sep_by_space;
	worklconv.n_cs_precedes = extlconv->n_cs_precedes;
	worklconv.n_sep_by_space = extlconv->n_sep_by_space;
	worklconv.p_sign_posn = extlconv->p_sign_posn;
	worklconv.n_sign_posn = extlconv->n_sign_posn;

	/*
	 * Restore the prevailing locale settings; failure to do so is fatal.
	 * Possibly we could limp along with nondefault LC_MONETARY or LC_NUMERIC,
	 * but proceeding with the wrong value of LC_CTYPE would certainly be bad
	 * news; and considering that the prevailing LC_MONETARY and LC_NUMERIC
	 * are almost certainly "C", there's really no reason that restoring those
	 * should fail.
	 */
#ifdef WIN32
	if (!setlocale(LC_CTYPE, save_lc_ctype))
		elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype);
#endif
	if (!setlocale(LC_MONETARY, save_lc_monetary))
		elog(FATAL, "failed to restore LC_MONETARY to \"%s\"", save_lc_monetary);
	if (!setlocale(LC_NUMERIC, save_lc_numeric))
		elog(FATAL, "failed to restore LC_NUMERIC to \"%s\"", save_lc_numeric);

	/*
	 * At this point we've done our best to clean up, and can call functions
	 * that might possibly throw errors with a clean conscience.  But let's
	 * make sure we don't leak any already-strdup'd fields in worklconv.
	 */
	PG_TRY();
	{
		int			encoding;

		/* Release the pstrdup'd locale names */
		pfree(save_lc_monetary);
		pfree(save_lc_numeric);
#ifdef WIN32
		pfree(save_lc_ctype);
#endif

		/* If any of the preceding strdup calls failed, complain now. */
		if (!struct_lconv_is_valid(&worklconv))
			ereport(ERROR,
					(errcode(ERRCODE_OUT_OF_MEMORY),
					 errmsg("out of memory")));

		/*
		 * Now we must perform encoding conversion from whatever's associated
		 * with the locales into the database encoding.  If we can't identify
		 * the encoding implied by LC_NUMERIC or LC_MONETARY (ie we get -1),
		 * use PG_SQL_ASCII, which will result in just validating that the
		 * strings are OK in the database encoding.
		 */
		encoding = pg_get_encoding_from_locale(locale_numeric, true);
		if (encoding < 0)
			encoding = PG_SQL_ASCII;

		db_encoding_convert(encoding, &worklconv.decimal_point);
		db_encoding_convert(encoding, &worklconv.thousands_sep);
		/* grouping is not text and does not require conversion */

		encoding = pg_get_encoding_from_locale(locale_monetary, true);
		if (encoding < 0)
			encoding = PG_SQL_ASCII;

		db_encoding_convert(encoding, &worklconv.int_curr_symbol);
		db_encoding_convert(encoding, &worklconv.currency_symbol);
		db_encoding_convert(encoding, &worklconv.mon_decimal_point);
		db_encoding_convert(encoding, &worklconv.mon_thousands_sep);
		/* mon_grouping is not text and does not require conversion */
		db_encoding_convert(encoding, &worklconv.positive_sign);
		db_encoding_convert(encoding, &worklconv.negative_sign);
	}
	PG_CATCH();
	{
		/* Don't leak the malloc'd strings collected so far */
		free_struct_lconv(&worklconv);
		PG_RE_THROW();
	}
	PG_END_TRY();

	/*
	 * Everything is good, so save the results.  The struct assignment hands
	 * ownership of the malloc'd strings over to CurrentLocaleConv.
	 */
	CurrentLocaleConv = worklconv;
	CurrentLocaleConvAllocated = true;
	CurrentLocaleConvValid = true;
	return &CurrentLocaleConv;
}
681
#ifdef WIN32
/*
 * On Windows, strftime() returns its output in encoding CP_ACP (the default
 * operating system codepage for the computer), which is likely different
 * from SERVER_ENCODING.  This is especially important in Japanese versions
 * of Windows which will use SJIS encoding, which we don't support as a
 * server encoding.
 *
 * So, instead of using strftime(), use wcsftime() to return the value in
 * wide characters (internally UTF16) and then convert to UTF8, which we
 * know how to handle directly.
 *
 * Note that this only affects the calls to strftime() in this file, which are
 * used to get the locale-aware strings.  Other parts of the backend use
 * pg_strftime(), which isn't locale-aware and does not need to be replaced.
 *
 * dst/dstlen describe the output buffer, format is a (plain ASCII) strftime
 * format string, and tm is the time to format.  Returns the number of bytes
 * stored in dst, not counting the terminating NUL, or 0 on failure, in which
 * case the contents of dst are unspecified.
 *
 * This function is called by cache_locale_time() while libc's locale
 * settings hold temporary values, so it must not throw elog(ERROR): doing so
 * would leave those settings in place.  All failures are therefore reported
 * by returning 0, with errno set for the encoding-conversion failures, so
 * that the caller can restore the locale before complaining.
 */
static size_t
strftime_win32(char *dst, size_t dstlen,
			   const char *format, const struct tm *tm)
{
	size_t		len;
	wchar_t		wformat[8];		/* formats used below need 3 chars */
	wchar_t		wbuf[MAX_L10N_DATA];

	/*
	 * Get a wchar_t version of the format string.  We only actually use
	 * plain-ASCII formats in this file, so we can say that they're UTF8.
	 */
	len = MultiByteToWideChar(CP_UTF8, 0, format, -1,
							  wformat, lengthof(wformat));
	if (len == 0)
	{
		/* could not convert format string from UTF-8 */
		errno = EINVAL;
		return 0;
	}

	len = wcsftime(wbuf, MAX_L10N_DATA, wformat, tm);
	if (len == 0)
	{
		/*
		 * wcsftime failed, possibly because the result would not fit in
		 * MAX_L10N_DATA.  Return 0 with the contents of dst unspecified.
		 */
		return 0;
	}

	/* Leave room for the terminating NUL, which we add ourselves below */
	len = WideCharToMultiByte(CP_UTF8, 0, wbuf, len, dst, dstlen - 1,
							  NULL, NULL);
	if (len == 0)
	{
		/* could not convert the result to UTF-8 (or it did not fit) */
		errno = EINVAL;
		return 0;
	}

	dst[len] = '\0';

	return len;
}

/* redefine strftime() */
#define strftime(a,b,c,d) strftime_win32(a,b,c,d)
#endif							/* WIN32 */
740
741/*
742 * Subroutine for cache_locale_time().
743 * Convert the given string from encoding "encoding" to the database
744 * encoding, and store the result at *dst, replacing any previous value.
745 */
746static void
747cache_single_string(char **dst, const char *src, int encoding)
748{
749 char *ptr;
750 char *olddst;
751
752 /* Convert the string to the database encoding, or validate it's OK */
753 ptr = pg_any_to_server(src, strlen(src), encoding);
754
755 /* Store the string in long-lived storage, replacing any previous value */
756 olddst = *dst;
757 *dst = MemoryContextStrdup(TopMemoryContext, ptr);
758 if (olddst)
759 pfree(olddst);
760
761 /* Might as well clean up any palloc'd conversion result, too */
762 if (ptr != src)
763 pfree(ptr);
764}
765
/*
 * Update the lc_time localization cache variables if needed.
 *
 * Fills localized_abbrev_days[], localized_full_days[],
 * localized_abbrev_months[] and localized_full_months[] with the day and
 * month names that strftime() produces under the locale named by
 * locale_time, converted to the database encoding.  Does nothing if
 * CurrentLCTimeValid is already set; sets it on successful completion.
 */
void
cache_locale_time(void)
{
	/* one MAX_L10N_DATA-sized slot per abbreviated/full day and month name */
	char		buf[(2 * 7 + 2 * 12) * MAX_L10N_DATA];
	char	   *bufptr;
	time_t		timenow;
	struct tm  *timeinfo;
	bool		strftimefail = false;
	int			encoding;
	int			i;
	char	   *save_lc_time;
#ifdef WIN32
	char	   *save_lc_ctype;
#endif

	/* did we do this already? */
	if (CurrentLCTimeValid)
		return;

	elog(DEBUG3, "cache_locale_time() executed; locale: \"%s\"", locale_time);

	/*
	 * As in PGLC_localeconv(), it's critical that we not throw error while
	 * libc's locale settings have nondefault values.  Hence, we just call
	 * strftime() within the critical section, and then convert and save its
	 * results afterwards.
	 */

	/*
	 * Save prevailing value of time locale.  The name is pstrdup'd because
	 * later setlocale() calls may overwrite the storage setlocale() returned.
	 */
	save_lc_time = setlocale(LC_TIME, NULL);
	if (!save_lc_time)
		elog(ERROR, "setlocale(NULL) failed");
	save_lc_time = pstrdup(save_lc_time);

#ifdef WIN32

	/*
	 * On Windows, it appears that wcsftime() internally uses LC_CTYPE, so we
	 * must set it here.  This code looks the same as what PGLC_localeconv()
	 * does, but the underlying reason is different: this does NOT determine
	 * the encoding we'll get back from strftime_win32().
	 */

	/* Save prevailing value of ctype locale */
	save_lc_ctype = setlocale(LC_CTYPE, NULL);
	if (!save_lc_ctype)
		elog(ERROR, "setlocale(NULL) failed");
	save_lc_ctype = pstrdup(save_lc_ctype);

	/* use lc_time to set the ctype */
	setlocale(LC_CTYPE, locale_time);
#endif

	/* The result of this setlocale() call is not checked */
	setlocale(LC_TIME, locale_time);

	/* We use times close to current time as data for strftime(). */
	timenow = time(NULL);
	timeinfo = localtime(&timenow);

	/* Store the strftime results in MAX_L10N_DATA-sized portions of buf[] */
	bufptr = buf;

	/*
	 * MAX_L10N_DATA is sufficient buffer space for every known locale, and
	 * POSIX defines no strftime() errors.  (Buffer space exhaustion is not an
	 * error.)  An implementation might report errors (e.g. ENOMEM) by
	 * returning 0 (or, less plausibly, a negative value) and setting errno.
	 * Report errno just in case the implementation did that, but clear it in
	 * advance of the calls so we don't emit a stale, unrelated errno.
	 */
	errno = 0;

	/*
	 * localized days
	 *
	 * Slots are filled in the order: abbreviated name, then full name, for
	 * each day in turn.  Failures are only remembered here, not reported,
	 * since we are inside the critical section.
	 */
	for (i = 0; i < 7; i++)
	{
		timeinfo->tm_wday = i;
		if (strftime(bufptr, MAX_L10N_DATA, "%a", timeinfo) <= 0)
			strftimefail = true;
		bufptr += MAX_L10N_DATA;
		if (strftime(bufptr, MAX_L10N_DATA, "%A", timeinfo) <= 0)
			strftimefail = true;
		bufptr += MAX_L10N_DATA;
	}

	/* localized months, laid out the same way after the day names */
	for (i = 0; i < 12; i++)
	{
		timeinfo->tm_mon = i;
		timeinfo->tm_mday = 1;	/* make sure we don't have invalid date */
		if (strftime(bufptr, MAX_L10N_DATA, "%b", timeinfo) <= 0)
			strftimefail = true;
		bufptr += MAX_L10N_DATA;
		if (strftime(bufptr, MAX_L10N_DATA, "%B", timeinfo) <= 0)
			strftimefail = true;
		bufptr += MAX_L10N_DATA;
	}

	/*
	 * Restore the prevailing locale settings; as in PGLC_localeconv(),
	 * failure to do so is fatal.
	 */
#ifdef WIN32
	if (!setlocale(LC_CTYPE, save_lc_ctype))
		elog(FATAL, "failed to restore LC_CTYPE to \"%s\"", save_lc_ctype);
#endif
	if (!setlocale(LC_TIME, save_lc_time))
		elog(FATAL, "failed to restore LC_TIME to \"%s\"", save_lc_time);

	/*
	 * At this point we've done our best to clean up, and can throw errors, or
	 * call functions that might throw errors, with a clean conscience.
	 */
	if (strftimefail)
		elog(ERROR, "strftime() failed: %m");

	/* Release the pstrdup'd locale names */
	pfree(save_lc_time);
#ifdef WIN32
	pfree(save_lc_ctype);
#endif

#ifndef WIN32

	/*
	 * As in PGLC_localeconv(), we must convert strftime()'s output from the
	 * encoding implied by LC_TIME to the database encoding.  If we can't
	 * identify the LC_TIME encoding, just perform encoding validation.
	 */
	encoding = pg_get_encoding_from_locale(locale_time, true);
	if (encoding < 0)
		encoding = PG_SQL_ASCII;

#else

	/*
	 * On Windows, strftime_win32() always returns UTF8 data, so convert from
	 * that if necessary.
	 */
	encoding = PG_UTF8;

#endif							/* WIN32 */

	/* Walk buf[] again, in the same slot order used when filling it */
	bufptr = buf;

	/* localized days */
	for (i = 0; i < 7; i++)
	{
		cache_single_string(&localized_abbrev_days[i], bufptr, encoding);
		bufptr += MAX_L10N_DATA;
		cache_single_string(&localized_full_days[i], bufptr, encoding);
		bufptr += MAX_L10N_DATA;
	}

	/* localized months */
	for (i = 0; i < 12; i++)
	{
		cache_single_string(&localized_abbrev_months[i], bufptr, encoding);
		bufptr += MAX_L10N_DATA;
		cache_single_string(&localized_full_months[i], bufptr, encoding);
		bufptr += MAX_L10N_DATA;
	}

	CurrentLCTimeValid = true;
}
933
934
935#if defined(WIN32) && defined(LC_MESSAGES)
936/*
937 * Convert a Windows setlocale() argument to a Unix-style one.
938 *
939 * Regardless of platform, we install message catalogs under a Unix-style
940 * LL[_CC][.ENCODING][@VARIANT] naming convention. Only LC_MESSAGES settings
941 * following that style will elicit localized interface strings.
942 *
943 * Before Visual Studio 2012 (msvcr110.dll), Windows setlocale() accepted "C"
944 * (but not "c") and strings of the form <Language>[_<Country>][.<CodePage>],
945 * case-insensitive. setlocale() returns the fully-qualified form; for
946 * example, setlocale("thaI") returns "Thai_Thailand.874". Internally,
947 * setlocale() and _create_locale() select a "locale identifier"[1] and store
948 * it in an undocumented _locale_t field. From that LCID, we can retrieve the
949 * ISO 639 language and the ISO 3166 country. Character encoding does not
950 * matter, because the server and client encodings govern that.
951 *
952 * Windows Vista introduced the "locale name" concept[2], closely following
953 * RFC 4646. Locale identifiers are now deprecated. Starting with Visual
954 * Studio 2012, setlocale() accepts locale names in addition to the strings it
955 * accepted historically. It does not standardize them; setlocale("Th-tH")
956 * returns "Th-tH". setlocale(category, "") still returns a traditional
957 * string. Furthermore, msvcr110.dll changed the undocumented _locale_t
958 * content to carry locale names instead of locale identifiers.
959 *
960 * MinGW headers declare _create_locale(), but msvcrt.dll lacks that symbol.
961 * IsoLocaleName() always fails in a MinGW-built postgres.exe, so only
962 * Unix-style values of the lc_messages GUC can elicit localized messages. In
963 * particular, every lc_messages setting that initdb can select automatically
964 * will yield only C-locale messages. XXX This could be fixed by running the
965 * fully-qualified locale name through a lookup table.
966 *
967 * This function returns a pointer to a static buffer bearing the converted
968 * name or NULL if conversion fails.
969 *
970 * [1] http://msdn.microsoft.com/en-us/library/windows/desktop/dd373763.aspx
971 * [2] http://msdn.microsoft.com/en-us/library/windows/desktop/dd373814.aspx
972 */
static char *
IsoLocaleName(const char *winlocname)
{
#if (_MSC_VER >= 1400)          /* VC8.0 or later */
    static char iso_lc_messages[32];
    _locale_t   loct;

    /* Any letter-case spelling of "C" or "POSIX" maps straight to "C". */
    if (pg_strcasecmp("c", winlocname) == 0 ||
        pg_strcasecmp("posix", winlocname) == 0)
    {
        strcpy(iso_lc_messages, "C");
        return iso_lc_messages;
    }

    /* Ask the C runtime to resolve the name; without a locale we give up. */
    loct = _create_locale(LC_CTYPE, winlocname);
    if (loct == NULL)
        return NULL;

#if (_MSC_VER >= 1700)          /* Visual Studio 2012 or later */
    {
        size_t      rc;
        char       *hyphen;

        /* Locale names use only ASCII, any conversion locale suffices. */
        rc = wchar2char(iso_lc_messages,
                        loct->locinfo->locale_name[LC_CTYPE],
                        sizeof(iso_lc_messages), NULL);
        _free_locale(loct);

        /* Fail on conversion error, or when the name filled the buffer. */
        if (rc == (size_t) -1 || rc == sizeof(iso_lc_messages))
            return NULL;

        /*
         * Since the message catalogs sit on a case-insensitive filesystem,
         * we need not standardize letter case here.  So long as we do not
         * ship message catalogs for which it would matter, we also need not
         * translate the script/variant portion, e.g. uz-Cyrl-UZ to
         * uz_UZ@cyrillic.  Simply replace the (first) hyphen with an
         * underscore.
         *
         * Note that the locale name can be less-specific than the value we
         * would derive under earlier Visual Studio releases.  For example,
         * French_France.1252 yields just "fr".  This does not affect any of
         * the country-specific message catalogs available as of this
         * writing (pt_BR, zh_CN, zh_TW).
         */
        hyphen = strchr(iso_lc_messages, '-');
        if (hyphen)
            *hyphen = '_';
    }
#else
    {
        char        isolang[32];
        char        isocrty[32];
        LCID        lcid;

        /* A zero LCID is replaced by US English. */
        lcid = loct->locinfo->lc_handle[LC_CTYPE];
        if (lcid == 0)
            lcid = MAKELCID(MAKELANGID(LANG_ENGLISH, SUBLANG_ENGLISH_US), SORT_DEFAULT);
        _free_locale(loct);

        /* Look up the ISO 639 language and ISO 3166 country for the LCID. */
        if (!GetLocaleInfoA(lcid, LOCALE_SISO639LANGNAME, isolang, sizeof(isolang)))
            return NULL;
        if (!GetLocaleInfoA(lcid, LOCALE_SISO3166CTRYNAME, isocrty, sizeof(isocrty)))
            return NULL;
        snprintf(iso_lc_messages, sizeof(iso_lc_messages) - 1, "%s_%s", isolang, isocrty);
    }
#endif

    return iso_lc_messages;
#else
    return NULL;                /* Not supported on this version of msvc/mingw */
#endif                          /* _MSC_VER >= 1400 */
}
1040#endif /* WIN32 && LC_MESSAGES */
1041
1042
1043/*
1044 * Detect aging strxfrm() implementations that, in a subset of locales, write
1045 * past the specified buffer length. Affected users must update OS packages
1046 * before using PostgreSQL 9.5 or later.
1047 *
1048 * Assume that the bug can come and go from one postmaster startup to another
1049 * due to physical replication among diverse machines. Assume that the bug's
1050 * presence will not change during the life of a particular postmaster. Given
1051 * those assumptions, call this no less than once per postmaster startup per
1052 * LC_COLLATE setting used. No known-affected system offers strxfrm_l(), so
1053 * there is no need to consider pg_collation locales.
1054 */
1055void
1056check_strxfrm_bug(void)
1057{
1058 char buf[32];
1059 const int canary = 0x7F;
1060 bool ok = true;
1061
1062 /*
1063 * Given a two-byte ASCII string and length limit 7, 8 or 9, Solaris 10
1064 * 05/08 returns 18 and modifies 10 bytes. It respects limits above or
1065 * below that range.
1066 *
1067 * The bug is present in Solaris 8 as well; it is absent in Solaris 10
1068 * 01/13 and Solaris 11.2. Affected locales include is_IS.ISO8859-1,
1069 * en_US.UTF-8, en_US.ISO8859-1, and ru_RU.KOI8-R. Unaffected locales
1070 * include de_DE.UTF-8, de_DE.ISO8859-1, zh_TW.UTF-8, and C.
1071 */
1072 buf[7] = canary;
1073 (void) strxfrm(buf, "ab", 7);
1074 if (buf[7] != canary)
1075 ok = false;
1076
1077 /*
1078 * illumos bug #1594 was present in the source tree from 2010-10-11 to
1079 * 2012-02-01. Given an ASCII string of any length and length limit 1,
1080 * affected systems ignore the length limit and modify a number of bytes
1081 * one less than the return value. The problem inputs for this bug do not
1082 * overlap those for the Solaris bug, hence a distinct test.
1083 *
1084 * Affected systems include smartos-20110926T021612Z. Affected locales
1085 * include en_US.ISO8859-1 and en_US.UTF-8. Unaffected locales include C.
1086 */
1087 buf[1] = canary;
1088 (void) strxfrm(buf, "a", 1);
1089 if (buf[1] != canary)
1090 ok = false;
1091
1092 if (!ok)
1093 ereport(ERROR,
1094 (errcode(ERRCODE_SYSTEM_ERROR),
1095 errmsg_internal("strxfrm(), in locale \"%s\", writes past the specified array length",
1096 setlocale(LC_COLLATE, NULL)),
1097 errhint("Apply system library package updates.")));
1098}
1099
1100
1101/*
1102 * Cache mechanism for collation information.
1103 *
1104 * We cache two flags: whether the collation's LC_COLLATE or LC_CTYPE is C
1105 * (or POSIX), so we can optimize a few code paths in various places.
1106 * For the built-in C and POSIX collations, we can know that without even
1107 * doing a cache lookup, but we want to support aliases for C/POSIX too.
1108 * For the "default" collation, there are separate static cache variables,
1109 * since consulting the pg_collation catalog doesn't tell us what we need.
1110 *
1111 * Also, if a pg_locale_t has been requested for a collation, we cache that
1112 * for the life of a backend.
1113 *
1114 * Note that some code relies on the flags not reporting false negatives
1115 * (that is, saying it's not C when it is). For example, char2wchar()
1116 * could fail if the locale is C, so str_tolower() shouldn't call it
1117 * in that case.
1118 *
1119 * Note that we currently lack any way to flush the cache. Since we don't
1120 * support ALTER COLLATION, this is OK. The worst case is that someone
1121 * drops a collation, and a useless cache entry hangs around in existing
1122 * backends.
1123 */
1124
1125static collation_cache_entry *
1126lookup_collation_cache(Oid collation, bool set_flags)
1127{
1128 collation_cache_entry *cache_entry;
1129 bool found;
1130
1131 Assert(OidIsValid(collation));
1132 Assert(collation != DEFAULT_COLLATION_OID);
1133
1134 if (collation_cache == NULL)
1135 {
1136 /* First time through, initialize the hash table */
1137 HASHCTL ctl;
1138
1139 memset(&ctl, 0, sizeof(ctl));
1140 ctl.keysize = sizeof(Oid);
1141 ctl.entrysize = sizeof(collation_cache_entry);
1142 collation_cache = hash_create("Collation cache", 100, &ctl,
1143 HASH_ELEM | HASH_BLOBS);
1144 }
1145
1146 cache_entry = hash_search(collation_cache, &collation, HASH_ENTER, &found);
1147 if (!found)
1148 {
1149 /*
1150 * Make sure cache entry is marked invalid, in case we fail before
1151 * setting things.
1152 */
1153 cache_entry->flags_valid = false;
1154 cache_entry->locale = 0;
1155 }
1156
1157 if (set_flags && !cache_entry->flags_valid)
1158 {
1159 /* Attempt to set the flags */
1160 HeapTuple tp;
1161 Form_pg_collation collform;
1162 const char *collcollate;
1163 const char *collctype;
1164
1165 tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collation));
1166 if (!HeapTupleIsValid(tp))
1167 elog(ERROR, "cache lookup failed for collation %u", collation);
1168 collform = (Form_pg_collation) GETSTRUCT(tp);
1169
1170 collcollate = NameStr(collform->collcollate);
1171 collctype = NameStr(collform->collctype);
1172
1173 cache_entry->collate_is_c = ((strcmp(collcollate, "C") == 0) ||
1174 (strcmp(collcollate, "POSIX") == 0));
1175 cache_entry->ctype_is_c = ((strcmp(collctype, "C") == 0) ||
1176 (strcmp(collctype, "POSIX") == 0));
1177
1178 cache_entry->flags_valid = true;
1179
1180 ReleaseSysCache(tp);
1181 }
1182
1183 return cache_entry;
1184}
1185
1186
1187/*
1188 * Detect whether collation's LC_COLLATE property is C
1189 */
1190bool
1191lc_collate_is_c(Oid collation)
1192{
1193 /*
1194 * If we're asked about "collation 0", return false, so that the code will
1195 * go into the non-C path and report that the collation is bogus.
1196 */
1197 if (!OidIsValid(collation))
1198 return false;
1199
1200 /*
1201 * If we're asked about the default collation, we have to inquire of the C
1202 * library. Cache the result so we only have to compute it once.
1203 */
1204 if (collation == DEFAULT_COLLATION_OID)
1205 {
1206 static int result = -1;
1207 char *localeptr;
1208
1209 if (result >= 0)
1210 return (bool) result;
1211 localeptr = setlocale(LC_COLLATE, NULL);
1212 if (!localeptr)
1213 elog(ERROR, "invalid LC_COLLATE setting");
1214
1215 if (strcmp(localeptr, "C") == 0)
1216 result = true;
1217 else if (strcmp(localeptr, "POSIX") == 0)
1218 result = true;
1219 else
1220 result = false;
1221 return (bool) result;
1222 }
1223
1224 /*
1225 * If we're asked about the built-in C/POSIX collations, we know that.
1226 */
1227 if (collation == C_COLLATION_OID ||
1228 collation == POSIX_COLLATION_OID)
1229 return true;
1230
1231 /*
1232 * Otherwise, we have to consult pg_collation, but we cache that.
1233 */
1234 return (lookup_collation_cache(collation, true))->collate_is_c;
1235}
1236
1237/*
1238 * Detect whether collation's LC_CTYPE property is C
1239 */
1240bool
1241lc_ctype_is_c(Oid collation)
1242{
1243 /*
1244 * If we're asked about "collation 0", return false, so that the code will
1245 * go into the non-C path and report that the collation is bogus.
1246 */
1247 if (!OidIsValid(collation))
1248 return false;
1249
1250 /*
1251 * If we're asked about the default collation, we have to inquire of the C
1252 * library. Cache the result so we only have to compute it once.
1253 */
1254 if (collation == DEFAULT_COLLATION_OID)
1255 {
1256 static int result = -1;
1257 char *localeptr;
1258
1259 if (result >= 0)
1260 return (bool) result;
1261 localeptr = setlocale(LC_CTYPE, NULL);
1262 if (!localeptr)
1263 elog(ERROR, "invalid LC_CTYPE setting");
1264
1265 if (strcmp(localeptr, "C") == 0)
1266 result = true;
1267 else if (strcmp(localeptr, "POSIX") == 0)
1268 result = true;
1269 else
1270 result = false;
1271 return (bool) result;
1272 }
1273
1274 /*
1275 * If we're asked about the built-in C/POSIX collations, we know that.
1276 */
1277 if (collation == C_COLLATION_OID ||
1278 collation == POSIX_COLLATION_OID)
1279 return true;
1280
1281 /*
1282 * Otherwise, we have to consult pg_collation, but we cache that.
1283 */
1284 return (lookup_collation_cache(collation, true))->ctype_is_c;
1285}
1286
1287
1288/* simple subroutine for reporting errors from newlocale() */
1289#ifdef HAVE_LOCALE_T
1290static void
1291report_newlocale_failure(const char *localename)
1292{
1293 int save_errno;
1294
1295 /*
1296 * Windows doesn't provide any useful error indication from
1297 * _create_locale(), and BSD-derived platforms don't seem to feel they
1298 * need to set errno either (even though POSIX is pretty clear that
1299 * newlocale should do so). So, if errno hasn't been set, assume ENOENT
1300 * is what to report.
1301 */
1302 if (errno == 0)
1303 errno = ENOENT;
1304
1305 /*
1306 * ENOENT means "no such locale", not "no such file", so clarify that
1307 * errno with an errdetail message.
1308 */
1309 save_errno = errno; /* auxiliary funcs might change errno */
1310 ereport(ERROR,
1311 (errcode(ERRCODE_INVALID_PARAMETER_VALUE),
1312 errmsg("could not create locale \"%s\": %m",
1313 localename),
1314 (save_errno == ENOENT ?
1315 errdetail("The operating system could not find any locale data for the locale name \"%s\".",
1316 localename) : 0)));
1317}
1318#endif /* HAVE_LOCALE_T */
1319
1320
1321/*
1322 * Create a locale_t from a collation OID. Results are cached for the
1323 * lifetime of the backend. Thus, do not free the result with freelocale().
1324 *
1325 * As a special optimization, the default/database collation returns 0.
1326 * Callers should then revert to the non-locale_t-enabled code path.
1327 * In fact, they shouldn't call this function at all when they are dealing
1328 * with the default locale. That can save quite a bit in hotspots.
1329 * Also, callers should avoid calling this before going down a C/POSIX
1330 * fastpath, because such a fastpath should work even on platforms without
1331 * locale_t support in the C library.
1332 *
1333 * For simplicity, we always generate COLLATE + CTYPE even though we
1334 * might only need one of them. Since this is called only once per session,
1335 * it shouldn't cost much.
1336 */
1337pg_locale_t
1338pg_newlocale_from_collation(Oid collid)
1339{
1340 collation_cache_entry *cache_entry;
1341
1342 /* Callers must pass a valid OID */
1343 Assert(OidIsValid(collid));
1344
1345 /* Return 0 for "default" collation, just in case caller forgets */
1346 if (collid == DEFAULT_COLLATION_OID)
1347 return (pg_locale_t) 0;
1348
1349 cache_entry = lookup_collation_cache(collid, false);
1350
1351 if (cache_entry->locale == 0)
1352 {
1353 /* We haven't computed this yet in this session, so do it */
1354 HeapTuple tp;
1355 Form_pg_collation collform;
1356 const char *collcollate;
1357 const char *collctype pg_attribute_unused();
1358 struct pg_locale_struct result;
1359 pg_locale_t resultp;
1360 Datum collversion;
1361 bool isnull;
1362
1363 tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
1364 if (!HeapTupleIsValid(tp))
1365 elog(ERROR, "cache lookup failed for collation %u", collid);
1366 collform = (Form_pg_collation) GETSTRUCT(tp);
1367
1368 collcollate = NameStr(collform->collcollate);
1369 collctype = NameStr(collform->collctype);
1370
1371 /* We'll fill in the result struct locally before allocating memory */
1372 memset(&result, 0, sizeof(result));
1373 result.provider = collform->collprovider;
1374 result.deterministic = collform->collisdeterministic;
1375
1376 if (collform->collprovider == COLLPROVIDER_LIBC)
1377 {
1378#ifdef HAVE_LOCALE_T
1379 locale_t loc;
1380
1381 if (strcmp(collcollate, collctype) == 0)
1382 {
1383 /* Normal case where they're the same */
1384 errno = 0;
1385#ifndef WIN32
1386 loc = newlocale(LC_COLLATE_MASK | LC_CTYPE_MASK, collcollate,
1387 NULL);
1388#else
1389 loc = _create_locale(LC_ALL, collcollate);
1390#endif
1391 if (!loc)
1392 report_newlocale_failure(collcollate);
1393 }
1394 else
1395 {
1396#ifndef WIN32
1397 /* We need two newlocale() steps */
1398 locale_t loc1;
1399
1400 errno = 0;
1401 loc1 = newlocale(LC_COLLATE_MASK, collcollate, NULL);
1402 if (!loc1)
1403 report_newlocale_failure(collcollate);
1404 errno = 0;
1405 loc = newlocale(LC_CTYPE_MASK, collctype, loc1);
1406 if (!loc)
1407 report_newlocale_failure(collctype);
1408#else
1409
1410 /*
1411 * XXX The _create_locale() API doesn't appear to support
1412 * this. Could perhaps be worked around by changing
1413 * pg_locale_t to contain two separate fields.
1414 */
1415 ereport(ERROR,
1416 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1417 errmsg("collations with different collate and ctype values are not supported on this platform")));
1418#endif
1419 }
1420
1421 result.info.lt = loc;
1422#else /* not HAVE_LOCALE_T */
1423 /* platform that doesn't support locale_t */
1424 ereport(ERROR,
1425 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1426 errmsg("collation provider LIBC is not supported on this platform")));
1427#endif /* not HAVE_LOCALE_T */
1428 }
1429 else if (collform->collprovider == COLLPROVIDER_ICU)
1430 {
1431#ifdef USE_ICU
1432 UCollator *collator;
1433 UErrorCode status;
1434
1435 if (strcmp(collcollate, collctype) != 0)
1436 ereport(ERROR,
1437 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1438 errmsg("collations with different collate and ctype values are not supported by ICU")));
1439
1440 status = U_ZERO_ERROR;
1441 collator = ucol_open(collcollate, &status);
1442 if (U_FAILURE(status))
1443 ereport(ERROR,
1444 (errmsg("could not open collator for locale \"%s\": %s",
1445 collcollate, u_errorName(status))));
1446
1447 if (U_ICU_VERSION_MAJOR_NUM < 54)
1448 icu_set_collation_attributes(collator, collcollate);
1449
1450 /* We will leak this string if we get an error below :-( */
1451 result.info.icu.locale = MemoryContextStrdup(TopMemoryContext,
1452 collcollate);
1453 result.info.icu.ucol = collator;
1454#else /* not USE_ICU */
1455 /* could get here if a collation was created by a build with ICU */
1456 ereport(ERROR,
1457 (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
1458 errmsg("ICU is not supported in this build"), \
1459 errhint("You need to rebuild PostgreSQL using --with-icu.")));
1460#endif /* not USE_ICU */
1461 }
1462
1463 collversion = SysCacheGetAttr(COLLOID, tp, Anum_pg_collation_collversion,
1464 &isnull);
1465 if (!isnull)
1466 {
1467 char *actual_versionstr;
1468 char *collversionstr;
1469
1470 actual_versionstr = get_collation_actual_version(collform->collprovider, collcollate);
1471 if (!actual_versionstr)
1472 {
1473 /*
1474 * This could happen when specifying a version in CREATE
1475 * COLLATION for a libc locale, or manually creating a mess in
1476 * the catalogs.
1477 */
1478 ereport(ERROR,
1479 (errmsg("collation \"%s\" has no actual version, but a version was specified",
1480 NameStr(collform->collname))));
1481 }
1482 collversionstr = TextDatumGetCString(collversion);
1483
1484 if (strcmp(actual_versionstr, collversionstr) != 0)
1485 ereport(WARNING,
1486 (errmsg("collation \"%s\" has version mismatch",
1487 NameStr(collform->collname)),
1488 errdetail("The collation in the database was created using version %s, "
1489 "but the operating system provides version %s.",
1490 collversionstr, actual_versionstr),
1491 errhint("Rebuild all objects affected by this collation and run "
1492 "ALTER COLLATION %s REFRESH VERSION, "
1493 "or build PostgreSQL with the right library version.",
1494 quote_qualified_identifier(get_namespace_name(collform->collnamespace),
1495 NameStr(collform->collname)))));
1496 }
1497
1498 ReleaseSysCache(tp);
1499
1500 /* We'll keep the pg_locale_t structures in TopMemoryContext */
1501 resultp = MemoryContextAlloc(TopMemoryContext, sizeof(*resultp));
1502 *resultp = result;
1503
1504 cache_entry->locale = resultp;
1505 }
1506
1507 return cache_entry->locale;
1508}
1509
1510/*
1511 * Get provider-specific collation version string for the given collation from
1512 * the operating system/library.
1513 *
1514 * A particular provider must always either return a non-NULL string or return
1515 * NULL (if it doesn't support versions). It must not return NULL for some
1516 * collcollate and not NULL for others.
1517 */
char *
get_collation_actual_version(char collprovider, const char *collcollate)
{
#ifdef USE_ICU
    if (collprovider == COLLPROVIDER_ICU)
    {
        UCollator  *coll;
        UErrorCode  err = U_ZERO_ERROR;
        UVersionInfo vinfo;
        char        verstr[U_MAX_VERSION_STRING_LENGTH];

        /* Open a throwaway collator just to read its version. */
        coll = ucol_open(collcollate, &err);
        if (U_FAILURE(err))
            ereport(ERROR,
                    (errmsg("could not open collator for locale \"%s\": %s",
                            collcollate, u_errorName(err))));
        ucol_getVersion(coll, vinfo);
        ucol_close(coll);

        /* Hand back a pstrdup'd copy of the textual version. */
        u_versionToString(vinfo, verstr);
        return pstrdup(verstr);
    }
#endif

    /* Every other provider reports no version. */
    return NULL;
}
1549
1550
1551#ifdef USE_ICU
1552/*
1553 * Converter object for converting between ICU's UChar strings and C strings
1554 * in database encoding. Since the database encoding doesn't change, we only
1555 * need one of these per session.
1556 */
1557static UConverter *icu_converter = NULL;
1558
1559static void
1560init_icu_converter(void)
1561{
1562 const char *icu_encoding_name;
1563 UErrorCode status;
1564 UConverter *conv;
1565
1566 if (icu_converter)
1567 return;
1568
1569 icu_encoding_name = get_encoding_name_for_icu(GetDatabaseEncoding());
1570
1571 status = U_ZERO_ERROR;
1572 conv = ucnv_open(icu_encoding_name, &status);
1573 if (U_FAILURE(status))
1574 ereport(ERROR,
1575 (errmsg("could not open ICU converter for encoding \"%s\": %s",
1576 icu_encoding_name, u_errorName(status))));
1577
1578 icu_converter = conv;
1579}
1580
1581/*
1582 * Convert a string in the database encoding into a string of UChars.
1583 *
1584 * The source string at buff is of length nbytes
1585 * (it needn't be nul-terminated)
1586 *
1587 * *buff_uchar receives a pointer to the palloc'd result string, and
1588 * the function's result is the number of UChars generated.
1589 *
1590 * The result string is nul-terminated, though most callers rely on the
1591 * result length instead.
1592 */
1593int32_t
1594icu_to_uchar(UChar **buff_uchar, const char *buff, size_t nbytes)
1595{
1596 UErrorCode status;
1597 int32_t len_uchar;
1598
1599 init_icu_converter();
1600
1601 status = U_ZERO_ERROR;
1602 len_uchar = ucnv_toUChars(icu_converter, NULL, 0,
1603 buff, nbytes, &status);
1604 if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
1605 ereport(ERROR,
1606 (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
1607
1608 *buff_uchar = palloc((len_uchar + 1) * sizeof(**buff_uchar));
1609
1610 status = U_ZERO_ERROR;
1611 len_uchar = ucnv_toUChars(icu_converter, *buff_uchar, len_uchar + 1,
1612 buff, nbytes, &status);
1613 if (U_FAILURE(status))
1614 ereport(ERROR,
1615 (errmsg("%s failed: %s", "ucnv_toUChars", u_errorName(status))));
1616
1617 return len_uchar;
1618}
1619
1620/*
1621 * Convert a string of UChars into the database encoding.
1622 *
1623 * The source string at buff_uchar is of length len_uchar
1624 * (it needn't be nul-terminated)
1625 *
1626 * *result receives a pointer to the palloc'd result string, and the
1627 * function's result is the number of bytes generated (not counting nul).
1628 *
1629 * The result string is nul-terminated.
1630 */
1631int32_t
1632icu_from_uchar(char **result, const UChar *buff_uchar, int32_t len_uchar)
1633{
1634 UErrorCode status;
1635 int32_t len_result;
1636
1637 init_icu_converter();
1638
1639 status = U_ZERO_ERROR;
1640 len_result = ucnv_fromUChars(icu_converter, NULL, 0,
1641 buff_uchar, len_uchar, &status);
1642 if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR)
1643 ereport(ERROR,
1644 (errmsg("%s failed: %s", "ucnv_fromUChars",
1645 u_errorName(status))));
1646
1647 *result = palloc(len_result + 1);
1648
1649 status = U_ZERO_ERROR;
1650 len_result = ucnv_fromUChars(icu_converter, *result, len_result + 1,
1651 buff_uchar, len_uchar, &status);
1652 if (U_FAILURE(status))
1653 ereport(ERROR,
1654 (errmsg("%s failed: %s", "ucnv_fromUChars",
1655 u_errorName(status))));
1656
1657 return len_result;
1658}
1659
1660/*
1661 * Parse collation attributes and apply them to the open collator. This takes
1662 * a string like "und@colStrength=primary;colCaseLevel=yes" and parses and
1663 * applies the key-value arguments.
1664 *
1665 * Starting with ICU version 54, the attributes are processed automatically by
1666 * ucol_open(), so this is only necessary for emulating this behavior on older
1667 * versions.
1668 */
1669pg_attribute_unused()
1670static void
1671icu_set_collation_attributes(UCollator *collator, const char *loc)
1672{
1673 char *str = asc_tolower(loc, strlen(loc));
1674
1675 str = strchr(str, '@');
1676 if (!str)
1677 return;
1678 str++;
1679
1680 for (char *token = strtok(str, ";"); token; token = strtok(NULL, ";"))
1681 {
1682 char *e = strchr(token, '=');
1683
1684 if (e)
1685 {
1686 char *name;
1687 char *value;
1688 UColAttribute uattr;
1689 UColAttributeValue uvalue;
1690 UErrorCode status;
1691
1692 status = U_ZERO_ERROR;
1693
1694 *e = '\0';
1695 name = token;
1696 value = e + 1;
1697
1698 /*
1699 * See attribute name and value lists in ICU i18n/coll.cpp
1700 */
1701 if (strcmp(name, "colstrength") == 0)
1702 uattr = UCOL_STRENGTH;
1703 else if (strcmp(name, "colbackwards") == 0)
1704 uattr = UCOL_FRENCH_COLLATION;
1705 else if (strcmp(name, "colcaselevel") == 0)
1706 uattr = UCOL_CASE_LEVEL;
1707 else if (strcmp(name, "colcasefirst") == 0)
1708 uattr = UCOL_CASE_FIRST;
1709 else if (strcmp(name, "colalternate") == 0)
1710 uattr = UCOL_ALTERNATE_HANDLING;
1711 else if (strcmp(name, "colnormalization") == 0)
1712 uattr = UCOL_NORMALIZATION_MODE;
1713 else if (strcmp(name, "colnumeric") == 0)
1714 uattr = UCOL_NUMERIC_COLLATION;
1715 else
1716 /* ignore if unknown */
1717 continue;
1718
1719 if (strcmp(value, "primary") == 0)
1720 uvalue = UCOL_PRIMARY;
1721 else if (strcmp(value, "secondary") == 0)
1722 uvalue = UCOL_SECONDARY;
1723 else if (strcmp(value, "tertiary") == 0)
1724 uvalue = UCOL_TERTIARY;
1725 else if (strcmp(value, "quaternary") == 0)
1726 uvalue = UCOL_QUATERNARY;
1727 else if (strcmp(value, "identical") == 0)
1728 uvalue = UCOL_IDENTICAL;
1729 else if (strcmp(value, "no") == 0)
1730 uvalue = UCOL_OFF;
1731 else if (strcmp(value, "yes") == 0)
1732 uvalue = UCOL_ON;
1733 else if (strcmp(value, "shifted") == 0)
1734 uvalue = UCOL_SHIFTED;
1735 else if (strcmp(value, "non-ignorable") == 0)
1736 uvalue = UCOL_NON_IGNORABLE;
1737 else if (strcmp(value, "lower") == 0)
1738 uvalue = UCOL_LOWER_FIRST;
1739 else if (strcmp(value, "upper") == 0)
1740 uvalue = UCOL_UPPER_FIRST;
1741 else
1742 status = U_ILLEGAL_ARGUMENT_ERROR;
1743
1744 if (status == U_ZERO_ERROR)
1745 ucol_setAttribute(collator, uattr, uvalue, &status);
1746
1747 /*
1748 * Pretend the error came from ucol_open(), for consistent error
1749 * message across ICU versions.
1750 */
1751 if (U_FAILURE(status))
1752 ereport(ERROR,
1753 (errmsg("could not open collator for locale \"%s\": %s",
1754 loc, u_errorName(status))));
1755 }
1756 }
1757}
1758
1759#endif /* USE_ICU */
1760
1761/*
1762 * These functions convert from/to libc's wchar_t, *not* pg_wchar_t.
1763 * Therefore we keep them here rather than with the mbutils code.
1764 */
1765
1766/*
1767 * wchar2char --- convert wide characters to multibyte format
1768 *
1769 * This has the same API as the standard wcstombs_l() function; in particular,
1770 * tolen is the maximum number of bytes to store at *to, and *from must be
1771 * zero-terminated. The output will be zero-terminated iff there is room.
1772 */
1773size_t
1774wchar2char(char *to, const wchar_t *from, size_t tolen, pg_locale_t locale)
1775{
1776 size_t result;
1777
1778 Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
1779
1780 if (tolen == 0)
1781 return 0;
1782
1783#ifdef WIN32
1784
1785 /*
1786 * On Windows, the "Unicode" locales assume UTF16 not UTF8 encoding, and
1787 * for some reason mbstowcs and wcstombs won't do this for us, so we use
1788 * MultiByteToWideChar().
1789 */
1790 if (GetDatabaseEncoding() == PG_UTF8)
1791 {
1792 result = WideCharToMultiByte(CP_UTF8, 0, from, -1, to, tolen,
1793 NULL, NULL);
1794 /* A zero return is failure */
1795 if (result <= 0)
1796 result = -1;
1797 else
1798 {
1799 Assert(result <= tolen);
1800 /* Microsoft counts the zero terminator in the result */
1801 result--;
1802 }
1803 }
1804 else
1805#endif /* WIN32 */
1806 if (locale == (pg_locale_t) 0)
1807 {
1808 /* Use wcstombs directly for the default locale */
1809 result = wcstombs(to, from, tolen);
1810 }
1811 else
1812 {
1813#ifdef HAVE_LOCALE_T
1814#ifdef HAVE_WCSTOMBS_L
1815 /* Use wcstombs_l for nondefault locales */
1816 result = wcstombs_l(to, from, tolen, locale->info.lt);
1817#else /* !HAVE_WCSTOMBS_L */
1818 /* We have to temporarily set the locale as current ... ugh */
1819 locale_t save_locale = uselocale(locale->info.lt);
1820
1821 result = wcstombs(to, from, tolen);
1822
1823 uselocale(save_locale);
1824#endif /* HAVE_WCSTOMBS_L */
1825#else /* !HAVE_LOCALE_T */
1826 /* Can't have locale != 0 without HAVE_LOCALE_T */
1827 elog(ERROR, "wcstombs_l is not available");
1828 result = 0; /* keep compiler quiet */
1829#endif /* HAVE_LOCALE_T */
1830 }
1831
1832 return result;
1833}
1834
1835/*
1836 * char2wchar --- convert multibyte characters to wide characters
1837 *
1838 * This has almost the API of mbstowcs_l(), except that *from need not be
1839 * null-terminated; instead, the number of input bytes is specified as
1840 * fromlen. Also, we ereport() rather than returning -1 for invalid
1841 * input encoding. tolen is the maximum number of wchar_t's to store at *to.
1842 * The output will be zero-terminated iff there is room.
1843 */
1844size_t
1845char2wchar(wchar_t *to, size_t tolen, const char *from, size_t fromlen,
1846 pg_locale_t locale)
1847{
1848 size_t result;
1849
1850 Assert(!locale || locale->provider == COLLPROVIDER_LIBC);
1851
1852 if (tolen == 0)
1853 return 0;
1854
1855#ifdef WIN32
1856 /* See WIN32 "Unicode" comment above */
1857 if (GetDatabaseEncoding() == PG_UTF8)
1858 {
1859 /* Win32 API does not work for zero-length input */
1860 if (fromlen == 0)
1861 result = 0;
1862 else
1863 {
1864 result = MultiByteToWideChar(CP_UTF8, 0, from, fromlen, to, tolen - 1);
1865 /* A zero return is failure */
1866 if (result == 0)
1867 result = -1;
1868 }
1869
1870 if (result != -1)
1871 {
1872 Assert(result < tolen);
1873 /* Append trailing null wchar (MultiByteToWideChar() does not) */
1874 to[result] = 0;
1875 }
1876 }
1877 else
1878#endif /* WIN32 */
1879 {
1880 /* mbstowcs requires ending '\0' */
1881 char *str = pnstrdup(from, fromlen);
1882
1883 if (locale == (pg_locale_t) 0)
1884 {
1885 /* Use mbstowcs directly for the default locale */
1886 result = mbstowcs(to, str, tolen);
1887 }
1888 else
1889 {
1890#ifdef HAVE_LOCALE_T
1891#ifdef HAVE_MBSTOWCS_L
1892 /* Use mbstowcs_l for nondefault locales */
1893 result = mbstowcs_l(to, str, tolen, locale->info.lt);
1894#else /* !HAVE_MBSTOWCS_L */
1895 /* We have to temporarily set the locale as current ... ugh */
1896 locale_t save_locale = uselocale(locale->info.lt);
1897
1898 result = mbstowcs(to, str, tolen);
1899
1900 uselocale(save_locale);
1901#endif /* HAVE_MBSTOWCS_L */
1902#else /* !HAVE_LOCALE_T */
1903 /* Can't have locale != 0 without HAVE_LOCALE_T */
1904 elog(ERROR, "mbstowcs_l is not available");
1905 result = 0; /* keep compiler quiet */
1906#endif /* HAVE_LOCALE_T */
1907 }
1908
1909 pfree(str);
1910 }
1911
1912 if (result == -1)
1913 {
1914 /*
1915 * Invalid multibyte character encountered. We try to give a useful
1916 * error message by letting pg_verifymbstr check the string. But it's
1917 * possible that the string is OK to us, and not OK to mbstowcs ---
1918 * this suggests that the LC_CTYPE locale is different from the
1919 * database encoding. Give a generic error message if verifymbstr
1920 * can't find anything wrong.
1921 */
1922 pg_verifymbstr(from, fromlen, false); /* might not return */
1923 /* but if it does ... */
1924 ereport(ERROR,
1925 (errcode(ERRCODE_CHARACTER_NOT_IN_REPERTOIRE),
1926 errmsg("invalid multibyte character for locale"),
1927 errhint("The server's LC_CTYPE locale is probably incompatible with the database encoding.")));
1928 }
1929
1930 return result;
1931}
1932