| 1 | /*------------------------------------------------------------------------- |
| 2 | * |
| 3 | * collationcmds.c |
| 4 | * collation-related commands support code |
| 5 | * |
| 6 | * Portions Copyright (c) 1996-2019, PostgreSQL Global Development Group |
| 7 | * Portions Copyright (c) 1994, Regents of the University of California |
| 8 | * |
| 9 | * |
| 10 | * IDENTIFICATION |
| 11 | * src/backend/commands/collationcmds.c |
| 12 | * |
| 13 | *------------------------------------------------------------------------- |
| 14 | */ |
| 15 | #include "postgres.h" |
| 16 | |
| 17 | #include "access/htup_details.h" |
| 18 | #include "access/table.h" |
| 19 | #include "access/xact.h" |
| 20 | #include "catalog/dependency.h" |
| 21 | #include "catalog/indexing.h" |
| 22 | #include "catalog/namespace.h" |
| 23 | #include "catalog/objectaccess.h" |
| 24 | #include "catalog/pg_collation.h" |
| 25 | #include "commands/alter.h" |
| 26 | #include "commands/collationcmds.h" |
| 27 | #include "commands/comment.h" |
| 28 | #include "commands/dbcommands.h" |
| 29 | #include "commands/defrem.h" |
| 30 | #include "mb/pg_wchar.h" |
| 31 | #include "miscadmin.h" |
| 32 | #include "utils/builtins.h" |
| 33 | #include "utils/lsyscache.h" |
| 34 | #include "utils/pg_locale.h" |
| 35 | #include "utils/rel.h" |
| 36 | #include "utils/syscache.h" |
| 37 | |
| 38 | |
/*
 * One collation alias remembered while scanning "locale -a" output, to be
 * created after all the primary locale names have been processed.
 */
typedef struct CollAliasData
{
	char	   *localename;		/* full locale name, as printed by "locale -a" */
	char	   *alias;			/* abbreviated name derived from localename */
	int			enc;			/* encoding associated with the locale */
} CollAliasData;
| 45 | |
| 46 | |
| 47 | /* |
| 48 | * CREATE COLLATION |
| 49 | */ |
| 50 | ObjectAddress |
| 51 | DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_exists) |
| 52 | { |
| 53 | char *collName; |
| 54 | Oid collNamespace; |
| 55 | AclResult aclresult; |
| 56 | ListCell *pl; |
| 57 | DefElem *fromEl = NULL; |
| 58 | DefElem *localeEl = NULL; |
| 59 | DefElem *lccollateEl = NULL; |
| 60 | DefElem *lcctypeEl = NULL; |
| 61 | DefElem *providerEl = NULL; |
| 62 | DefElem *deterministicEl = NULL; |
| 63 | DefElem *versionEl = NULL; |
| 64 | char *collcollate = NULL; |
| 65 | char *collctype = NULL; |
| 66 | char *collproviderstr = NULL; |
| 67 | bool collisdeterministic = true; |
| 68 | int collencoding = 0; |
| 69 | char collprovider = 0; |
| 70 | char *collversion = NULL; |
| 71 | Oid newoid; |
| 72 | ObjectAddress address; |
| 73 | |
| 74 | collNamespace = QualifiedNameGetCreationNamespace(names, &collName); |
| 75 | |
| 76 | aclresult = pg_namespace_aclcheck(collNamespace, GetUserId(), ACL_CREATE); |
| 77 | if (aclresult != ACLCHECK_OK) |
| 78 | aclcheck_error(aclresult, OBJECT_SCHEMA, |
| 79 | get_namespace_name(collNamespace)); |
| 80 | |
| 81 | foreach(pl, parameters) |
| 82 | { |
| 83 | DefElem *defel = lfirst_node(DefElem, pl); |
| 84 | DefElem **defelp; |
| 85 | |
| 86 | if (strcmp(defel->defname, "from" ) == 0) |
| 87 | defelp = &fromEl; |
| 88 | else if (strcmp(defel->defname, "locale" ) == 0) |
| 89 | defelp = &localeEl; |
| 90 | else if (strcmp(defel->defname, "lc_collate" ) == 0) |
| 91 | defelp = &lccollateEl; |
| 92 | else if (strcmp(defel->defname, "lc_ctype" ) == 0) |
| 93 | defelp = &lcctypeEl; |
| 94 | else if (strcmp(defel->defname, "provider" ) == 0) |
| 95 | defelp = &providerEl; |
| 96 | else if (strcmp(defel->defname, "deterministic" ) == 0) |
| 97 | defelp = &deterministicEl; |
| 98 | else if (strcmp(defel->defname, "version" ) == 0) |
| 99 | defelp = &versionEl; |
| 100 | else |
| 101 | { |
| 102 | ereport(ERROR, |
| 103 | (errcode(ERRCODE_SYNTAX_ERROR), |
| 104 | errmsg("collation attribute \"%s\" not recognized" , |
| 105 | defel->defname), |
| 106 | parser_errposition(pstate, defel->location))); |
| 107 | break; |
| 108 | } |
| 109 | |
| 110 | *defelp = defel; |
| 111 | } |
| 112 | |
| 113 | if ((localeEl && (lccollateEl || lcctypeEl)) |
| 114 | || (fromEl && list_length(parameters) != 1)) |
| 115 | ereport(ERROR, |
| 116 | (errcode(ERRCODE_SYNTAX_ERROR), |
| 117 | errmsg("conflicting or redundant options" ))); |
| 118 | |
| 119 | if (fromEl) |
| 120 | { |
| 121 | Oid collid; |
| 122 | HeapTuple tp; |
| 123 | |
| 124 | collid = get_collation_oid(defGetQualifiedName(fromEl), false); |
| 125 | tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid)); |
| 126 | if (!HeapTupleIsValid(tp)) |
| 127 | elog(ERROR, "cache lookup failed for collation %u" , collid); |
| 128 | |
| 129 | collcollate = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collcollate)); |
| 130 | collctype = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collctype)); |
| 131 | collprovider = ((Form_pg_collation) GETSTRUCT(tp))->collprovider; |
| 132 | collisdeterministic = ((Form_pg_collation) GETSTRUCT(tp))->collisdeterministic; |
| 133 | collencoding = ((Form_pg_collation) GETSTRUCT(tp))->collencoding; |
| 134 | |
| 135 | ReleaseSysCache(tp); |
| 136 | |
| 137 | /* |
| 138 | * Copying the "default" collation is not allowed because most code |
| 139 | * checks for DEFAULT_COLLATION_OID instead of COLLPROVIDER_DEFAULT, |
| 140 | * and so having a second collation with COLLPROVIDER_DEFAULT would |
| 141 | * not work and potentially confuse or crash some code. This could be |
| 142 | * fixed with some legwork. |
| 143 | */ |
| 144 | if (collprovider == COLLPROVIDER_DEFAULT) |
| 145 | ereport(ERROR, |
| 146 | (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), |
| 147 | errmsg("collation \"default\" cannot be copied" ))); |
| 148 | } |
| 149 | |
| 150 | if (localeEl) |
| 151 | { |
| 152 | collcollate = defGetString(localeEl); |
| 153 | collctype = defGetString(localeEl); |
| 154 | } |
| 155 | |
| 156 | if (lccollateEl) |
| 157 | collcollate = defGetString(lccollateEl); |
| 158 | |
| 159 | if (lcctypeEl) |
| 160 | collctype = defGetString(lcctypeEl); |
| 161 | |
| 162 | if (providerEl) |
| 163 | collproviderstr = defGetString(providerEl); |
| 164 | |
| 165 | if (deterministicEl) |
| 166 | collisdeterministic = defGetBoolean(deterministicEl); |
| 167 | |
| 168 | if (versionEl) |
| 169 | collversion = defGetString(versionEl); |
| 170 | |
| 171 | if (collproviderstr) |
| 172 | { |
| 173 | if (pg_strcasecmp(collproviderstr, "icu" ) == 0) |
| 174 | collprovider = COLLPROVIDER_ICU; |
| 175 | else if (pg_strcasecmp(collproviderstr, "libc" ) == 0) |
| 176 | collprovider = COLLPROVIDER_LIBC; |
| 177 | else |
| 178 | ereport(ERROR, |
| 179 | (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), |
| 180 | errmsg("unrecognized collation provider: %s" , |
| 181 | collproviderstr))); |
| 182 | } |
| 183 | else if (!fromEl) |
| 184 | collprovider = COLLPROVIDER_LIBC; |
| 185 | |
| 186 | if (!collcollate) |
| 187 | ereport(ERROR, |
| 188 | (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), |
| 189 | errmsg("parameter \"lc_collate\" must be specified" ))); |
| 190 | |
| 191 | if (!collctype) |
| 192 | ereport(ERROR, |
| 193 | (errcode(ERRCODE_INVALID_OBJECT_DEFINITION), |
| 194 | errmsg("parameter \"lc_ctype\" must be specified" ))); |
| 195 | |
| 196 | /* |
| 197 | * Nondeterministic collations are currently only supported with ICU |
| 198 | * because that's the only case where it can actually make a difference. |
| 199 | * So we can save writing the code for the other providers. |
| 200 | */ |
| 201 | if (!collisdeterministic && collprovider != COLLPROVIDER_ICU) |
| 202 | ereport(ERROR, |
| 203 | (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), |
| 204 | errmsg("nondeterministic collations not supported with this provider" ))); |
| 205 | |
| 206 | if (!fromEl) |
| 207 | { |
| 208 | if (collprovider == COLLPROVIDER_ICU) |
| 209 | collencoding = -1; |
| 210 | else |
| 211 | { |
| 212 | collencoding = GetDatabaseEncoding(); |
| 213 | check_encoding_locale_matches(collencoding, collcollate, collctype); |
| 214 | } |
| 215 | } |
| 216 | |
| 217 | if (!collversion) |
| 218 | collversion = get_collation_actual_version(collprovider, collcollate); |
| 219 | |
| 220 | newoid = CollationCreate(collName, |
| 221 | collNamespace, |
| 222 | GetUserId(), |
| 223 | collprovider, |
| 224 | collisdeterministic, |
| 225 | collencoding, |
| 226 | collcollate, |
| 227 | collctype, |
| 228 | collversion, |
| 229 | if_not_exists, |
| 230 | false); /* not quiet */ |
| 231 | |
| 232 | if (!OidIsValid(newoid)) |
| 233 | return InvalidObjectAddress; |
| 234 | |
| 235 | /* |
| 236 | * Check that the locales can be loaded. NB: pg_newlocale_from_collation |
| 237 | * is only supposed to be called on non-C-equivalent locales. |
| 238 | */ |
| 239 | CommandCounterIncrement(); |
| 240 | if (!lc_collate_is_c(newoid) || !lc_ctype_is_c(newoid)) |
| 241 | (void) pg_newlocale_from_collation(newoid); |
| 242 | |
| 243 | ObjectAddressSet(address, CollationRelationId, newoid); |
| 244 | |
| 245 | return address; |
| 246 | } |
| 247 | |
| 248 | /* |
| 249 | * Subroutine for ALTER COLLATION SET SCHEMA and RENAME |
| 250 | * |
| 251 | * Is there a collation with the same name of the given collation already in |
| 252 | * the given namespace? If so, raise an appropriate error message. |
| 253 | */ |
| 254 | void |
| 255 | IsThereCollationInNamespace(const char *collname, Oid nspOid) |
| 256 | { |
| 257 | /* make sure the name doesn't already exist in new schema */ |
| 258 | if (SearchSysCacheExists3(COLLNAMEENCNSP, |
| 259 | CStringGetDatum(collname), |
| 260 | Int32GetDatum(GetDatabaseEncoding()), |
| 261 | ObjectIdGetDatum(nspOid))) |
| 262 | ereport(ERROR, |
| 263 | (errcode(ERRCODE_DUPLICATE_OBJECT), |
| 264 | errmsg("collation \"%s\" for encoding \"%s\" already exists in schema \"%s\"" , |
| 265 | collname, GetDatabaseEncodingName(), |
| 266 | get_namespace_name(nspOid)))); |
| 267 | |
| 268 | /* mustn't match an any-encoding entry, either */ |
| 269 | if (SearchSysCacheExists3(COLLNAMEENCNSP, |
| 270 | CStringGetDatum(collname), |
| 271 | Int32GetDatum(-1), |
| 272 | ObjectIdGetDatum(nspOid))) |
| 273 | ereport(ERROR, |
| 274 | (errcode(ERRCODE_DUPLICATE_OBJECT), |
| 275 | errmsg("collation \"%s\" already exists in schema \"%s\"" , |
| 276 | collname, get_namespace_name(nspOid)))); |
| 277 | } |
| 278 | |
| 279 | /* |
| 280 | * ALTER COLLATION |
| 281 | */ |
| 282 | ObjectAddress |
| 283 | AlterCollation(AlterCollationStmt *stmt) |
| 284 | { |
| 285 | Relation rel; |
| 286 | Oid collOid; |
| 287 | HeapTuple tup; |
| 288 | Form_pg_collation collForm; |
| 289 | Datum collversion; |
| 290 | bool isnull; |
| 291 | char *oldversion; |
| 292 | char *newversion; |
| 293 | ObjectAddress address; |
| 294 | |
| 295 | rel = table_open(CollationRelationId, RowExclusiveLock); |
| 296 | collOid = get_collation_oid(stmt->collname, false); |
| 297 | |
| 298 | if (!pg_collation_ownercheck(collOid, GetUserId())) |
| 299 | aclcheck_error(ACLCHECK_NOT_OWNER, OBJECT_COLLATION, |
| 300 | NameListToString(stmt->collname)); |
| 301 | |
| 302 | tup = SearchSysCacheCopy1(COLLOID, ObjectIdGetDatum(collOid)); |
| 303 | if (!HeapTupleIsValid(tup)) |
| 304 | elog(ERROR, "cache lookup failed for collation %u" , collOid); |
| 305 | |
| 306 | collForm = (Form_pg_collation) GETSTRUCT(tup); |
| 307 | collversion = SysCacheGetAttr(COLLOID, tup, Anum_pg_collation_collversion, |
| 308 | &isnull); |
| 309 | oldversion = isnull ? NULL : TextDatumGetCString(collversion); |
| 310 | |
| 311 | newversion = get_collation_actual_version(collForm->collprovider, NameStr(collForm->collcollate)); |
| 312 | |
| 313 | /* cannot change from NULL to non-NULL or vice versa */ |
| 314 | if ((!oldversion && newversion) || (oldversion && !newversion)) |
| 315 | elog(ERROR, "invalid collation version change" ); |
| 316 | else if (oldversion && newversion && strcmp(newversion, oldversion) != 0) |
| 317 | { |
| 318 | bool nulls[Natts_pg_collation]; |
| 319 | bool replaces[Natts_pg_collation]; |
| 320 | Datum values[Natts_pg_collation]; |
| 321 | |
| 322 | ereport(NOTICE, |
| 323 | (errmsg("changing version from %s to %s" , |
| 324 | oldversion, newversion))); |
| 325 | |
| 326 | memset(values, 0, sizeof(values)); |
| 327 | memset(nulls, false, sizeof(nulls)); |
| 328 | memset(replaces, false, sizeof(replaces)); |
| 329 | |
| 330 | values[Anum_pg_collation_collversion - 1] = CStringGetTextDatum(newversion); |
| 331 | replaces[Anum_pg_collation_collversion - 1] = true; |
| 332 | |
| 333 | tup = heap_modify_tuple(tup, RelationGetDescr(rel), |
| 334 | values, nulls, replaces); |
| 335 | } |
| 336 | else |
| 337 | ereport(NOTICE, |
| 338 | (errmsg("version has not changed" ))); |
| 339 | |
| 340 | CatalogTupleUpdate(rel, &tup->t_self, tup); |
| 341 | |
| 342 | InvokeObjectPostAlterHook(CollationRelationId, collOid, 0); |
| 343 | |
| 344 | ObjectAddressSet(address, CollationRelationId, collOid); |
| 345 | |
| 346 | heap_freetuple(tup); |
| 347 | table_close(rel, NoLock); |
| 348 | |
| 349 | return address; |
| 350 | } |
| 351 | |
| 352 | |
| 353 | Datum |
| 354 | pg_collation_actual_version(PG_FUNCTION_ARGS) |
| 355 | { |
| 356 | Oid collid = PG_GETARG_OID(0); |
| 357 | HeapTuple tp; |
| 358 | char *collcollate; |
| 359 | char collprovider; |
| 360 | char *version; |
| 361 | |
| 362 | tp = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid)); |
| 363 | if (!HeapTupleIsValid(tp)) |
| 364 | ereport(ERROR, |
| 365 | (errcode(ERRCODE_UNDEFINED_OBJECT), |
| 366 | errmsg("collation with OID %u does not exist" , collid))); |
| 367 | |
| 368 | collcollate = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collcollate)); |
| 369 | collprovider = ((Form_pg_collation) GETSTRUCT(tp))->collprovider; |
| 370 | |
| 371 | ReleaseSysCache(tp); |
| 372 | |
| 373 | version = get_collation_actual_version(collprovider, collcollate); |
| 374 | |
| 375 | if (version) |
| 376 | PG_RETURN_TEXT_P(cstring_to_text(version)); |
| 377 | else |
| 378 | PG_RETURN_NULL(); |
| 379 | } |
| 380 | |
| 381 | |
| 382 | /* will we use "locale -a" in pg_import_system_collations? */ |
| 383 | #if defined(HAVE_LOCALE_T) && !defined(WIN32) |
| 384 | #define READ_LOCALE_A_OUTPUT |
| 385 | #endif |
| 386 | |
| 387 | #if defined(READ_LOCALE_A_OUTPUT) || defined(USE_ICU) |
/*
 * Report whether a NUL-terminated string contains only bytes without the
 * high bit set, i.e. is pure ASCII.  An empty string counts as ASCII.
 */
static bool
is_all_ascii(const char *str)
{
	const char *p;

	for (p = str; *p != '\0'; p++)
	{
		if (IS_HIGHBIT_SET(*p))
			return false;
	}

	return true;
}
| 402 | #endif /* READ_LOCALE_A_OUTPUT || USE_ICU */ |
| 403 | |
| 404 | #ifdef READ_LOCALE_A_OUTPUT |
/*
 * "Normalize" a libc locale name by removing encoding tags such as ".utf8"
 * (e.g., "en_US.utf8" -> "en_US", and "br_FR.iso885915@euro" ->
 * "br_FR@euro").
 *
 * The normalized name is written to "new", which must have room for at
 * least strlen(old) + 1 bytes.  Returns true if an encoding tag was removed,
 * i.e. the output differs from the input.
 */
static bool
normalize_libc_locale_name(char *new, const char *old)
{
	const char *src = old;
	char	   *dst = new;
	bool		stripped = false;

	while (*src != '\0')
	{
		if (*src != '.')
		{
			/* ordinary character: copy it through */
			*dst++ = *src++;
			continue;
		}

		/*
		 * Found a '.': drop it together with the encoding tag that follows,
		 * such as ".utf8" or ".UTF-8" (letters, digits and dashes).
		 */
		stripped = true;
		for (src++;; src++)
		{
			char		c = *src;
			bool		tagchar;

			tagchar = (c >= 'A' && c <= 'Z') ||
				(c >= 'a' && c <= 'z') ||
				(c >= '0' && c <= '9') ||
				(c == '-');
			if (!tagchar)
				break;
		}
	}
	*dst = '\0';

	return stripped;
}
| 438 | |
| 439 | /* |
| 440 | * qsort comparator for CollAliasData items |
| 441 | */ |
| 442 | static int |
| 443 | cmpaliases(const void *a, const void *b) |
| 444 | { |
| 445 | const CollAliasData *ca = (const CollAliasData *) a; |
| 446 | const CollAliasData *cb = (const CollAliasData *) b; |
| 447 | |
| 448 | /* comparing localename is enough because other fields are derived */ |
| 449 | return strcmp(ca->localename, cb->localename); |
| 450 | } |
| 451 | #endif /* READ_LOCALE_A_OUTPUT */ |
| 452 | |
| 453 | |
| 454 | #ifdef USE_ICU |
| 455 | /* |
| 456 | * Get the ICU language tag for a locale name. |
| 457 | * The result is a palloc'd string. |
| 458 | */ |
| 459 | static char * |
| 460 | get_icu_language_tag(const char *localename) |
| 461 | { |
| 462 | char buf[ULOC_FULLNAME_CAPACITY]; |
| 463 | UErrorCode status; |
| 464 | |
| 465 | status = U_ZERO_ERROR; |
| 466 | uloc_toLanguageTag(localename, buf, sizeof(buf), TRUE, &status); |
| 467 | if (U_FAILURE(status)) |
| 468 | ereport(ERROR, |
| 469 | (errmsg("could not convert locale name \"%s\" to language tag: %s" , |
| 470 | localename, u_errorName(status)))); |
| 471 | |
| 472 | return pstrdup(buf); |
| 473 | } |
| 474 | |
| 475 | /* |
| 476 | * Get a comment (specifically, the display name) for an ICU locale. |
| 477 | * The result is a palloc'd string, or NULL if we can't get a comment |
| 478 | * or find that it's not all ASCII. (We can *not* accept non-ASCII |
| 479 | * comments, because the contents of template0 must be encoding-agnostic.) |
| 480 | */ |
| 481 | static char * |
| 482 | get_icu_locale_comment(const char *localename) |
| 483 | { |
| 484 | UErrorCode status; |
| 485 | UChar displayname[128]; |
| 486 | int32 len_uchar; |
| 487 | int32 i; |
| 488 | char *result; |
| 489 | |
| 490 | status = U_ZERO_ERROR; |
| 491 | len_uchar = uloc_getDisplayName(localename, "en" , |
| 492 | displayname, lengthof(displayname), |
| 493 | &status); |
| 494 | if (U_FAILURE(status)) |
| 495 | return NULL; /* no good reason to raise an error */ |
| 496 | |
| 497 | /* Check for non-ASCII comment (can't use is_all_ascii for this) */ |
| 498 | for (i = 0; i < len_uchar; i++) |
| 499 | { |
| 500 | if (displayname[i] > 127) |
| 501 | return NULL; |
| 502 | } |
| 503 | |
| 504 | /* OK, transcribe */ |
| 505 | result = palloc(len_uchar + 1); |
| 506 | for (i = 0; i < len_uchar; i++) |
| 507 | result[i] = displayname[i]; |
| 508 | result[len_uchar] = '\0'; |
| 509 | |
| 510 | return result; |
| 511 | } |
| 512 | #endif /* USE_ICU */ |
| 513 | |
| 514 | |
/*
 * pg_import_system_collations: add known system collations to pg_collation
 *
 * SQL-callable function.  Argument 0 is the OID of the namespace in which
 * the collations are to be created; the result is the number of collations
 * actually created (as an int32).  Only superusers may call it.
 *
 * Two sources are consulted, each only if compiled in: the output of
 * "locale -a" for libc locales (READ_LOCALE_A_OUTPUT), and ICU's list of
 * available locales (USE_ICU).  Collations that already exist are skipped
 * quietly.
 */
Datum
pg_import_system_collations(PG_FUNCTION_ARGS)
{
	Oid			nspid = PG_GETARG_OID(0);
	int			ncreated = 0;

	/* silence compiler warning if we have no locale implementation at all */
	(void) nspid;

	if (!superuser())
		ereport(ERROR,
				(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
				 (errmsg("must be superuser to import system collations"))));

	/* Load collations known to libc, using "locale -a" to enumerate them */
#ifdef READ_LOCALE_A_OUTPUT
	{
		FILE	   *locale_a_handle;
		char		localebuf[NAMEDATALEN]; /* we assume ASCII so this is fine */
		int			nvalid = 0;
		Oid			collid;
		CollAliasData *aliases;
		int			naliases,
					maxaliases,
					i;

		/* expansible array of aliases */
		maxaliases = 100;
		aliases = (CollAliasData *) palloc(maxaliases * sizeof(CollAliasData));
		naliases = 0;

		locale_a_handle = OpenPipeStream("locale -a", "r");
		if (locale_a_handle == NULL)
			ereport(ERROR,
					(errcode_for_file_access(),
					 errmsg("could not execute command \"%s\": %m",
							"locale -a")));

		/* Process the command's output one line (one locale name) at a time */
		while (fgets(localebuf, sizeof(localebuf), locale_a_handle))
		{
			size_t		len;
			int			enc;
			char		alias[NAMEDATALEN];

			len = strlen(localebuf);

			/*
			 * A line that doesn't end in a newline did not fit into
			 * localebuf; such names are skipped.
			 */
			if (len == 0 || localebuf[len - 1] != '\n')
			{
				elog(DEBUG1, "locale name too long, skipped: \"%s\"", localebuf);
				continue;
			}
			/* strip the trailing newline */
			localebuf[len - 1] = '\0';

			/*
			 * Some systems have locale names that don't consist entirely of
			 * ASCII letters (such as "bokmål" or "français").
			 * This is pretty silly, since we need the locale itself to
			 * interpret the non-ASCII characters.  We can't do much with
			 * those, so we filter them out.
			 */
			if (!is_all_ascii(localebuf))
			{
				elog(DEBUG1, "locale name has non-ASCII characters, skipped: \"%s\"", localebuf);
				continue;
			}

			enc = pg_get_encoding_from_locale(localebuf, false);
			if (enc < 0)
			{
				/* error message printed by pg_get_encoding_from_locale() */
				continue;
			}
			if (!PG_VALID_BE_ENCODING(enc))
				continue;		/* ignore locales for client-only encodings */
			if (enc == PG_SQL_ASCII)
				continue;		/* C/POSIX are already in the catalog */

			/* count valid locales found in operating system */
			nvalid++;

			/*
			 * Create a collation named the same as the locale, but quietly
			 * doing nothing if it already exists.  This is the behavior we
			 * need even at initdb time, because some versions of "locale -a"
			 * can report the same locale name more than once.  And it's
			 * convenient for later import runs, too, since you just about
			 * always want to add on new locales without a lot of chatter
			 * about existing ones.
			 */
			collid = CollationCreate(localebuf, nspid, GetUserId(),
									 COLLPROVIDER_LIBC, true, enc,
									 localebuf, localebuf,
									 get_collation_actual_version(COLLPROVIDER_LIBC, localebuf),
									 true, true);
			if (OidIsValid(collid))
			{
				ncreated++;

				/* Must do CCI between inserts to handle duplicates correctly */
				CommandCounterIncrement();
			}

			/*
			 * Generate aliases such as "en_US" in addition to "en_US.utf8"
			 * for ease of use.  Note that collation names are unique per
			 * encoding only, so this doesn't clash with "en_US" for LATIN1,
			 * say.
			 *
			 * However, it might conflict with a name we'll see later in the
			 * "locale -a" output.  So save up the aliases and try to add them
			 * after we've read all the output.
			 */
			if (normalize_libc_locale_name(alias, localebuf))
			{
				/* double the array whenever it is full */
				if (naliases >= maxaliases)
				{
					maxaliases *= 2;
					aliases = (CollAliasData *)
						repalloc(aliases, maxaliases * sizeof(CollAliasData));
				}
				/* localebuf and alias are reused per line, so copy them */
				aliases[naliases].localename = pstrdup(localebuf);
				aliases[naliases].alias = pstrdup(alias);
				aliases[naliases].enc = enc;
				naliases++;
			}
		}

		ClosePipeStream(locale_a_handle);

		/*
		 * Before processing the aliases, sort them by locale name.  The point
		 * here is that if "locale -a" gives us multiple locale names with the
		 * same encoding and base name, say "en_US.utf8" and "en_US.utf-8", we
		 * want to pick a deterministic one of them.  First in ASCII sort
		 * order is a good enough rule.  (Before PG 10, the code corresponding
		 * to this logic in initdb.c had an additional ordering rule, to
		 * prefer the locale name exactly matching the alias, if any.  We
		 * don't need to consider that here, because we would have already
		 * created such a pg_collation entry above, and that one will win.)
		 */
		if (naliases > 1)
			qsort((void *) aliases, naliases, sizeof(CollAliasData), cmpaliases);

		/* Now add aliases, ignoring any that match pre-existing entries */
		for (i = 0; i < naliases; i++)
		{
			char	   *locale = aliases[i].localename;
			char	   *alias = aliases[i].alias;
			int			enc = aliases[i].enc;

			/* named after the alias, but using the full locale name */
			collid = CollationCreate(alias, nspid, GetUserId(),
									 COLLPROVIDER_LIBC, true, enc,
									 locale, locale,
									 get_collation_actual_version(COLLPROVIDER_LIBC, locale),
									 true, true);
			if (OidIsValid(collid))
			{
				ncreated++;

				CommandCounterIncrement();
			}
		}

		/* Give a warning if "locale -a" seems to be malfunctioning */
		if (nvalid == 0)
			ereport(WARNING,
					(errmsg("no usable system locales were found")));
	}
#endif							/* READ_LOCALE_A_OUTPUT */

	/*
	 * Load collations known to ICU
	 *
	 * We use uloc_countAvailable()/uloc_getAvailable() rather than
	 * ucol_countAvailable()/ucol_getAvailable().  The former returns a full
	 * set of language+region combinations, whereas the latter only returns
	 * language+region combinations if they are distinct from the language's
	 * base collation.  So there might not be a de-DE or en-GB, which would be
	 * confusing.
	 */
#ifdef USE_ICU
	{
		int			i;

		/*
		 * Start the loop at -1 to sneak in the root locale without too much
		 * code duplication.
		 */
		for (i = -1; i < uloc_countAvailable(); i++)
		{
			const char *name;
			char	   *langtag;
			char	   *icucomment;
			const char *collcollate;
			Oid			collid;

			if (i == -1)
				name = "";		/* ICU root locale */
			else
				name = uloc_getAvailable(i);

			/*
			 * The collation is always named after the language tag; the
			 * stored locale is the language tag for ICU major version 54 and
			 * later, and the plain ICU locale name before that.
			 */
			langtag = get_icu_language_tag(name);
			collcollate = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : name;

			/*
			 * Be paranoid about not allowing any non-ASCII strings into
			 * pg_collation
			 */
			if (!is_all_ascii(langtag) || !is_all_ascii(collcollate))
				continue;

			collid = CollationCreate(psprintf("%s-x-icu", langtag),
									 nspid, GetUserId(),
									 COLLPROVIDER_ICU, true, -1,
									 collcollate, collcollate,
									 get_collation_actual_version(COLLPROVIDER_ICU, collcollate),
									 true, true);
			if (OidIsValid(collid))
			{
				ncreated++;

				CommandCounterIncrement();

				/* attach the display name as a comment, if one is available */
				icucomment = get_icu_locale_comment(name);
				if (icucomment)
					CreateComments(collid, CollationRelationId, 0,
								   icucomment);
			}
		}
	}
#endif							/* USE_ICU */

	PG_RETURN_INT32(ncreated);
}
| 752 | |