summary | refs | log | tree | commit | diff
path: root/src/backend/commands/collationcmds.c
diff options
context:
space:
mode:
author Peter Eisentraut 2017-03-23 19:25:34 +0000
committer Peter Eisentraut 2017-03-23 19:28:48 +0000
commit eccfef81e1f73ee41f1d8bfe4fa4e80576945048 (patch)
tree 52bd1b2468bcf9682b356cf5b5f6199ae9d80ee4 /src/backend/commands/collationcmds.c
parent ea42cc18c35381f639d45628d792e790ff39e271 (diff)
ICU support
Add a column collprovider to pg_collation that determines which library provides the collation data. The existing choices are default and libc, and this adds an icu choice, which uses the ICU4C library. The pg_locale_t type is changed to a union that contains the provider-specific locale handles. Users of locale information are changed to look into that struct for the appropriate handle to use. Also add a collversion column that records the version of the collation when it is created, and check at run time whether it is still the same. This detects potentially incompatible library upgrades that can corrupt indexes and other structures. This is currently only supported by ICU-provided collations. initdb initializes the default collation set as before from the `locale -a` output but also adds all available ICU locales with a "-x-icu" appended. Currently, ICU-provided collations can only be explicitly named collations. The global database locales are still always libc-provided. ICU support is enabled by configure --with-icu. Reviewed-by: Thomas Munro <[email protected]> Reviewed-by: Andreas Karlsson <[email protected]>
Diffstat (limited to 'src/backend/commands/collationcmds.c')
-rw-r--r-- src/backend/commands/collationcmds.c 288
1 files changed, 279 insertions, 9 deletions
diff --git a/src/backend/commands/collationcmds.c b/src/backend/commands/collationcmds.c
index 919cfc6a067..835cb263db3 100644
--- a/src/backend/commands/collationcmds.c
+++ b/src/backend/commands/collationcmds.c
@@ -14,15 +14,18 @@
*/
#include "postgres.h"
+#include "access/heapam.h"
#include "access/htup_details.h"
#include "access/xact.h"
#include "catalog/dependency.h"
#include "catalog/indexing.h"
#include "catalog/namespace.h"
+#include "catalog/objectaccess.h"
#include "catalog/pg_collation.h"
#include "catalog/pg_collation_fn.h"
#include "commands/alter.h"
#include "commands/collationcmds.h"
+#include "commands/comment.h"
#include "commands/dbcommands.h"
#include "commands/defrem.h"
#include "mb/pg_wchar.h"
@@ -33,6 +36,7 @@
#include "utils/rel.h"
#include "utils/syscache.h"
+
/*
* CREATE COLLATION
*/
@@ -47,8 +51,14 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
DefElem *localeEl = NULL;
DefElem *lccollateEl = NULL;
DefElem *lcctypeEl = NULL;
+ DefElem *providerEl = NULL;
+ DefElem *versionEl = NULL;
char *collcollate = NULL;
char *collctype = NULL;
+ char *collproviderstr = NULL;
+ int collencoding;
+ char collprovider = 0;
+ char *collversion = NULL;
Oid newoid;
ObjectAddress address;
@@ -72,6 +82,10 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
defelp = &lccollateEl;
else if (pg_strcasecmp(defel->defname, "lc_ctype") == 0)
defelp = &lcctypeEl;
+ else if (pg_strcasecmp(defel->defname, "provider") == 0)
+ defelp = &providerEl;
+ else if (pg_strcasecmp(defel->defname, "version") == 0)
+ defelp = &versionEl;
else
{
ereport(ERROR,
@@ -103,6 +117,7 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
collcollate = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collcollate));
collctype = pstrdup(NameStr(((Form_pg_collation) GETSTRUCT(tp))->collctype));
+ collprovider = ((Form_pg_collation) GETSTRUCT(tp))->collprovider;
ReleaseSysCache(tp);
}
@@ -119,6 +134,27 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
if (lcctypeEl)
collctype = defGetString(lcctypeEl);
+ if (providerEl)
+ collproviderstr = defGetString(providerEl);
+
+ if (versionEl)
+ collversion = defGetString(versionEl);
+
+ if (collproviderstr)
+ {
+ if (pg_strcasecmp(collproviderstr, "icu") == 0)
+ collprovider = COLLPROVIDER_ICU;
+ else if (pg_strcasecmp(collproviderstr, "libc") == 0)
+ collprovider = COLLPROVIDER_LIBC;
+ else
+ ereport(ERROR,
+ (errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
+ errmsg("unrecognized collation provider: %s",
+ collproviderstr)));
+ }
+ else if (!fromEl)
+ collprovider = COLLPROVIDER_LIBC;
+
if (!collcollate)
ereport(ERROR,
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
@@ -129,14 +165,25 @@ DefineCollation(ParseState *pstate, List *names, List *parameters, bool if_not_e
(errcode(ERRCODE_INVALID_OBJECT_DEFINITION),
errmsg("parameter \"lc_ctype\" must be specified")));
- check_encoding_locale_matches(GetDatabaseEncoding(), collcollate, collctype);
+ if (collprovider == COLLPROVIDER_ICU)
+ collencoding = -1;
+ else
+ {
+ collencoding = GetDatabaseEncoding();
+ check_encoding_locale_matches(collencoding, collcollate, collctype);
+ }
+
+ if (!collversion)
+ collversion = get_collation_actual_version(collprovider, collcollate);
newoid = CollationCreate(collName,
collNamespace,
GetUserId(),
- GetDatabaseEncoding(),
+ collprovider,
+ collencoding,
collcollate,
collctype,
+ collversion,
if_not_exists);
if (!OidIsValid(newoid))
@@ -182,16 +229,118 @@ IsThereCollationInNamespace(const char *collname, Oid nspOid)
collname, get_namespace_name(nspOid))));
}
+/*
+ * ALTER COLLATION
+ *
+ * Compare the collversion recorded in pg_collation for the collation named
+ * by stmt->collname with the value get_collation_actual_version() returns
+ * for its provider and collcollate, and store the new value if the two
+ * differ.  The caller must own the collation.
+ *
+ * Emits a NOTICE describing the outcome and returns the ObjectAddress of
+ * the collation.  A transition between a NULL and a non-NULL version is
+ * rejected with an error.
+ */
+ObjectAddress
+AlterCollation(AlterCollationStmt *stmt)
+{
+ Relation rel;
+ Oid collOid;
+ HeapTuple tup;
+ Form_pg_collation collForm;
+ Datum collversion;
+ bool isnull;
+ char *oldversion;
+ char *newversion;
+ ObjectAddress address;
+
+ /*
+ * Open pg_collation for writing.  The lock is not released by the
+ * heap_close() call at the end of this function (it passes NoLock).
+ */
+ rel = heap_open(CollationRelationId, RowExclusiveLock);
+ /* missing_ok is false, so an unknown name is not tolerated here */
+ collOid = get_collation_oid(stmt->collname, false);
+
+ /* only the owner of the collation may proceed */
+ if (!pg_collation_ownercheck(collOid, GetUserId()))
+ aclcheck_error(ACLCHECK_NOT_OWNER, ACL_KIND_COLLATION,
+ NameListToString(stmt->collname));
+
+ /* fetch a private copy of the catalog tuple; it is freed below */
+ tup = SearchSysCacheCopy1(COLLOID, ObjectIdGetDatum(collOid));
+ if (!HeapTupleIsValid(tup))
+ elog(ERROR, "cache lookup failed for collation %u", collOid);
+
+ collForm = (Form_pg_collation) GETSTRUCT(tup);
+ /* collversion can be NULL, so it is read via SysCacheGetAttr */
+ collversion = SysCacheGetAttr(COLLOID, tup, Anum_pg_collation_collversion,
+ &isnull);
+ oldversion = isnull ? NULL : TextDatumGetCString(collversion);
+
+ /* may be NULL as well; both NULL cases are handled below */
+ newversion = get_collation_actual_version(collForm->collprovider, NameStr(collForm->collcollate));
+
+ /* cannot change from NULL to non-NULL or vice versa */
+ if ((!oldversion && newversion) || (oldversion && !newversion))
+ elog(ERROR, "invalid collation version change");
+ else if (oldversion && newversion && strcmp(newversion, oldversion) != 0)
+ {
+ bool nulls[Natts_pg_collation];
+ bool replaces[Natts_pg_collation];
+ Datum values[Natts_pg_collation];
+
+ ereport(NOTICE,
+ (errmsg("changing version from %s to %s",
+ oldversion, newversion)));
+
+ memset(values, 0, sizeof(values));
+ memset(nulls, false, sizeof(nulls));
+ memset(replaces, false, sizeof(replaces));
+
+ /* replace only the collversion column; all others stay as they are */
+ values[Anum_pg_collation_collversion - 1] = CStringGetTextDatum(newversion);
+ replaces[Anum_pg_collation_collversion - 1] = true;
+
+ tup = heap_modify_tuple(tup, RelationGetDescr(rel),
+ values, nulls, replaces);
+ }
+ else
+ ereport(NOTICE,
+ (errmsg("version has not changed")));
+
+ /*
+ * This runs on the "not changed" path too, in which case the tuple is
+ * written back unmodified.
+ */
+ CatalogTupleUpdate(rel, &tup->t_self, tup);
+
+ InvokeObjectPostAlterHook(CollationRelationId, collOid, 0);
+
+ ObjectAddressSet(address, CollationRelationId, collOid);
+
+ heap_freetuple(tup);
+ heap_close(rel, NoLock);
+
+ return address;
+}
+
+
+/*
+ * SQL-callable: given a collation OID, return the version string that
+ * get_collation_actual_version() reports for that collation's provider and
+ * collcollate, or SQL NULL when it reports none.
+ *
+ * Raises an error if no pg_collation row exists for the OID.
+ */
+Datum
+pg_collation_actual_version(PG_FUNCTION_ARGS)
+{
+	Oid			collid = PG_GETARG_OID(0);
+	HeapTuple	colltup;
+	Form_pg_collation collform;
+	char		provider;
+	char	   *locale;
+	char	   *actual;
+
+	colltup = SearchSysCache1(COLLOID, ObjectIdGetDatum(collid));
+	if (!HeapTupleIsValid(colltup))
+		ereport(ERROR,
+				(errcode(ERRCODE_UNDEFINED_OBJECT),
+				 errmsg("collation with OID %u does not exist", collid)));
+
+	/* Take what we need out of the cache entry before releasing it. */
+	collform = (Form_pg_collation) GETSTRUCT(colltup);
+	locale = pstrdup(NameStr(collform->collcollate));
+	provider = collform->collprovider;
+	ReleaseSysCache(colltup);
+
+	actual = get_collation_actual_version(provider, locale);
+	if (actual == NULL)
+		PG_RETURN_NULL();
+
+	PG_RETURN_TEXT_P(cstring_to_text(actual));
+}
+
/*
- * "Normalize" a locale name, stripping off encoding tags such as
+ * "Normalize" a libc locale name, stripping off encoding tags such as
* ".utf8" (e.g., "en_US.utf8" -> "en_US", but "br_FR.iso885915@euro"
* -> "br_FR@euro"). Return true if a new, different name was
* generated.
*/
pg_attribute_unused()
static bool
-normalize_locale_name(char *new, const char *old)
+normalize_libc_locale_name(char *new, const char *old)
{
char *n = new;
const char *o = old;
@@ -219,6 +368,46 @@ normalize_locale_name(char *new, const char *old)
}
+#ifdef USE_ICU
+/*
+ * Return the language tag that ICU's uloc_toLanguageTag() produces for the
+ * given locale name, as a pstrdup'ed string.
+ *
+ * Raises an ERROR if ICU reports a failure status.
+ */
+static char *
+get_icu_language_tag(const char *localename)
+{
+	UErrorCode	err = U_ZERO_ERROR;
+	char		tag[ULOC_FULLNAME_CAPACITY];
+
+	uloc_toLanguageTag(localename, tag, sizeof(tag), TRUE, &err);
+
+	if (U_FAILURE(err))
+		ereport(ERROR,
+				(errmsg("could not convert locale name \"%s\" to language tag: %s",
+						localename, u_errorName(err))));
+
+	/* hand back a copy that outlives the local buffer */
+	return pstrdup(tag);
+}
+
+
+/*
+ * Build a human-readable description of an ICU locale; the import code in
+ * this file stores it as the comment on the collation it creates.
+ *
+ * The display name is requested from ICU in English ("en") and then
+ * converted from UChar with icu_from_uchar(), which supplies the returned
+ * string.
+ *
+ * Raises an ERROR if ICU reports a failure status.
+ */
+static char *
+get_icu_locale_comment(const char *localename)
+{
+	UErrorCode	status;
+	UChar		displayname[128];
+	int32		len_uchar;
+	char	   *result;
+
+	status = U_ZERO_ERROR;
+
+	/*
+	 * The capacity argument of uloc_getDisplayName() is a count of UChar
+	 * elements, not bytes.  Passing sizeof(displayname) would claim twice
+	 * the room the array really has and let ICU write past its end.
+	 */
+	len_uchar = uloc_getDisplayName(localename, "en",
+									&displayname[0],
+									sizeof(displayname) / sizeof(displayname[0]),
+									&status);
+	if (U_FAILURE(status))
+		ereport(ERROR,
+				(errmsg("could not get display name for locale \"%s\": %s",
+						localename, u_errorName(status))));
+
+	/* convert using the explicit length returned by ICU */
+	icu_from_uchar(&result, displayname, len_uchar);
+
+	return result;
+}
+#endif /* USE_ICU */
+
+
Datum
pg_import_system_collations(PG_FUNCTION_ARGS)
{
@@ -302,8 +491,10 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
count++;
- CollationCreate(localebuf, nspid, GetUserId(), enc,
- localebuf, localebuf, if_not_exists);
+ CollationCreate(localebuf, nspid, GetUserId(), COLLPROVIDER_LIBC, enc,
+ localebuf, localebuf,
+ get_collation_actual_version(COLLPROVIDER_LIBC, localebuf),
+ if_not_exists);
CommandCounterIncrement();
@@ -316,7 +507,7 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
* "locale -a" output. So save up the aliases and try to add them
* after we've read all the output.
*/
- if (normalize_locale_name(alias, localebuf))
+ if (normalize_libc_locale_name(alias, localebuf))
{
aliaslist = lappend(aliaslist, pstrdup(alias));
localelist = lappend(localelist, pstrdup(localebuf));
@@ -333,8 +524,10 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
char *locale = (char *) lfirst(lcl);
int enc = lfirst_int(lce);
- CollationCreate(alias, nspid, GetUserId(), enc,
- locale, locale, true);
+ CollationCreate(alias, nspid, GetUserId(), COLLPROVIDER_LIBC, enc,
+ locale, locale,
+ get_collation_actual_version(COLLPROVIDER_LIBC, locale),
+ true);
CommandCounterIncrement();
}
@@ -343,5 +536,82 @@ pg_import_system_collations(PG_FUNCTION_ARGS)
(errmsg("no usable system locales were found")));
#endif /* not HAVE_LOCALE_T && not WIN32 */
+#ifdef USE_ICU
+ if (!is_encoding_supported_by_icu(GetDatabaseEncoding()))
+ {
+ ereport(NOTICE,
+ (errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
+ errmsg("encoding \"%s\" not supported by ICU",
+ pg_encoding_to_char(GetDatabaseEncoding()))));
+ }
+ else
+ {
+ int i;
+
+ /*
+ * Start the loop at -1 to sneak in the root locale without too much
+ * code duplication.
+ */
+ for (i = -1; i < ucol_countAvailable(); i++)
+ {
+ const char *name;
+ char *langtag;
+ const char *collcollate;
+ UEnumeration *en;
+ UErrorCode status;
+ const char *val;
+ Oid collid;
+
+ if (i == -1)
+ name = ""; /* ICU root locale */
+ else
+ name = ucol_getAvailable(i);
+
+ langtag = get_icu_language_tag(name);
+ collcollate = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : name;
+ collid = CollationCreate(psprintf("%s-x-icu", langtag),
+ nspid, GetUserId(), COLLPROVIDER_ICU, -1,
+ collcollate, collcollate,
+ get_collation_actual_version(COLLPROVIDER_ICU, collcollate),
+ if_not_exists);
+
+ CreateComments(collid, CollationRelationId, 0,
+ get_icu_locale_comment(name));
+
+ /*
+ * Add keyword variants
+ */
+ status = U_ZERO_ERROR;
+ en = ucol_getKeywordValuesForLocale("collation", name, TRUE, &status);
+ if (U_FAILURE(status))
+ ereport(ERROR,
+ (errmsg("could not get keyword values for locale \"%s\": %s",
+ name, u_errorName(status))));
+
+ status = U_ZERO_ERROR;
+ uenum_reset(en, &status);
+ while ((val = uenum_next(en, NULL, &status)))
+ {
+ char *localeid = psprintf("%s@collation=%s", name, val);
+
+ langtag = get_icu_language_tag(localeid);
+ collcollate = U_ICU_VERSION_MAJOR_NUM >= 54 ? langtag : localeid;
+ collid = CollationCreate(psprintf("%s-x-icu", langtag),
+ nspid, GetUserId(), COLLPROVIDER_ICU, -1,
+ collcollate, collcollate,
+ get_collation_actual_version(COLLPROVIDER_ICU, collcollate),
+ if_not_exists);
+ CreateComments(collid, CollationRelationId, 0,
+ get_icu_locale_comment(localeid));
+ }
+ if (U_FAILURE(status))
+ ereport(ERROR,
+ (errmsg("could not get keyword values for locale \"%s\": %s",
+ name, u_errorName(status))));
+ uenum_close(en);
+ }
+ }
+#endif
+
PG_RETURN_VOID();
}