summaryrefslogtreecommitdiff
path: root/src/backend/regex
diff options
context:
space:
mode:
authorJeff Davis2025-01-17 23:56:30 +0000
committerJeff Davis2025-01-17 23:56:30 +0000
commitd3d0983169130a9b81e3fe48d5c2ca4931480956 (patch)
tree75e680ff03b4af3fd21a36be49515367133e6d02 /src/backend/regex
parent286a365b9c25479f8ad82043ed136748733adfa6 (diff)
Support PG_UNICODE_FAST locale in the builtin collation provider.
The PG_UNICODE_FAST locale uses code point sort order (fast, memcmp-based) combined with Unicode character semantics. The character semantics are based on Unicode full case mapping. Full case mapping can map a single codepoint to multiple codepoints, such as "ß" uppercasing to "SS". Additionally, it handles context-sensitive mappings like the "final sigma", and it uses titlecase mappings such as "Dž" when titlecasing (rather than plain uppercase mappings). Importantly, the uppercasing of "ß" as "SS" is specifically mentioned by the SQL standard. In Postgres, UCS_BASIC uses plain ASCII semantics for case mapping and pattern matching, so if we changed it to use the PG_UNICODE_FAST locale, it would offer better compliance with the standard. For now, though, do not change the behavior of UCS_BASIC. Discussion: https://postgr.es/m/[email protected] Discussion: https://postgr.es/m/[email protected] Reviewed-by: Peter Eisentraut, Daniel Verite
Diffstat (limited to 'src/backend/regex')
-rw-r--r--src/backend/regex/regc_pg_locale.c6
1 files changed, 3 insertions, 3 deletions
diff --git a/src/backend/regex/regc_pg_locale.c b/src/backend/regex/regc_pg_locale.c
index 2360d08efae..ed7411df83d 100644
--- a/src/backend/regex/regc_pg_locale.c
+++ b/src/backend/regex/regc_pg_locale.c
@@ -307,7 +307,7 @@ pg_wc_isdigit(pg_wchar c)
return (c <= (pg_wchar) 127 &&
(pg_char_properties[c] & PG_ISDIGIT));
case PG_REGEX_STRATEGY_BUILTIN:
- return pg_u_isdigit(c, true);
+ return pg_u_isdigit(c, !pg_regex_locale->info.builtin.casemap_full);
case PG_REGEX_STRATEGY_LIBC_WIDE:
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
return iswdigit_l((wint_t) c, pg_regex_locale->info.lt);
@@ -361,7 +361,7 @@ pg_wc_isalnum(pg_wchar c)
return (c <= (pg_wchar) 127 &&
(pg_char_properties[c] & PG_ISALNUM));
case PG_REGEX_STRATEGY_BUILTIN:
- return pg_u_isalnum(c, true);
+ return pg_u_isalnum(c, !pg_regex_locale->info.builtin.casemap_full);
case PG_REGEX_STRATEGY_LIBC_WIDE:
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
return iswalnum_l((wint_t) c, pg_regex_locale->info.lt);
@@ -505,7 +505,7 @@ pg_wc_ispunct(pg_wchar c)
return (c <= (pg_wchar) 127 &&
(pg_char_properties[c] & PG_ISPUNCT));
case PG_REGEX_STRATEGY_BUILTIN:
- return pg_u_ispunct(c, true);
+ return pg_u_ispunct(c, !pg_regex_locale->info.builtin.casemap_full);
case PG_REGEX_STRATEGY_LIBC_WIDE:
if (sizeof(wchar_t) >= 4 || c <= (pg_wchar) 0xFFFF)
return iswpunct_l((wint_t) c, pg_regex_locale->info.lt);