1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
|
/*
* This test is for collations and character operations when using the
* builtin provider with the C.UTF-8 locale.
*/
/* skip test if not UTF8 server encoding */
-- \gset stores the boolean result in the psql variable skip_test; when it is
-- true (server encoding is not UTF8) the \if branch quits the script.
SELECT getdatabaseencoding() <> 'UTF8' AS skip_test \gset
\if :skip_test
\quit
\endif
-- Client encoding is switched to UTF8 before any non-ASCII literal is sent.
SET client_encoding TO UTF8;
--
-- Test PG_C_UTF8
--
-- Locale-name validation for provider = builtin: the underscore spelling
-- 'C_UTF8' is rejected with the error shown below.
CREATE COLLATION regress_pg_c_utf8 (
provider = builtin, locale = 'C_UTF8'); -- fails
ERROR: invalid locale name "C_UTF8" for builtin provider
-- 'C.UTF8' (no hyphen) is accepted; the collation is dropped right away so
-- the same name can be reused for the 'C.UTF-8' spelling that follows.
CREATE COLLATION regress_pg_c_utf8 (
provider = builtin, locale = 'C.UTF8');
DROP COLLATION regress_pg_c_utf8;
CREATE COLLATION regress_pg_c_utf8 (
provider = builtin, locale = 'C.UTF-8');
-- Case conversion on a column collated with PG_C_UTF8.
CREATE TABLE test_pg_c_utf8 (
t TEXT COLLATE PG_C_UTF8
);
-- Sample rows: plain ASCII, Latin letters with diacritics plus sharp s,
-- digraph letters in their three case forms, and (last three rows) a letter
-- whose uppercase and lowercase forms differ in UTF-8 byte length.
INSERT INTO test_pg_c_utf8 VALUES
('abc DEF 123abc'),
('ábc sßs ßss DÉF'),
('DŽxxDŽ džxxDž Džxxdž'),
('ȺȺȺ'),
('ⱥⱥⱥ'),
('ⱥȺ');
-- Show lower/initcap/upper of each row next to the UTF-8 byte length of the
-- input and of each converted value, so conversions that change the byte
-- length are visible in the expected output.
-- NOTE(review): there is no ORDER BY; the expected rows below are listed in
-- insertion order.
SELECT
t, lower(t), initcap(t), upper(t),
length(convert_to(t, 'UTF8')) AS t_bytes,
length(convert_to(lower(t), 'UTF8')) AS lower_t_bytes,
length(convert_to(initcap(t), 'UTF8')) AS initcap_t_bytes,
length(convert_to(upper(t), 'UTF8')) AS upper_t_bytes
FROM test_pg_c_utf8;
t | lower | initcap | upper | t_bytes | lower_t_bytes | initcap_t_bytes | upper_t_bytes
-----------------+-----------------+-----------------+-----------------+---------+---------------+-----------------+---------------
abc DEF 123abc | abc def 123abc | Abc Def 123abc | ABC DEF 123ABC | 14 | 14 | 14 | 14
ábc sßs ßss DÉF | ábc sßs ßss déf | Ábc Sßs ßss Déf | ÁBC SßS ßSS DÉF | 19 | 19 | 19 | 19
DŽxxDŽ džxxDž Džxxdž | džxxdž džxxdž džxxdž | DŽxxdž DŽxxdž DŽxxdž | DŽXXDŽ DŽXXDŽ DŽXXDŽ | 20 | 20 | 20 | 20
ȺȺȺ | ⱥⱥⱥ | Ⱥⱥⱥ | ȺȺȺ | 6 | 9 | 8 | 6
ⱥⱥⱥ | ⱥⱥⱥ | Ⱥⱥⱥ | ȺȺȺ | 9 | 9 | 8 | 6
ⱥȺ | ⱥⱥ | Ⱥⱥ | ȺȺ | 5 | 6 | 5 | 4
(6 rows)
DROP TABLE test_pg_c_utf8;
-- negative test: Final_Sigma not used for builtin locale C.UTF-8
-- In all three inputs the capital sigma is the last letter of the string
-- (alone, or followed by a non-letter mark); the expected output shows the
-- ordinary lowercase sigma every time, never the word-final form.
SELECT lower('ΑΣ' COLLATE PG_C_UTF8);
lower
-------
ασ
(1 row)
SELECT lower('ΑͺΣͺ' COLLATE PG_C_UTF8);
lower
-------
αͺσͺ
(1 row)
SELECT lower('Α΄Σ΄' COLLATE PG_C_UTF8);
lower
-------
α΄σ΄
(1 row)
-- properties: character classification in regular expressions (POSIX
-- bracket classes and \d) with the pattern collated as PG_C_UTF8.
-- Each query is phrased with ~ or !~ so that the expected result is true.
SELECT 'xyz' ~ '[[:alnum:]]' COLLATE PG_C_UTF8;
?column?
----------
t
(1 row)
SELECT 'xyz' !~ '[[:upper:]]' COLLATE PG_C_UTF8;
?column?
----------
t
(1 row)
SELECT '@' !~ '[[:alnum:]]' COLLATE PG_C_UTF8;
?column?
----------
t
(1 row)
SELECT '=' ~ '[[:punct:]]' COLLATE PG_C_UTF8; -- symbols are punctuation in posix
?column?
----------
t
(1 row)
SELECT 'a8a' ~ '[[:digit:]]' COLLATE PG_C_UTF8;
?column?
----------
t
(1 row)
SELECT '൧' !~ '\d' COLLATE PG_C_UTF8; -- only 0-9 considered digits in posix
?column?
----------
t
(1 row)
-- case mapping: case-insensitive regex operators (~* and !~*) with the
-- pattern collated as PG_C_UTF8. Covers a literal pattern, ASCII bracket
-- ranges written in the opposite case from the subject text, and Greek
-- bracket ranges in both case directions. Every query is expected to be true.
SELECT 'xYz' ~* 'XyZ' COLLATE PG_C_UTF8;
?column?
----------
t
(1 row)
SELECT 'xAb' ~* '[W-Y]' COLLATE PG_C_UTF8;
?column?
----------
t
(1 row)
SELECT 'xAb' !~* '[c-d]' COLLATE PG_C_UTF8;
?column?
----------
t
(1 row)
SELECT 'Δ' ~* '[γ-λ]' COLLATE PG_C_UTF8;
?column?
----------
t
(1 row)
SELECT 'δ' ~* '[Γ-Λ]' COLLATE PG_C_UTF8; -- same as above with cases reversed
?column?
----------
t
(1 row)
|