summary | refs | log | tree | commit | diff
path: root/src/test/regress/sql/conversion.sql
diff options
context:
space:
mode:
Diffstat (limited to 'src/test/regress/sql/conversion.sql')
-rw-r--r-- src/test/regress/sql/conversion.sql 133
1 files changed, 133 insertions, 0 deletions
diff --git a/src/test/regress/sql/conversion.sql b/src/test/regress/sql/conversion.sql
index 5576999e42e..e178e2479b0 100644
--- a/src/test/regress/sql/conversion.sql
+++ b/src/test/regress/sql/conversion.sql
@@ -74,6 +74,139 @@ $$;
--
-- UTF-8
--
+-- Inputs for the UTF-8 verification tests below.  Each row pairs a byte
+-- sequence with a label naming the case it exercises.
+-- The description column must be unique: the comparison queries that
+-- follow join on it, and the primary key enforces that.
+CREATE TABLE utf8_verification_inputs (inbytes bytea, description text PRIMARY KEY);
+-- Columns are listed explicitly so the value order does not silently
+-- depend on the table definition.
+insert into utf8_verification_inputs (inbytes, description) values
+  ('\x66006f', 'NUL byte'),
+  ('\xaf', 'bare continuation'),
+  ('\xc5', 'missing second byte in 2-byte char'),
+  ('\xc080', 'smallest 2-byte overlong'),
+  ('\xc1bf', 'largest 2-byte overlong'),
+  ('\xc280', 'next 2-byte after overlongs'),
+  ('\xdfbf', 'largest 2-byte'),
+  ('\xe9af', 'missing third byte in 3-byte char'),
+  ('\xe08080', 'smallest 3-byte overlong'),
+  ('\xe09fbf', 'largest 3-byte overlong'),
+  ('\xe0a080', 'next 3-byte after overlong'),
+  ('\xed9fbf', 'last before surrogates'),
+  ('\xeda080', 'smallest surrogate'),
+  ('\xedbfbf', 'largest surrogate'),
+  ('\xee8080', 'next after surrogates'),
+  ('\xefbfbf', 'largest 3-byte'),
+  ('\xf1afbf', 'missing fourth byte in 4-byte char'),
+  ('\xf0808080', 'smallest 4-byte overlong'),
+  ('\xf08fbfbf', 'largest 4-byte overlong'),
+  ('\xf0908080', 'next 4-byte after overlong'),
+  ('\xf48fbfbf', 'largest 4-byte'),
+  ('\xf4908080', 'smallest too large'),
+  ('\xfa9a9a8a8a', '5-byte');
+
+-- Slow path of UTF-8 verification: convert every input exactly as
+-- stored and show all columns returned by test_conv next to its label.
+select
+  v.description,
+  (test_conv(v.inbytes, 'utf8', 'utf8')).*
+from utf8_verification_inputs as v;
+
+-- Test UTF-8 verification with ASCII padding appended to provide
+-- coverage for algorithms that work on multiple bytes at a time.
+-- The error message for a sequence starting with a 4-byte lead
+-- will contain all 4 bytes if they are present, so various
+-- expressions below add 3 ASCII bytes to the end to ensure
+-- consistent error messages.
+-- The number 64 below needs to be at least the value of STRIDE_LENGTH in wchar.c.
+
+-- Multibyte verification in the fast path.
+-- Each input is followed by 64 ASCII bytes, and the resulting error is
+-- compared with the error obtained when only 3 ASCII bytes follow.
+-- Only inputs whose two errors differ are listed.
+with baseline as (
+  -- error reported with 3 trailing ASCII bytes
+  select
+    inbytes,
+    description,
+    (test_conv(inbytes || repeat('.', 3)::bytea, 'utf8', 'utf8')).error
+  from utf8_verification_inputs
+),
+padded as (
+  -- error reported with 64 trailing ASCII bytes
+  select
+    description,
+    (test_conv(inbytes || repeat('.', 64)::bytea, 'utf8', 'utf8')).error
+  from baseline
+)
+select
+  baseline.description,
+  baseline.error as orig_error,
+  padded.error as error_after_padding
+from baseline
+inner join padded
+  on padded.description = baseline.description
+where padded.error is distinct from baseline.error
+order by baseline.description;
+
+-- ASCII verification in the fast path, with incomplete UTF-8 sequences
+-- falling at the end of the preceding chunk.
+-- Each input is left-padded with ASCII so that it ends at byte 64 and is
+-- then followed by 64 more ASCII bytes.  The resulting error is compared
+-- with the error obtained when only 3 ASCII bytes follow the bare input.
+-- Only inputs whose two errors differ are listed.
+with baseline as (
+  -- error reported with 3 trailing ASCII bytes
+  select
+    inbytes,
+    description,
+    (test_conv(inbytes || repeat('.', 3)::bytea, 'utf8', 'utf8')).error
+  from utf8_verification_inputs
+),
+padded as (
+  -- error reported with ASCII on both sides of the input
+  select
+    description,
+    (test_conv(repeat('.', 64 - length(inbytes))::bytea || inbytes || repeat('.', 64)::bytea, 'utf8', 'utf8')).error
+  from baseline
+)
+select
+  baseline.description,
+  baseline.error as orig_error,
+  padded.error as error_after_padding
+from baseline
+inner join padded
+  on padded.description = baseline.description
+where padded.error is distinct from baseline.error
+order by baseline.description;
+
+-- UTF-8 sequences within short text that come after the fast path
+-- returns.
+-- Each input is preceded by 64 ASCII bytes and followed by 3.  The
+-- resulting error is compared with the error obtained when only the 3
+-- trailing ASCII bytes are present.
+-- Only inputs whose two errors differ are listed.
+with baseline as (
+  -- error reported with 3 trailing ASCII bytes
+  select
+    inbytes,
+    description,
+    (test_conv(inbytes || repeat('.', 3)::bytea, 'utf8', 'utf8')).error
+  from utf8_verification_inputs
+),
+padded as (
+  -- error reported with 64 leading and 3 trailing ASCII bytes
+  select
+    description,
+    (test_conv(repeat('.', 64)::bytea || inbytes || repeat('.', 3)::bytea, 'utf8', 'utf8')).error
+  from baseline
+)
+select
+  baseline.description,
+  baseline.error as orig_error,
+  padded.error as error_after_padding
+from baseline
+inner join padded
+  on padded.description = baseline.description
+where padded.error is distinct from baseline.error
+order by baseline.description;
+
+-- Incomplete UTF-8 sequences falling at the end of the part checked by
+-- the fast path.
+-- Each input is left-padded with ASCII so that it ends at byte 64 and is
+-- then followed by 3 ASCII bytes.  The resulting error is compared with
+-- the error obtained when only the 3 trailing ASCII bytes are present.
+-- Only inputs whose two errors differ are listed.
+with baseline as (
+  -- error reported with 3 trailing ASCII bytes
+  select
+    inbytes,
+    description,
+    (test_conv(inbytes || repeat('.', 3)::bytea, 'utf8', 'utf8')).error
+  from utf8_verification_inputs
+),
+padded as (
+  -- error reported with ASCII left-padding plus 3 trailing ASCII bytes
+  select
+    description,
+    (test_conv(repeat('.', 64 - length(inbytes))::bytea || inbytes || repeat('.', 3)::bytea, 'utf8', 'utf8')).error
+  from baseline
+)
+select
+  baseline.description,
+  baseline.error as orig_error,
+  padded.error as error_after_padding
+from baseline
+inner join padded
+  on padded.description = baseline.description
+where padded.error is distinct from baseline.error
+order by baseline.description;
+
CREATE TABLE utf8_inputs (inbytes bytea, description text);
insert into utf8_inputs values
('\x666f6f', 'valid, pure ASCII'),