diff options
Diffstat (limited to 'src/test/regress/sql/conversion.sql')
-rw-r--r-- | src/test/regress/sql/conversion.sql | 133 |
1 files changed, 133 insertions, 0 deletions
diff --git a/src/test/regress/sql/conversion.sql b/src/test/regress/sql/conversion.sql index 5576999e42e..e178e2479b0 100644 --- a/src/test/regress/sql/conversion.sql +++ b/src/test/regress/sql/conversion.sql @@ -74,6 +74,139 @@ $$; -- -- UTF-8 -- +-- The description column must be unique. +CREATE TABLE utf8_verification_inputs (inbytes bytea, description text PRIMARY KEY); +insert into utf8_verification_inputs values + ('\x66006f', 'NUL byte'), + ('\xaf', 'bare continuation'), + ('\xc5', 'missing second byte in 2-byte char'), + ('\xc080', 'smallest 2-byte overlong'), + ('\xc1bf', 'largest 2-byte overlong'), + ('\xc280', 'next 2-byte after overlongs'), + ('\xdfbf', 'largest 2-byte'), + ('\xe9af', 'missing third byte in 3-byte char'), + ('\xe08080', 'smallest 3-byte overlong'), + ('\xe09fbf', 'largest 3-byte overlong'), + ('\xe0a080', 'next 3-byte after overlong'), + ('\xed9fbf', 'last before surrogates'), + ('\xeda080', 'smallest surrogate'), + ('\xedbfbf', 'largest surrogate'), + ('\xee8080', 'next after surrogates'), + ('\xefbfbf', 'largest 3-byte'), + ('\xf1afbf', 'missing fourth byte in 4-byte char'), + ('\xf0808080', 'smallest 4-byte overlong'), + ('\xf08fbfbf', 'largest 4-byte overlong'), + ('\xf0908080', 'next 4-byte after overlong'), + ('\xf48fbfbf', 'largest 4-byte'), + ('\xf4908080', 'smallest too large'), + ('\xfa9a9a8a8a', '5-byte'); + +-- Test UTF-8 verification slow path +select description, (test_conv(inbytes, 'utf8', 'utf8')).* from utf8_verification_inputs; + +-- Test UTF-8 verification with ASCII padding appended to provide +-- coverage for algorithms that work on multiple bytes at a time. +-- The error message for a sequence starting with a 4-byte lead +-- will contain all 4 bytes if they are present, so various +-- expressions below add 3 ASCII bytes to the end to ensure +-- consistent error messages. +-- The number 64 below needs to be at least the value of STRIDE_LENGTH in wchar.c. + +-- Test multibyte verification in fast path +with test_bytes as ( + select + inbytes, + description, + (test_conv(inbytes || repeat('.', 3)::bytea, 'utf8', 'utf8')).error + from utf8_verification_inputs +), test_padded as ( + select + description, + (test_conv(inbytes || repeat('.', 64)::bytea, 'utf8', 'utf8')).error + from test_bytes +) +select + description, + b.error as orig_error, + p.error as error_after_padding +from test_padded p +join test_bytes b +using (description) +where p.error is distinct from b.error +order by description; + +-- Test ASCII verification in fast path where incomplete +-- UTF-8 sequences fall at the end of the preceding chunk. +with test_bytes as ( + select + inbytes, + description, + (test_conv(inbytes || repeat('.', 3)::bytea, 'utf8', 'utf8')).error + from utf8_verification_inputs +), test_padded as ( + select + description, + (test_conv(repeat('.', 64 - length(inbytes))::bytea || inbytes || repeat('.', 64)::bytea, 'utf8', 'utf8')).error + from test_bytes +) +select + description, + b.error as orig_error, + p.error as error_after_padding +from test_padded p +join test_bytes b +using (description) +where p.error is distinct from b.error +order by description; + +-- Test cases where UTF-8 sequences within short text +-- come after the fast path returns. +with test_bytes as ( + select + inbytes, + description, + (test_conv(inbytes || repeat('.', 3)::bytea, 'utf8', 'utf8')).error + from utf8_verification_inputs +), test_padded as ( + select + description, + (test_conv(repeat('.', 64)::bytea || inbytes || repeat('.', 3)::bytea, 'utf8', 'utf8')).error + from test_bytes +) +select + description, + b.error as orig_error, + p.error as error_after_padding +from test_padded p +join test_bytes b +using (description) +where p.error is distinct from b.error +order by description; + +-- Test cases where incomplete UTF-8 sequences fall at the +-- end of the part checked by the fast path. +with test_bytes as ( + select + inbytes, + description, + (test_conv(inbytes || repeat('.', 3)::bytea, 'utf8', 'utf8')).error + from utf8_verification_inputs +), test_padded as ( + select + description, + (test_conv(repeat('.', 64 - length(inbytes))::bytea || inbytes || repeat('.', 3)::bytea, 'utf8', 'utf8')).error + from test_bytes +) +select + description, + b.error as orig_error, + p.error as error_after_padding +from test_padded p +join test_bytes b +using (description) +where p.error is distinct from b.error +order by description; + CREATE TABLE utf8_inputs (inbytes bytea, description text); insert into utf8_inputs values ('\x666f6f', 'valid, pure ASCII'), |