diff options
author | Kevin Menard <[email protected]> | 2024-01-24 16:39:06 -0500 |
---|---|---|
committer | git <[email protected]> | 2024-01-26 20:15:19 +0000 |
commit | 2a509787cb8869301b614139218432aef9b68f9b (patch) | |
tree | 93c7e278fd5124be8697581b1a53d78b923ffed7 /test/prism/encoding_test.rb | |
parent | 3d996e827f2ff74a1bb7e978d754cea7d957b9eb (diff) |
[ruby/prism] Track whether a Symbol should have its encoding changed from the source encoding.
Ruby sets a Symbol literal's encoding to US-ASCII if the symbol consists only of US-ASCII code points. Character escapes can also lead a Symbol to have a different encoding than its source's encoding.
https://github.com/ruby/prism/commit/f315660b31
Diffstat (limited to 'test/prism/encoding_test.rb')
-rw-r--r-- | test/prism/encoding_test.rb | 98 |
1 files changed, 98 insertions, 0 deletions
```diff
diff --git a/test/prism/encoding_test.rb b/test/prism/encoding_test.rb
index e755cdaba2..f6caeeb769 100644
--- a/test/prism/encoding_test.rb
+++ b/test/prism/encoding_test.rb
@@ -148,6 +148,7 @@ module Prism
     # encoding that prism supports.
     escapes = ["\\x00", "\\x7F", "\\x80", "\\xFF", "\\u{00}", "\\u{7F}", "\\u{80}", "\\M-\\C-?"]
     escapes = escapes.concat(escapes.product(escapes).map(&:join))
+    symbols = [:a, :ą, :+]
 
     encodings.each_key do |encoding|
       define_method(:"test_encoding_flags_#{encoding.name}") do
@@ -155,6 +156,18 @@ module Prism
       end
     end
 
+    encodings.each_key do |encoding|
+      define_method(:"test_symbol_encoding_flags_#{encoding.name}") do
+        assert_symbol_encoding_flags(encoding, symbols)
+      end
+    end
+
+    encodings.each_key do |encoding|
+      define_method(:"test_symbol_character_escape_encoding_flags_#{encoding.name}") do
+        assert_symbol_character_escape_encoding_flags(encoding, escapes)
+      end
+    end
+
     def test_coding
       result = Prism.parse("# coding: utf-8\n'string'")
       actual = result.value.statements.body.first.unescaped.encoding
@@ -343,5 +356,90 @@ module Prism
         assert_equal expected, actual
       end
     end
+
+    # Test Symbol literals without any interpolation or escape sequences.
+    def assert_symbol_encoding_flags(encoding, symbols)
+      symbols.each do |symbol|
+        source = "# encoding: #{encoding.name}\n#{symbol.inspect}"
+
+        expected =
+          begin
+            eval(source).encoding
+          rescue SyntaxError => error
+            unless error.message.include?("invalid multibyte char")
+              raise
+            end
+          end
+
+        actual =
+          Prism.parse(source).then do |result|
+            if result.success?
+              symbol = result.value.statements.body.first
+
+              if symbol.forced_utf8_encoding?
+                Encoding::UTF_8
+              elsif symbol.forced_binary_encoding?
+                Encoding::ASCII_8BIT
+              elsif symbol.forced_us_ascii_encoding?
+                Encoding::US_ASCII
+              else
+                encoding
+              end
+            else
+              error = result.errors.last
+
+              unless error.message.include?("invalid symbol")
+                raise error.message
+              end
+            end
+          end
+
+        assert_equal expected, actual
+      end
+    end
+
+    def assert_symbol_character_escape_encoding_flags(encoding, escapes)
+      escapes.each do |escaped|
+        source = "# encoding: #{encoding.name}\n:\"#{escaped}\""
+
+        expected =
+          begin
+            eval(source).encoding
+          rescue SyntaxError => error
+            if error.message.include?("UTF-8 mixed within")
+              error.message[/: (.+?)\n/, 1]
+            else
+              raise
+            end
+          end
+
+        actual =
+          Prism.parse(source).then do |result|
+            if result.success?
+              symbol = result.value.statements.body.first
+
+              if symbol.forced_utf8_encoding?
+                Encoding::UTF_8
+              elsif symbol.forced_binary_encoding?
+                Encoding::ASCII_8BIT
+              elsif symbol.forced_us_ascii_encoding?
+                Encoding::US_ASCII
+              else
+                encoding
+              end
+            else
+              error = result.errors.first
+
+              if error.message.include?("mixed")
+                error.message
+              else
+                raise error.message
+              end
+            end
+          end
+
+        assert_equal expected, actual
+      end
+    end
   end
 end
```