summaryrefslogtreecommitdiff
path: root/test/prism/encoding_test.rb
diff options
context:
space:
mode:
author: Kevin Menard <[email protected]> 2024-01-24 16:39:06 -0500
committer: git <[email protected]> 2024-01-26 20:15:19 +0000
commit: 2a509787cb8869301b614139218432aef9b68f9b (patch)
tree: 93c7e278fd5124be8697581b1a53d78b923ffed7 /test/prism/encoding_test.rb
parent: 3d996e827f2ff74a1bb7e978d754cea7d957b9eb (diff)
[ruby/prism] Track whether a Symbol should have its encoding changed from the source encoding.
Ruby sets a Symbol literal's encoding to US-ASCII if the symbol consists only of US-ASCII code points. Character escapes can also cause a Symbol to have a different encoding than its source's encoding. https://github.com/ruby/prism/commit/f315660b31
Diffstat (limited to 'test/prism/encoding_test.rb')
-rw-r--r--test/prism/encoding_test.rb98
1 files changed, 98 insertions, 0 deletions
diff --git a/test/prism/encoding_test.rb b/test/prism/encoding_test.rb
index e755cdaba2..f6caeeb769 100644
--- a/test/prism/encoding_test.rb
+++ b/test/prism/encoding_test.rb
@@ -148,6 +148,7 @@ module Prism
# encoding that prism supports.
escapes = ["\\x00", "\\x7F", "\\x80", "\\xFF", "\\u{00}", "\\u{7F}", "\\u{80}", "\\M-\\C-?"]
escapes = escapes.concat(escapes.product(escapes).map(&:join))
+ symbols = [:a, :ą, :+]
encodings.each_key do |encoding|
define_method(:"test_encoding_flags_#{encoding.name}") do
@@ -155,6 +156,18 @@ module Prism
end
end
+ encodings.each_key do |encoding|
+ define_method(:"test_symbol_encoding_flags_#{encoding.name}") do
+ assert_symbol_encoding_flags(encoding, symbols)
+ end
+ end
+
+ encodings.each_key do |encoding|
+ define_method(:"test_symbol_character_escape_encoding_flags_#{encoding.name}") do
+ assert_symbol_character_escape_encoding_flags(encoding, escapes)
+ end
+ end
+
def test_coding
result = Prism.parse("# coding: utf-8\n'string'")
actual = result.value.statements.body.first.unescaped.encoding
@@ -343,5 +356,90 @@ module Prism
assert_equal expected, actual
end
end
+
+ # Test Symbol literals without any interpolation or escape sequences.
+ def assert_symbol_encoding_flags(encoding, symbols)
+ symbols.each do |symbol|
+ source = "# encoding: #{encoding.name}\n#{symbol.inspect}"
+
+ expected =
+ begin
+ eval(source).encoding
+ rescue SyntaxError => error
+ unless error.message.include?("invalid multibyte char")
+ raise
+ end
+ end
+
+ actual =
+ Prism.parse(source).then do |result|
+ if result.success?
+ symbol = result.value.statements.body.first
+
+ if symbol.forced_utf8_encoding?
+ Encoding::UTF_8
+ elsif symbol.forced_binary_encoding?
+ Encoding::ASCII_8BIT
+ elsif symbol.forced_us_ascii_encoding?
+ Encoding::US_ASCII
+ else
+ encoding
+ end
+ else
+ error = result.errors.last
+
+ unless error.message.include?("invalid symbol")
+ raise error.message
+ end
+ end
+ end
+
+ assert_equal expected, actual
+ end
+ end
+
+ def assert_symbol_character_escape_encoding_flags(encoding, escapes)
+ escapes.each do |escaped|
+ source = "# encoding: #{encoding.name}\n:\"#{escaped}\""
+
+ expected =
+ begin
+ eval(source).encoding
+ rescue SyntaxError => error
+ if error.message.include?("UTF-8 mixed within")
+ error.message[/: (.+?)\n/, 1]
+ else
+ raise
+ end
+ end
+
+ actual =
+ Prism.parse(source).then do |result|
+ if result.success?
+ symbol = result.value.statements.body.first
+
+ if symbol.forced_utf8_encoding?
+ Encoding::UTF_8
+ elsif symbol.forced_binary_encoding?
+ Encoding::ASCII_8BIT
+ elsif symbol.forced_us_ascii_encoding?
+ Encoding::US_ASCII
+ else
+ encoding
+ end
+ else
+ error = result.errors.first
+
+ if error.message.include?("mixed")
+ error.message
+ else
+ raise error.message
+ end
+ end
+ end
+
+ assert_equal expected, actual
+ end
+ end
end
end