summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKevin Newton <[email protected]>2024-05-29 10:12:51 -0400
committerKevin Newton <[email protected]>2024-05-30 15:18:20 -0400
commit72452f43871b8034bfa718ed823bc62b5b81d6f9 (patch)
tree1bd87295dfcf10d20bce7f6d18184644bb079387
parent1ab7c412d2e3880a7ad233c32e93961888f8145c (diff)
[ruby/prism] Tests overhaul
https://github.com/ruby/prism/commit/6f886be0a4
-rw-r--r--lib/prism/ffi.rb25
-rw-r--r--lib/prism/translation/ruby_parser.rb4
-rw-r--r--test/prism/api/command_line_test.rb (renamed from test/prism/command_line_test.rb)10
-rw-r--r--test/prism/api/dump_test.rb56
-rw-r--r--test/prism/api/parse_comments_test.rb (renamed from test/prism/parse_comments_test.rb)14
-rw-r--r--test/prism/api/parse_stream_test.rb (renamed from test/prism/parse_stream_test.rb)11
-rw-r--r--test/prism/api/parse_success_test.rb16
-rw-r--r--test/prism/api/parse_test.rb66
-rw-r--r--test/prism/bom_test.rb2
-rw-r--r--test/prism/encoding/encodings_test.rb101
-rw-r--r--test/prism/encoding/regular_expression_encoding_test.rb131
-rw-r--r--test/prism/encoding/string_encoding_test.rb136
-rw-r--r--test/prism/encoding/symbol_encoding_test.rb108
-rw-r--r--test/prism/encoding_test.rb577
-rw-r--r--test/prism/errors_test.rb3
-rw-r--r--test/prism/fixtures_test.rb21
-rw-r--r--test/prism/fuzzer_test.rb10
-rw-r--r--test/prism/heredoc_dedent_test.rb133
-rw-r--r--test/prism/lex_test.rb90
-rw-r--r--test/prism/library_symbols_test.rb2
-rw-r--r--test/prism/locals_test.rb66
-rw-r--r--test/prism/magic_comment_test.rb121
-rw-r--r--test/prism/newline_offsets_test.rb22
-rw-r--r--test/prism/newline_test.rb27
-rw-r--r--test/prism/parse_test.rb371
-rw-r--r--test/prism/parser_test.rb186
-rw-r--r--test/prism/regexp_test.rb6
-rw-r--r--test/prism/result/attribute_write_test.rb (renamed from test/prism/attribute_write_test.rb)10
-rw-r--r--test/prism/result/comments_test.rb (renamed from test/prism/comments_test.rb)2
-rw-r--r--test/prism/result/constant_path_node_test.rb (renamed from test/prism/constant_path_node_test.rb)16
-rw-r--r--test/prism/result/equality_test.rb22
-rw-r--r--test/prism/result/heredoc_test.rb19
-rw-r--r--test/prism/result/index_write_test.rb (renamed from test/prism/index_write_test.rb)2
-rw-r--r--test/prism/result/integer_base_flags_test.rb33
-rw-r--r--test/prism/result/integer_parse_test.rb (renamed from test/prism/integer_parse_test.rb)4
-rw-r--r--test/prism/result/numeric_value_test.rb21
-rw-r--r--test/prism/result/overlap_test.rb43
-rw-r--r--test/prism/result/redundant_return_test.rb (renamed from test/prism/redundant_return_test.rb)2
-rw-r--r--test/prism/result/regular_expression_options_test.rb25
-rw-r--r--test/prism/result/source_location_test.rb (renamed from test/prism/location_test.rb)6
-rw-r--r--test/prism/result/static_inspect_test.rb (renamed from test/prism/static_inspect_test.rb)2
-rw-r--r--test/prism/result/static_literals_test.rb (renamed from test/prism/static_literals_test.rb)2
-rw-r--r--test/prism/result/warnings_test.rb (renamed from test/prism/warnings_test.rb)3
-rw-r--r--test/prism/ruby/compiler_test.rb (renamed from test/prism/compiler_test.rb)2
-rw-r--r--test/prism/ruby/desugar_compiler_test.rb (renamed from test/prism/desugar_compiler_test.rb)2
-rw-r--r--test/prism/ruby/dispatcher_test.rb (renamed from test/prism/dispatcher_test.rb)2
-rw-r--r--test/prism/ruby/location_test.rb173
-rw-r--r--test/prism/ruby/parameters_signature_test.rb (renamed from test/prism/parameters_signature_test.rb)20
-rw-r--r--test/prism/ruby/parser_test.rb288
-rw-r--r--test/prism/ruby/pattern_test.rb (renamed from test/prism/pattern_test.rb)2
-rw-r--r--test/prism/ruby/reflection_test.rb (renamed from test/prism/reflection_test.rb)2
-rw-r--r--test/prism/ruby/ripper_test.rb (renamed from test/prism/ripper_test.rb)33
-rw-r--r--test/prism/ruby/ruby_parser_test.rb127
-rw-r--r--test/prism/ruby/tunnel_test.rb26
-rw-r--r--test/prism/ruby_api_test.rb307
-rw-r--r--test/prism/ruby_parser_test.rb135
-rw-r--r--test/prism/snapshots_test.rb73
-rw-r--r--test/prism/snippets_test.rb42
-rw-r--r--test/prism/test_helper.rb213
-rw-r--r--test/prism/unescape_test.rb4
60 files changed, 2201 insertions, 1777 deletions
diff --git a/lib/prism/ffi.rb b/lib/prism/ffi.rb
index df7e183310..6b48af43cc 100644
--- a/lib/prism/ffi.rb
+++ b/lib/prism/ffi.rb
@@ -200,8 +200,8 @@ module Prism
class << self
# Mirror the Prism.dump API by using the serialization API.
- def dump(code, **options)
- LibRubyParser::PrismString.with_string(code) { |string| dump_common(string, options) }
+ def dump(source, **options)
+ LibRubyParser::PrismString.with_string(source) { |string| dump_common(string, options) }
end
# Mirror the Prism.dump_file API by using the serialization API.
@@ -302,6 +302,27 @@ module Prism
!parse_file_success?(filepath, **options)
end
+ # Mirror the Prism.profile API by using the serialization API.
+ def profile(source, **options)
+ LibRubyParser::PrismString.with_string(source) do |string|
+ LibRubyParser::PrismBuffer.with do |buffer|
+ LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options))
+ nil
+ end
+ end
+ end
+
+ # Mirror the Prism.profile_file API by using the serialization API.
+ def profile_file(filepath, **options)
+ LibRubyParser::PrismString.with_file(filepath) do |string|
+ LibRubyParser::PrismBuffer.with do |buffer|
+ options[:filepath] = filepath
+ LibRubyParser.pm_serialize_parse(buffer.pointer, string.pointer, string.length, dump_options(options))
+ nil
+ end
+ end
+ end
+
private
def dump_common(string, options) # :nodoc:
diff --git a/lib/prism/translation/ruby_parser.rb b/lib/prism/translation/ruby_parser.rb
index ec458a3b63..38690c54b3 100644
--- a/lib/prism/translation/ruby_parser.rb
+++ b/lib/prism/translation/ruby_parser.rb
@@ -485,9 +485,9 @@ module Prism
def visit_constant_path_target_node(node)
inner =
if node.parent.nil?
- s(node, :colon3, node.child.name)
+ s(node, :colon3, node.name)
else
- s(node, :colon2, visit(node.parent), node.child.name)
+ s(node, :colon2, visit(node.parent), node.name)
end
s(node, :const, inner)
diff --git a/test/prism/command_line_test.rb b/test/prism/api/command_line_test.rb
index 4b04c36f3a..a313845ead 100644
--- a/test/prism/command_line_test.rb
+++ b/test/prism/api/command_line_test.rb
@@ -1,6 +1,6 @@
# frozen_string_literal: true
-require_relative "test_helper"
+require_relative "../test_helper"
module Prism
class CommandLineTest < TestCase
@@ -67,7 +67,7 @@ module Prism
end
def test_command_line_x_implicit
- result = Prism.parse(<<~RUBY)
+ result = Prism.parse_statement(<<~RUBY)
#!/bin/bash
exit 1
@@ -75,18 +75,18 @@ module Prism
1
RUBY
- assert_kind_of IntegerNode, result.value.statements.body.first
+ assert_kind_of IntegerNode, result
end
def test_command_line_x_explicit
- result = Prism.parse(<<~RUBY, command_line: "x")
+ result = Prism.parse_statement(<<~RUBY, command_line: "x")
exit 1
#!/usr/bin/env ruby
1
RUBY
- assert_kind_of IntegerNode, result.value.statements.body.first
+ assert_kind_of IntegerNode, result
end
def test_command_line_x_implicit_fail
diff --git a/test/prism/api/dump_test.rb b/test/prism/api/dump_test.rb
new file mode 100644
index 0000000000..941088e159
--- /dev/null
+++ b/test/prism/api/dump_test.rb
@@ -0,0 +1,56 @@
+# frozen_string_literal: true
+
+return if ENV["PRISM_BUILD_MINIMAL"]
+
+require_relative "../test_helper"
+
+module Prism
+ class DumpTest < TestCase
+ Fixture.each do |fixture|
+ define_method(fixture.test_name) { assert_dump(fixture) }
+ end
+
+ def test_dump
+ filepath = __FILE__
+ source = File.read(filepath, binmode: true, external_encoding: Encoding::UTF_8)
+
+ assert_equal Prism.lex(source, filepath: filepath).value, Prism.lex_file(filepath).value
+ assert_equal Prism.dump(source, filepath: filepath), Prism.dump_file(filepath)
+
+ serialized = Prism.dump(source, filepath: filepath)
+ ast1 = Prism.load(source, serialized).value
+ ast2 = Prism.parse(source, filepath: filepath).value
+ ast3 = Prism.parse_file(filepath).value
+
+ assert_equal_nodes ast1, ast2
+ assert_equal_nodes ast2, ast3
+ end
+
+ def test_dump_file
+ assert_nothing_raised do
+ Prism.dump_file(__FILE__)
+ end
+
+ error = assert_raise Errno::ENOENT do
+ Prism.dump_file("idontexist.rb")
+ end
+
+ assert_equal "No such file or directory - idontexist.rb", error.message
+
+ assert_raise TypeError do
+ Prism.dump_file(nil)
+ end
+ end
+
+ private
+
+ def assert_dump(fixture)
+ source = fixture.read
+
+ result = Prism.parse(source, filepath: fixture.path)
+ dumped = Prism.dump(source, filepath: fixture.path)
+
+ assert_equal_nodes(result.value, Prism.load(source, dumped).value)
+ end
+ end
+end
diff --git a/test/prism/parse_comments_test.rb b/test/prism/api/parse_comments_test.rb
index 30086e3155..4dbcca1827 100644
--- a/test/prism/parse_comments_test.rb
+++ b/test/prism/api/parse_comments_test.rb
@@ -1,6 +1,6 @@
# frozen_string_literal: true
-require_relative "test_helper"
+require_relative "../test_helper"
module Prism
class ParseCommentsTest < TestCase
@@ -17,5 +17,17 @@ module Prism
assert_kind_of Array, comments
assert_equal 1, comments.length
end
+
+ def test_parse_file_comments_error
+ error = assert_raise Errno::ENOENT do
+ Prism.parse_file_comments("idontexist.rb")
+ end
+
+ assert_equal "No such file or directory - idontexist.rb", error.message
+
+ assert_raise TypeError do
+ Prism.parse_file_comments(nil)
+ end
+ end
end
end
diff --git a/test/prism/parse_stream_test.rb b/test/prism/api/parse_stream_test.rb
index 9e6347b92b..0edee74cc2 100644
--- a/test/prism/parse_stream_test.rb
+++ b/test/prism/api/parse_stream_test.rb
@@ -1,7 +1,6 @@
# frozen_string_literal: true
-require_relative "test_helper"
-require "stringio"
+require_relative "../test_helper"
module Prism
class ParseStreamTest < TestCase
@@ -10,7 +9,7 @@ module Prism
result = Prism.parse_stream(io)
assert result.success?
- assert_kind_of Prism::CallNode, result.value.statements.body.first
+ assert_kind_of Prism::CallNode, result.statement
end
def test_multi_line
@@ -18,8 +17,8 @@ module Prism
result = Prism.parse_stream(io)
assert result.success?
- assert_kind_of Prism::CallNode, result.value.statements.body.first
- assert_kind_of Prism::CallNode, result.value.statements.body.last
+ assert_kind_of Prism::CallNode, result.statement
+ assert_kind_of Prism::CallNode, result.statement
end
def test_multi_read
@@ -27,7 +26,7 @@ module Prism
result = Prism.parse_stream(io)
assert result.success?
- assert_kind_of Prism::CallNode, result.value.statements.body.first
+ assert_kind_of Prism::CallNode, result.statement
end
def test___END__
diff --git a/test/prism/api/parse_success_test.rb b/test/prism/api/parse_success_test.rb
new file mode 100644
index 0000000000..2caaa5136e
--- /dev/null
+++ b/test/prism/api/parse_success_test.rb
@@ -0,0 +1,16 @@
+# frozen_string_literal: true
+
+require_relative "../test_helper"
+
+module Prism
+ class ParseSuccessTest < TestCase
+ def test_parse_success?
+ assert Prism.parse_success?("1")
+ refute Prism.parse_success?("<>")
+ end
+
+ def test_parse_file_success?
+ assert Prism.parse_file_success?(__FILE__)
+ end
+ end
+end
diff --git a/test/prism/api/parse_test.rb b/test/prism/api/parse_test.rb
new file mode 100644
index 0000000000..864d38461a
--- /dev/null
+++ b/test/prism/api/parse_test.rb
@@ -0,0 +1,66 @@
+# frozen_string_literal: true
+
+require_relative "../test_helper"
+
+module Prism
+ class ParseTest < TestCase
+ def test_parse_empty_string
+ result = Prism.parse("")
+ assert_equal [], result.value.statements.body
+ end
+
+ def test_parse_takes_file_path
+ filepath = "filepath.rb"
+ result = Prism.parse("def foo; __FILE__; end", filepath: filepath)
+
+ assert_equal filepath, find_source_file_node(result.value).filepath
+ end
+
+ def test_parse_takes_line
+ line = 4
+ result = Prism.parse("def foo\n __FILE__\nend", line: line)
+
+ assert_equal line, result.value.location.start_line
+ assert_equal line + 1, find_source_file_node(result.value).location.start_line
+
+ result = Prism.parse_lex("def foo\n __FILE__\nend", line: line)
+ assert_equal line, result.value.first.location.start_line
+ end
+
+ def test_parse_takes_negative_lines
+ line = -2
+ result = Prism.parse("def foo\n __FILE__\nend", line: line)
+
+ assert_equal line, result.value.location.start_line
+ assert_equal line + 1, find_source_file_node(result.value).location.start_line
+
+ result = Prism.parse_lex("def foo\n __FILE__\nend", line: line)
+ assert_equal line, result.value.first.location.start_line
+ end
+
+ def test_parse_file
+ node = Prism.parse_file(__FILE__).value
+ assert_kind_of ProgramNode, node
+
+ error = assert_raise Errno::ENOENT do
+ Prism.parse_file("idontexist.rb")
+ end
+
+ assert_equal "No such file or directory - idontexist.rb", error.message
+
+ assert_raise TypeError do
+ Prism.parse_file(nil)
+ end
+ end
+
+ private
+
+ def find_source_file_node(program)
+ queue = [program]
+ while (node = queue.shift)
+ return node if node.is_a?(SourceFileNode)
+ queue.concat(node.compact_child_nodes)
+ end
+ end
+ end
+end
diff --git a/test/prism/bom_test.rb b/test/prism/bom_test.rb
index 1525caf458..890bc4b36c 100644
--- a/test/prism/bom_test.rb
+++ b/test/prism/bom_test.rb
@@ -2,7 +2,7 @@
# Don't bother checking this on these engines, this is such a specific Ripper
# test.
-return if RUBY_ENGINE == "jruby" || RUBY_ENGINE == "truffleruby"
+return if RUBY_ENGINE != "ruby"
require_relative "test_helper"
diff --git a/test/prism/encoding/encodings_test.rb b/test/prism/encoding/encodings_test.rb
new file mode 100644
index 0000000000..4ad2b465cc
--- /dev/null
+++ b/test/prism/encoding/encodings_test.rb
@@ -0,0 +1,101 @@
+# frozen_string_literal: true
+
+return if RUBY_ENGINE != "ruby"
+
+require_relative "../test_helper"
+
+module Prism
+ class EncodingsTest < TestCase
+ class ConstantContext < BasicObject
+ def self.const_missing(const)
+ const
+ end
+ end
+
+ class IdentifierContext < BasicObject
+ def method_missing(name, *)
+ name
+ end
+ end
+
+ # These test that we're correctly parsing codepoints for each alias of each
+ # encoding that prism supports.
+ each_encoding do |encoding, range|
+ (encoding.names - %w[external internal filesystem locale]).each do |name|
+ define_method(:"test_encoding_#{name}") do
+ assert_encoding(encoding, name, range)
+ end
+ end
+ end
+
+ private
+
+ def assert_encoding_constant(name, character)
+ source = "# encoding: #{name}\n#{character}"
+ expected = ConstantContext.new.instance_eval(source)
+
+ result = Prism.parse(source)
+ assert result.success?
+
+ actual = result.value.statements.body.last
+ assert_kind_of ConstantReadNode, actual
+ assert_equal expected, actual.name
+ end
+
+ def assert_encoding_identifier(name, character)
+ source = "# encoding: #{name}\n#{character}"
+ expected = IdentifierContext.new.instance_eval(source)
+
+ result = Prism.parse(source)
+ assert result.success?
+
+ actual = result.value.statements.body.last
+ assert_kind_of CallNode, actual
+ assert_equal expected, actual.name
+ end
+
+ # Check that we can properly parse every codepoint in the given encoding.
+ def assert_encoding(encoding, name, range)
+ # I'm not entirely sure, but I believe these codepoints are incorrect in
+ # their parsing in CRuby. They all report as matching `[[:lower:]]` but
+ # then they are parsed as constants. This is because CRuby determines if
+ # an identifier is a constant or not by case folding it down to lowercase
+ # and checking if there is a difference. And even though they report
+ # themselves as lowercase, their case fold is different. I have reported
+ # this bug upstream.
+ case encoding
+ when Encoding::UTF_8, Encoding::UTF_8_MAC, Encoding::UTF8_DoCoMo, Encoding::UTF8_KDDI, Encoding::UTF8_SoftBank, Encoding::CESU_8
+ range = range.to_a - [
+ 0x01c5, 0x01c8, 0x01cb, 0x01f2, 0x1f88, 0x1f89, 0x1f8a, 0x1f8b,
+ 0x1f8c, 0x1f8d, 0x1f8e, 0x1f8f, 0x1f98, 0x1f99, 0x1f9a, 0x1f9b,
+ 0x1f9c, 0x1f9d, 0x1f9e, 0x1f9f, 0x1fa8, 0x1fa9, 0x1faa, 0x1fab,
+ 0x1fac, 0x1fad, 0x1fae, 0x1faf, 0x1fbc, 0x1fcc, 0x1ffc,
+ ]
+ when Encoding::Windows_1253
+ range = range.to_a - [0xb5]
+ end
+
+ range.each do |codepoint|
+ character = codepoint.chr(encoding)
+
+ if character.match?(/[[:alpha:]]/)
+ if character.match?(/[[:upper:]]/)
+ assert_encoding_constant(name, character)
+ else
+ assert_encoding_identifier(name, character)
+ end
+ elsif character.match?(/[[:alnum:]]/)
+ assert_encoding_identifier(name, "_#{character}")
+ else
+ next if ["/", "{"].include?(character)
+
+ source = "# encoding: #{name}\n/(?##{character})/\n"
+ assert Prism.parse_success?(source), "Expected #{source.inspect} to parse successfully."
+ end
+ rescue RangeError
+ source = "# encoding: #{name}\n\\x#{codepoint.to_s(16)}"
+ assert Prism.parse_failure?(source)
+ end
+ end
+ end
+end
diff --git a/test/prism/encoding/regular_expression_encoding_test.rb b/test/prism/encoding/regular_expression_encoding_test.rb
new file mode 100644
index 0000000000..5d062fe59a
--- /dev/null
+++ b/test/prism/encoding/regular_expression_encoding_test.rb
@@ -0,0 +1,131 @@
+# frozen_string_literal: true
+
+return unless defined?(RubyVM::InstructionSequence)
+return if RubyVM::InstructionSequence.compile("").to_a[4][:parser] == :prism
+
+require_relative "../test_helper"
+
+module Prism
+ class RegularExpressionEncodingTest < TestCase
+ each_encoding do |encoding, _|
+ define_method(:"test_regular_expression_encoding_flags_#{encoding.name}") do
+ assert_regular_expression_encoding_flags(encoding, ["/a/", "/Δ…/", "//"])
+ end
+
+ escapes = ["\\x00", "\\x7F", "\\x80", "\\xFF", "\\u{00}", "\\u{7F}", "\\u{80}", "\\M-\\C-?"]
+ escapes = escapes.concat(escapes.product(escapes).map(&:join))
+
+ define_method(:"test_regular_expression_escape_encoding_flags_#{encoding.name}") do
+ assert_regular_expression_encoding_flags(encoding, escapes.map { |e| "/#{e}/" })
+ end
+
+ ["n", "u", "e", "s"].each do |modifier|
+ define_method(:"test_regular_expression_encoding_modifiers_/#{modifier}_#{encoding.name}") do
+ regexp_sources = ["abc", "garΓ§on", "\\x80", "gar\\xC3\\xA7on", "gar\\u{E7}on", "abc\\u{FFFFFF}", "\\x80\\u{80}" ]
+
+ assert_regular_expression_encoding_flags(
+ encoding,
+ regexp_sources.product(["n", "u", "e", "s"]).map { |r, modifier| "/#{r}/#{modifier}" }
+ )
+ end
+ end
+ end
+
+ private
+
+ def assert_regular_expression_encoding_flags(encoding, regexps)
+ regexps.each do |regexp|
+ regexp_modifier_used = regexp.end_with?("/u") || regexp.end_with?("/e") || regexp.end_with?("/s") || regexp.end_with?("/n")
+ source = "# encoding: #{encoding.name}\n#{regexp}"
+
+ encoding_errors = ["invalid multibyte char", "escaped non ASCII character in UTF-8 regexp", "differs from source encoding"]
+ skipped_errors = ["invalid multibyte escape", "incompatible character encoding", "UTF-8 character in non UTF-8 regexp", "invalid Unicode range", "invalid Unicode list"]
+
+ # TODO (nirvdrum 21-Feb-2024): Prism currently does not handle Regexp validation unless modifiers are used. So, skip processing those errors for now: https://github.com/ruby/prism/issues/2104
+ unless regexp_modifier_used
+ skipped_errors += encoding_errors
+ encoding_errors.clear
+ end
+
+ expected =
+ begin
+ eval(source).encoding
+ rescue SyntaxError => error
+ if encoding_errors.find { |e| error.message.include?(e) }
+ error.message.split("\n").map { |m| m[/: (.+?)$/, 1] }
+ elsif skipped_errors.find { |e| error.message.include?(e) }
+ next
+ else
+ raise
+ end
+ end
+
+ actual =
+ Prism.parse(source).then do |result|
+ if result.success?
+ regexp = result.statement
+
+ actual_encoding = if regexp.forced_utf8_encoding?
+ Encoding::UTF_8
+ elsif regexp.forced_binary_encoding?
+ Encoding::ASCII_8BIT
+ elsif regexp.forced_us_ascii_encoding?
+ Encoding::US_ASCII
+ elsif regexp.ascii_8bit?
+ Encoding::ASCII_8BIT
+ elsif regexp.utf_8?
+ Encoding::UTF_8
+ elsif regexp.euc_jp?
+ Encoding::EUC_JP
+ elsif regexp.windows_31j?
+ Encoding::Windows_31J
+ else
+ encoding
+ end
+
+ if regexp.utf_8? && actual_encoding != Encoding::UTF_8
+ raise "expected regexp encoding to be UTF-8 due to '/u' modifier, but got #{actual_encoding.name}"
+ elsif regexp.ascii_8bit? && (actual_encoding != Encoding::ASCII_8BIT && actual_encoding != Encoding::US_ASCII)
+ raise "expected regexp encoding to be ASCII-8BIT or US-ASCII due to '/n' modifier, but got #{actual_encoding.name}"
+ elsif regexp.euc_jp? && actual_encoding != Encoding::EUC_JP
+ raise "expected regexp encoding to be EUC-JP due to '/e' modifier, but got #{actual_encoding.name}"
+ elsif regexp.windows_31j? && actual_encoding != Encoding::Windows_31J
+ raise "expected regexp encoding to be Windows-31J due to '/s' modifier, but got #{actual_encoding.name}"
+ end
+
+ if regexp.utf_8? && regexp.forced_utf8_encoding?
+ raise "the forced_utf8 flag should not be set when the UTF-8 modifier (/u) is used"
+ elsif regexp.ascii_8bit? && regexp.forced_binary_encoding?
+ raise "the forced_ascii_8bit flag should not be set when the UTF-8 modifier (/u) is used"
+ end
+
+ actual_encoding
+ else
+ errors = result.errors.map(&:message)
+
+ if errors.last&.include?("UTF-8 mixed within")
+ nil
+ else
+ errors
+ end
+ end
+ end
+
+ # TODO (nirvdrum 22-Feb-2024): Remove this workaround once Prism better maps CRuby's error messages.
+ # This class of error message is tricky. The part not being compared is a representation of the regexp.
+ # Depending on the source encoding and any encoding modifiers being used, CRuby alters how the regexp is represented.
+ # Sometimes it's an MBC string. Other times it uses hexadecimal character escapes. And in other cases it uses
+ # the long-form Unicode escape sequences. This short-circuit checks that the error message is mostly correct.
+ if expected.is_a?(Array) && actual.is_a?(Array)
+ if expected.last.start_with?("/.../n has a non escaped non ASCII character in non ASCII-8BIT script:") &&
+ actual.last.start_with?("/.../n has a non escaped non ASCII character in non ASCII-8BIT script:")
+ expected.last.clear
+ actual.last.clear
+ end
+ end
+
+ assert_equal expected, actual
+ end
+ end
+ end
+end
diff --git a/test/prism/encoding/string_encoding_test.rb b/test/prism/encoding/string_encoding_test.rb
new file mode 100644
index 0000000000..6f9d86df3b
--- /dev/null
+++ b/test/prism/encoding/string_encoding_test.rb
@@ -0,0 +1,136 @@
+# frozen_string_literal: true
+
+require_relative "../test_helper"
+
+module Prism
+ class StringEncodingTest < TestCase
+ each_encoding do |encoding, _|
+ define_method(:"test_#{encoding.name}") do
+ assert_encoding(encoding)
+ end
+ end
+
+ def test_coding
+ actual = Prism.parse_statement("# coding: utf-8\n'string'").unescaped.encoding
+ assert_equal Encoding::UTF_8, actual
+ end
+
+ def test_coding_with_whitespace
+ actual = Prism.parse_statement("# coding \t \r \v : \t \v \r ascii-8bit \n'string'").unescaped.encoding
+ assert_equal Encoding::ASCII_8BIT, actual
+ end
+
+ def test_emacs_style
+ actual = Prism.parse_statement("# -*- coding: utf-8 -*-\n'string'").unescaped.encoding
+ assert_equal Encoding::UTF_8, actual
+ end
+
+ def test_utf_8_unix
+ actual = Prism.parse_statement("# coding: utf-8-unix\n'string'").unescaped.encoding
+ assert_equal Encoding::UTF_8, actual
+ end
+
+ def test_utf_8_dos
+ actual = Prism.parse_statement("# coding: utf-8-dos\n'string'").unescaped.encoding
+ assert_equal Encoding::UTF_8, actual
+ end
+
+ def test_utf_8_mac
+ actual = Prism.parse_statement("# coding: utf-8-mac\n'string'").unescaped.encoding
+ assert_equal Encoding::UTF_8, actual
+ end
+
+ def test_utf_8_star
+ actual = Prism.parse_statement("# coding: utf-8-*\n'string'").unescaped.encoding
+ assert_equal Encoding::UTF_8, actual
+ end
+
+ def test_first_lexed_token
+ encoding = Prism.lex("# encoding: ascii-8bit").value[0][0].value.encoding
+ assert_equal Encoding::ASCII_8BIT, encoding
+ end
+
+ if !ENV["PRISM_BUILD_MINIMAL"]
+ # This test may be a little confusing. Basically when we use our strpbrk,
+ # it takes into account the encoding of the file.
+ def test_strpbrk_multibyte
+ result = Prism.parse(<<~RUBY)
+ # encoding: Shift_JIS
+ %w[\x81\x5c]
+ RUBY
+
+ assert(result.errors.empty?)
+ assert_equal(
+ (+"\x81\x5c").force_encoding(Encoding::Shift_JIS),
+ result.statement.elements.first.unescaped
+ )
+ end
+
+ def test_slice_encoding
+ slice = Prism.parse("# encoding: Shift_JIS\nγ‚’").value.slice
+ assert_equal (+"γ‚’").force_encoding(Encoding::SHIFT_JIS), slice
+ assert_equal Encoding::SHIFT_JIS, slice.encoding
+ end
+
+ def test_multibyte_escapes
+ [
+ ["'", "'"],
+ ["\"", "\""],
+ ["`", "`"],
+ ["/", "/"],
+ ["<<'HERE'\n", "\nHERE"],
+ ["<<-HERE\n", "\nHERE"]
+ ].each do |opening, closing|
+ assert Prism.parse_success?("# encoding: shift_jis\n'\\\x82\xA0'\n")
+ end
+ end
+ end
+
+ private
+
+ def assert_encoding(encoding)
+ escapes = ["\\x00", "\\x7F", "\\x80", "\\xFF", "\\u{00}", "\\u{7F}", "\\u{80}", "\\M-\\C-?"]
+ escapes = escapes.concat(escapes.product(escapes).map(&:join))
+
+ escapes.each do |escaped|
+ source = "# encoding: #{encoding.name}\n\"#{escaped}\""
+
+ expected =
+ begin
+ eval(source).encoding
+ rescue SyntaxError => error
+ if error.message.include?("UTF-8 mixed within")
+ error.message[/UTF-8 mixed within .+? source/]
+ else
+ raise
+ end
+ end
+
+ actual =
+ Prism.parse(source).then do |result|
+ if result.success?
+ string = result.statement
+
+ if string.forced_utf8_encoding?
+ Encoding::UTF_8
+ elsif string.forced_binary_encoding?
+ Encoding::ASCII_8BIT
+ else
+ encoding
+ end
+ else
+ error = result.errors.first
+
+ if error.message.include?("mixed")
+ error.message
+ else
+ raise error.message
+ end
+ end
+ end
+
+ assert_equal expected, actual
+ end
+ end
+ end
+end
diff --git a/test/prism/encoding/symbol_encoding_test.rb b/test/prism/encoding/symbol_encoding_test.rb
new file mode 100644
index 0000000000..20c998a58b
--- /dev/null
+++ b/test/prism/encoding/symbol_encoding_test.rb
@@ -0,0 +1,108 @@
+# frozen_string_literal: true
+
+return if RUBY_ENGINE != "ruby"
+
+require_relative "../test_helper"
+
+module Prism
+ class SymbolEncodingTest < TestCase
+ each_encoding do |encoding, _|
+ define_method(:"test_symbols_#{encoding.name}") do
+ assert_symbols(encoding)
+ end
+
+ define_method(:"test_escapes_#{encoding.name}") do
+ assert_escapes(encoding)
+ end
+ end
+
+ private
+
+ def expected_encoding(source)
+ eval(source).encoding
+ end
+
+ def actual_encoding(source, encoding)
+ result = Prism.parse(source)
+
+ if result.success?
+ symbol = result.statement
+
+ if symbol.forced_utf8_encoding?
+ Encoding::UTF_8
+ elsif symbol.forced_binary_encoding?
+ Encoding::ASCII_8BIT
+ elsif symbol.forced_us_ascii_encoding?
+ Encoding::US_ASCII
+ else
+ encoding
+ end
+ else
+ raise SyntaxError.new(result.errors.map(&:message).join("\n"))
+ end
+ end
+
+ def assert_symbols(encoding)
+ [:a, :Δ…, :+].each do |symbol|
+ source = "# encoding: #{encoding.name}\n#{symbol.inspect}"
+
+ expected =
+ begin
+ expected_encoding(source)
+ rescue SyntaxError => error
+ if error.message.include?("invalid multibyte")
+ "invalid multibyte"
+ else
+ raise
+ end
+ end
+
+ actual =
+ begin
+ actual_encoding(source, encoding)
+ rescue SyntaxError => error
+ if error.message.include?("invalid multibyte")
+ "invalid multibyte"
+ else
+ raise
+ end
+ end
+
+ assert_equal expected, actual
+ end
+ end
+
+ def assert_escapes(encoding)
+ escapes = ["\\x00", "\\x7F", "\\x80", "\\xFF", "\\u{00}", "\\u{7F}", "\\u{80}", "\\M-\\C-?"]
+ escapes = escapes.concat(escapes.product(escapes).map(&:join))
+
+ escapes.each do |escaped|
+ source = "# encoding: #{encoding.name}\n:\"#{escaped}\""
+
+ expected =
+ begin
+ expected_encoding(source)
+ rescue SyntaxError => error
+ if error.message.include?("UTF-8 mixed within")
+ error.message[/UTF-8 mixed within .+? source/]
+ else
+ raise
+ end
+ end
+
+ actual =
+ begin
+ actual_encoding(source, encoding)
+ rescue SyntaxError => error
+ if error.message.include?("mixed")
+ error.message.split("\n", 2).first
+ else
+ raise
+ end
+ end
+
+ assert_equal expected, actual
+ end
+ end
+ end
+end
diff --git a/test/prism/encoding_test.rb b/test/prism/encoding_test.rb
deleted file mode 100644
index 2aee473ddf..0000000000
--- a/test/prism/encoding_test.rb
+++ /dev/null
@@ -1,577 +0,0 @@
-# frozen_string_literal: true
-
-return if RUBY_ENGINE != "ruby"
-
-require_relative "test_helper"
-
-module Prism
- class EncodingTest < TestCase
- codepoints_1byte = 0...0x100
- encodings = {
- Encoding::ASCII_8BIT => codepoints_1byte,
- Encoding::US_ASCII => codepoints_1byte
- }
-
- if !ENV["PRISM_BUILD_MINIMAL"]
- encodings[Encoding::Windows_1253] = codepoints_1byte
- end
-
- # By default we don't test every codepoint in these encodings because it
- # takes a very long time.
- if ENV["PRISM_TEST_ALL_ENCODINGS"]
- codepoints_2bytes = 0...0x10000
- codepoints_unicode = (0...0x110000)
-
- codepoints_eucjp = [
- *(0...0x10000),
- *(0...0x10000).map { |bytes| bytes | 0x8F0000 }
- ]
-
- codepoints_emacs_mule = [
- *(0...0x80),
- *((0x81...0x90).flat_map { |byte1| (0x90...0x100).map { |byte2| byte1 << 8 | byte2 } }),
- *((0x90...0x9C).flat_map { |byte1| (0xA0...0x100).flat_map { |byte2| (0xA0...0x100).flat_map { |byte3| byte1 << 16 | byte2 << 8 | byte3 } } }),
- *((0xF0...0xF5).flat_map { |byte2| (0xA0...0x100).flat_map { |byte3| (0xA0...0x100).flat_map { |byte4| 0x9C << 24 | byte3 << 16 | byte3 << 8 | byte4 } } }),
- ]
-
- codepoints_gb18030 = [
- *(0...0x80),
- *((0x81..0xFE).flat_map { |byte1| (0x40...0x100).map { |byte2| byte1 << 8 | byte2 } }),
- *((0x81..0xFE).flat_map { |byte1| (0x30...0x40).flat_map { |byte2| (0x81..0xFE).flat_map { |byte3| (0x2F...0x41).map { |byte4| byte1 << 24 | byte2 << 16 | byte3 << 8 | byte4 } } } }),
- ]
-
- codepoints_euc_tw = [
- *(0..0x7F),
- *(0xA1..0xFF).flat_map { |byte1| (0xA1..0xFF).map { |byte2| (byte1 << 8) | byte2 } },
- *(0xA1..0xB0).flat_map { |byte2| (0xA1..0xFF).flat_map { |byte3| (0xA1..0xFF).flat_map { |byte4| 0x8E << 24 | byte2 << 16 | byte3 << 8 | byte4 } } }
- ]
-
- encodings.merge!(
- Encoding::CP850 => codepoints_1byte,
- Encoding::CP852 => codepoints_1byte,
- Encoding::CP855 => codepoints_1byte,
- Encoding::GB1988 => codepoints_1byte,
- Encoding::IBM437 => codepoints_1byte,
- Encoding::IBM720 => codepoints_1byte,
- Encoding::IBM737 => codepoints_1byte,
- Encoding::IBM775 => codepoints_1byte,
- Encoding::IBM852 => codepoints_1byte,
- Encoding::IBM855 => codepoints_1byte,
- Encoding::IBM857 => codepoints_1byte,
- Encoding::IBM860 => codepoints_1byte,
- Encoding::IBM861 => codepoints_1byte,
- Encoding::IBM862 => codepoints_1byte,
- Encoding::IBM863 => codepoints_1byte,
- Encoding::IBM864 => codepoints_1byte,
- Encoding::IBM865 => codepoints_1byte,
- Encoding::IBM866 => codepoints_1byte,
- Encoding::IBM869 => codepoints_1byte,
- Encoding::ISO_8859_1 => codepoints_1byte,
- Encoding::ISO_8859_2 => codepoints_1byte,
- Encoding::ISO_8859_3 => codepoints_1byte,
- Encoding::ISO_8859_4 => codepoints_1byte,
- Encoding::ISO_8859_5 => codepoints_1byte,
- Encoding::ISO_8859_6 => codepoints_1byte,
- Encoding::ISO_8859_7 => codepoints_1byte,
- Encoding::ISO_8859_8 => codepoints_1byte,
- Encoding::ISO_8859_9 => codepoints_1byte,
- Encoding::ISO_8859_10 => codepoints_1byte,
- Encoding::ISO_8859_11 => codepoints_1byte,
- Encoding::ISO_8859_13 => codepoints_1byte,
- Encoding::ISO_8859_14 => codepoints_1byte,
- Encoding::ISO_8859_15 => codepoints_1byte,
- Encoding::ISO_8859_16 => codepoints_1byte,
- Encoding::KOI8_R => codepoints_1byte,
- Encoding::KOI8_U => codepoints_1byte,
- Encoding::MACCENTEURO => codepoints_1byte,
- Encoding::MACCROATIAN => codepoints_1byte,
- Encoding::MACCYRILLIC => codepoints_1byte,
- Encoding::MACGREEK => codepoints_1byte,
- Encoding::MACICELAND => codepoints_1byte,
- Encoding::MACROMAN => codepoints_1byte,
- Encoding::MACROMANIA => codepoints_1byte,
- Encoding::MACTHAI => codepoints_1byte,
- Encoding::MACTURKISH => codepoints_1byte,
- Encoding::MACUKRAINE => codepoints_1byte,
- Encoding::TIS_620 => codepoints_1byte,
- Encoding::Windows_1250 => codepoints_1byte,
- Encoding::Windows_1251 => codepoints_1byte,
- Encoding::Windows_1252 => codepoints_1byte,
- Encoding::Windows_1254 => codepoints_1byte,
- Encoding::Windows_1255 => codepoints_1byte,
- Encoding::Windows_1256 => codepoints_1byte,
- Encoding::Windows_1257 => codepoints_1byte,
- Encoding::Windows_1258 => codepoints_1byte,
- Encoding::Windows_874 => codepoints_1byte,
- Encoding::Big5 => codepoints_2bytes,
- Encoding::Big5_HKSCS => codepoints_2bytes,
- Encoding::Big5_UAO => codepoints_2bytes,
- Encoding::CP949 => codepoints_2bytes,
- Encoding::CP950 => codepoints_2bytes,
- Encoding::CP951 => codepoints_2bytes,
- Encoding::EUC_KR => codepoints_2bytes,
- Encoding::GBK => codepoints_2bytes,
- Encoding::GB12345 => codepoints_2bytes,
- Encoding::GB2312 => codepoints_2bytes,
- Encoding::MACJAPANESE => codepoints_2bytes,
- Encoding::Shift_JIS => codepoints_2bytes,
- Encoding::SJIS_DoCoMo => codepoints_2bytes,
- Encoding::SJIS_KDDI => codepoints_2bytes,
- Encoding::SJIS_SoftBank => codepoints_2bytes,
- Encoding::Windows_31J => codepoints_2bytes,
- Encoding::UTF_8 => codepoints_unicode,
- Encoding::UTF8_MAC => codepoints_unicode,
- Encoding::UTF8_DoCoMo => codepoints_unicode,
- Encoding::UTF8_KDDI => codepoints_unicode,
- Encoding::UTF8_SoftBank => codepoints_unicode,
- Encoding::CESU_8 => codepoints_unicode,
- Encoding::CP51932 => codepoints_eucjp,
- Encoding::EUC_JP => codepoints_eucjp,
- Encoding::EUCJP_MS => codepoints_eucjp,
- Encoding::EUC_JIS_2004 => codepoints_eucjp,
- Encoding::EMACS_MULE => codepoints_emacs_mule,
- Encoding::STATELESS_ISO_2022_JP => codepoints_emacs_mule,
- Encoding::STATELESS_ISO_2022_JP_KDDI => codepoints_emacs_mule,
- Encoding::GB18030 => codepoints_gb18030,
- Encoding::EUC_TW => codepoints_euc_tw
- )
- end
-
- # These test that we're correctly parsing codepoints for each alias of each
- # encoding that prism supports.
- encodings.each do |encoding, range|
- (encoding.names - %w[external internal filesystem locale]).each do |name|
- define_method(:"test_encoding_#{name}") do
- assert_encoding(encoding, name, range)
- end
- end
- end
-
- # These test that we're correctly setting the flags on strings for each
- # encoding that prism supports.
- escapes = ["\\x00", "\\x7F", "\\x80", "\\xFF", "\\u{00}", "\\u{7F}", "\\u{80}", "\\M-\\C-?"]
- escapes = escapes.concat(escapes.product(escapes).map(&:join))
- symbols = [:a, :ą, :+]
- regexps = [/a/, /ą/, //]
-
- encodings.each_key do |encoding|
- define_method(:"test_encoding_flags_#{encoding.name}") do
- assert_encoding_flags(encoding, escapes)
- end
-
- define_method(:"test_symbol_encoding_flags_#{encoding.name}") do
- assert_symbol_encoding_flags(encoding, symbols)
- end
-
- define_method(:"test_symbol_character_escape_encoding_flags_#{encoding.name}") do
- assert_symbol_character_escape_encoding_flags(encoding, escapes)
- end
-
- define_method(:"test_regular_expression_encoding_flags_#{encoding.name}") do
- assert_regular_expression_encoding_flags(encoding, regexps.map(&:inspect))
- end
-
- define_method(:"test_regular_expression_escape_encoding_flags_#{encoding.name}") do
- assert_regular_expression_encoding_flags(encoding, escapes.map { |e| "/#{e}/" })
- end
- end
-
- encoding_modifiers = { ascii_8bit: "n", utf_8: "u", euc_jp: "e", windows_31j: "s" }
- regexp_sources = ["abc", "garçon", "\\x80", "gar\\xC3\\xA7on", "gar\\u{E7}on", "abc\\u{FFFFFF}", "\\x80\\u{80}" ]
-
- encoding_modifiers.each_value do |modifier|
- encodings.each_key do |encoding|
- define_method(:"test_regular_expression_encoding_modifiers_/#{modifier}_#{encoding.name}") do
- assert_regular_expression_encoding_flags(
- encoding,
- regexp_sources.product(encoding_modifiers.values).map { |r, modifier| "/#{r}/#{modifier}" }
- )
- end
- end
- end
-
- def test_coding
- result = Prism.parse("# coding: utf-8\n'string'")
- actual = result.value.statements.body.first.unescaped.encoding
- assert_equal Encoding.find("utf-8"), actual
- end
-
- def test_coding_with_whitespace
- result = Prism.parse("# coding \t \r \v : \t \v \r ascii-8bit \n'string'")
- actual = result.value.statements.body.first.unescaped.encoding
- assert_equal Encoding.find("ascii-8bit"), actual
- end
-
- def test_emacs_style
- result = Prism.parse("# -*- coding: utf-8 -*-\n'string'")
- actual = result.value.statements.body.first.unescaped.encoding
- assert_equal Encoding.find("utf-8"), actual
- end
-
- def test_utf_8_variations
- %w[
- utf-8-unix
- utf-8-dos
- utf-8-mac
- utf-8-*
- ].each do |encoding|
- result = Prism.parse("# coding: #{encoding}\n'string'")
- actual = result.value.statements.body.first.unescaped.encoding
- assert_equal Encoding.find("utf-8"), actual
- end
- end
-
- def test_first_lexed_token
- encoding = Prism.lex("# encoding: ascii-8bit").value[0][0].value.encoding
- assert_equal Encoding.find("ascii-8bit"), encoding
- end
-
- if !ENV["PRISM_BUILD_MINIMAL"]
- # This test may be a little confusing. Basically when we use our strpbrk,
- # it takes into account the encoding of the file.
- def test_strpbrk_multibyte
- result = Prism.parse(<<~RUBY)
- # encoding: Shift_JIS
- %w[\x81\x5c]
- RUBY
-
- assert(result.errors.empty?)
- assert_equal(
- (+"\x81\x5c").force_encoding(Encoding::Shift_JIS),
- result.value.statements.body.first.elements.first.unescaped
- )
- end
-
- def test_slice_encoding
- slice = Prism.parse("# encoding: Shift_JIS\nγ‚’").value.slice
- assert_equal (+"γ‚’").force_encoding(Encoding::SHIFT_JIS), slice
- assert_equal Encoding::SHIFT_JIS, slice.encoding
- end
-
- def test_multibyte_escapes
- [
- ["'", "'"],
- ["\"", "\""],
- ["`", "`"],
- ["/", "/"],
- ["<<'HERE'\n", "\nHERE"],
- ["<<-HERE\n", "\nHERE"]
- ].each do |opening, closing|
- assert Prism.parse_success?("# encoding: shift_jis\n'\\\x82\xA0'\n")
- end
- end
- end
-
- private
-
- class ConstantContext < BasicObject
- def self.const_missing(const)
- const
- end
- end
-
- def constant_context
- ConstantContext.new
- end
-
- class IdentifierContext < BasicObject
- def method_missing(name, *)
- name
- end
- end
-
- def identifier_context
- IdentifierContext.new
- end
-
- def assert_encoding_constant(name, character)
- source = "# encoding: #{name}\n#{character}"
- expected = constant_context.instance_eval(source)
-
- result = Prism.parse(source)
- assert result.success?
-
- actual = result.value.statements.body.last
- assert_kind_of ConstantReadNode, actual
- assert_equal expected, actual.name
- end
-
- def assert_encoding_identifier(name, character)
- source = "# encoding: #{name}\n#{character}"
- expected = identifier_context.instance_eval(source)
-
- result = Prism.parse(source)
- assert result.success?
-
- actual = result.value.statements.body.last
- assert_kind_of CallNode, actual
- assert_equal expected, actual.name
- end
-
- # Check that we can properly parse every codepoint in the given encoding.
- def assert_encoding(encoding, name, range)
- # I'm not entirely sure, but I believe these codepoints are incorrect in
- # their parsing in CRuby. They all report as matching `[[:lower:]]` but
- # then they are parsed as constants. This is because CRuby determines if
- # an identifier is a constant or not by case folding it down to lowercase
- # and checking if there is a difference. And even though they report
- # themselves as lowercase, their case fold is different. I have reported
- # this bug upstream.
- case encoding
- when Encoding::UTF_8, Encoding::UTF_8_MAC, Encoding::UTF8_DoCoMo, Encoding::UTF8_KDDI, Encoding::UTF8_SoftBank, Encoding::CESU_8
- range = range.to_a - [
- 0x01c5, 0x01c8, 0x01cb, 0x01f2, 0x1f88, 0x1f89, 0x1f8a, 0x1f8b,
- 0x1f8c, 0x1f8d, 0x1f8e, 0x1f8f, 0x1f98, 0x1f99, 0x1f9a, 0x1f9b,
- 0x1f9c, 0x1f9d, 0x1f9e, 0x1f9f, 0x1fa8, 0x1fa9, 0x1faa, 0x1fab,
- 0x1fac, 0x1fad, 0x1fae, 0x1faf, 0x1fbc, 0x1fcc, 0x1ffc,
- ]
- when Encoding::Windows_1253
- range = range.to_a - [0xb5]
- end
-
- range.each do |codepoint|
- character = codepoint.chr(encoding)
-
- if character.match?(/[[:alpha:]]/)
- if character.match?(/[[:upper:]]/)
- assert_encoding_constant(name, character)
- else
- assert_encoding_identifier(name, character)
- end
- elsif character.match?(/[[:alnum:]]/)
- assert_encoding_identifier(name, "_#{character}")
- else
- next if ["/", "{"].include?(character)
-
- source = "# encoding: #{name}\n/(?##{character})/\n"
- assert Prism.parse(source).success?, "Expected #{source.inspect} to parse successfully."
- end
- rescue RangeError
- source = "# encoding: #{name}\n\\x#{codepoint.to_s(16)}"
- refute Prism.parse(source).success?
- end
- end
-
- def assert_encoding_flags(encoding, escapes)
- escapes.each do |escaped|
- source = "# encoding: #{encoding.name}\n\"#{escaped}\""
-
- expected =
- begin
- eval(source).encoding
- rescue SyntaxError => error
- if error.message.include?("UTF-8 mixed within")
- error.message[/: (.+?)\n/, 1]
- else
- raise
- end
- end
-
- actual =
- Prism.parse(source).then do |result|
- if result.success?
- string = result.value.statements.body.first
-
- if string.forced_utf8_encoding?
- Encoding::UTF_8
- elsif string.forced_binary_encoding?
- Encoding::ASCII_8BIT
- else
- encoding
- end
- else
- error = result.errors.first
-
- if error.message.include?("mixed")
- error.message
- else
- raise error.message
- end
- end
- end
-
- assert_equal expected, actual
- end
- end
-
- # Test Symbol literals without any interpolation or escape sequences.
- def assert_symbol_encoding_flags(encoding, symbols)
- symbols.each do |symbol|
- source = "# encoding: #{encoding.name}\n#{symbol.inspect}"
-
- expected =
- begin
- eval(source).encoding
- rescue SyntaxError => error
- unless error.message.include?("invalid multibyte char")
- raise
- end
- end
-
- actual =
- Prism.parse(source).then do |result|
- if result.success?
- symbol = result.value.statements.body.first
-
- if symbol.forced_utf8_encoding?
- Encoding::UTF_8
- elsif symbol.forced_binary_encoding?
- Encoding::ASCII_8BIT
- elsif symbol.forced_us_ascii_encoding?
- Encoding::US_ASCII
- else
- encoding
- end
- else
- error = result.errors.last
-
- unless error.message.include?("invalid symbol")
- raise error.message
- end
- end
- end
-
- assert_equal expected, actual
- end
- end
-
- def assert_symbol_character_escape_encoding_flags(encoding, escapes)
- escapes.each do |escaped|
- source = "# encoding: #{encoding.name}\n:\"#{escaped}\""
-
- expected =
- begin
- eval(source).encoding
- rescue SyntaxError => error
- if error.message.include?("UTF-8 mixed within")
- error.message[/: (.+?)\n/, 1]
- else
- raise
- end
- end
-
- actual =
- Prism.parse(source).then do |result|
- if result.success?
- symbol = result.value.statements.body.first
-
- if symbol.forced_utf8_encoding?
- Encoding::UTF_8
- elsif symbol.forced_binary_encoding?
- Encoding::ASCII_8BIT
- elsif symbol.forced_us_ascii_encoding?
- Encoding::US_ASCII
- else
- encoding
- end
- else
- error = result.errors.first
-
- if error.message.include?("mixed")
- error.message
- else
- raise error.message
- end
- end
- end
-
- assert_equal expected, actual
- end
- end
-
- def assert_regular_expression_encoding_flags(encoding, regexps)
- regexps.each do |regexp|
- regexp_modifier_used = regexp.end_with?("/u") || regexp.end_with?("/e") || regexp.end_with?("/s") || regexp.end_with?("/n")
- source = "# encoding: #{encoding.name}\n#{regexp}"
-
- encoding_errors = ["invalid multibyte char", "escaped non ASCII character in UTF-8 regexp", "differs from source encoding"]
- skipped_errors = ["invalid multibyte escape", "incompatible character encoding", "UTF-8 character in non UTF-8 regexp", "invalid Unicode range", "invalid Unicode list"]
-
- # TODO (nirvdrum 21-Feb-2024): Prism currently does not handle Regexp validation unless modifiers are used. So, skip processing those errors for now: https://github.com/ruby/prism/issues/2104
- unless regexp_modifier_used
- skipped_errors += encoding_errors
- encoding_errors.clear
- end
-
- expected =
- begin
- eval(source).encoding
- rescue SyntaxError => error
- if encoding_errors.find { |e| error.message.include?(e) }
- error.message.split("\n").map { |m| m[/: (.+?)$/, 1] }
- elsif skipped_errors.find { |e| error.message.include?(e) }
- next
- else
- raise
- end
- end
-
- actual =
- Prism.parse(source).then do |result|
- if result.success?
- regexp = result.value.statements.body.first
-
- actual_encoding = if regexp.forced_utf8_encoding?
- Encoding::UTF_8
- elsif regexp.forced_binary_encoding?
- Encoding::ASCII_8BIT
- elsif regexp.forced_us_ascii_encoding?
- Encoding::US_ASCII
- elsif regexp.ascii_8bit?
- Encoding::ASCII_8BIT
- elsif regexp.utf_8?
- Encoding::UTF_8
- elsif regexp.euc_jp?
- Encoding::EUC_JP
- elsif regexp.windows_31j?
- Encoding::Windows_31J
- else
- encoding
- end
-
- if regexp.utf_8? && actual_encoding != Encoding::UTF_8
- raise "expected regexp encoding to be UTF-8 due to '/u' modifier, but got #{actual_encoding.name}"
- elsif regexp.ascii_8bit? && (actual_encoding != Encoding::ASCII_8BIT && actual_encoding != Encoding::US_ASCII)
- raise "expected regexp encoding to be ASCII-8BIT or US-ASCII due to '/n' modifier, but got #{actual_encoding.name}"
- elsif regexp.euc_jp? && actual_encoding != Encoding::EUC_JP
- raise "expected regexp encoding to be EUC-JP due to '/e' modifier, but got #{actual_encoding.name}"
- elsif regexp.windows_31j? && actual_encoding != Encoding::Windows_31J
- raise "expected regexp encoding to be Windows-31J due to '/s' modifier, but got #{actual_encoding.name}"
- end
-
- if regexp.utf_8? && regexp.forced_utf8_encoding?
- raise "the forced_utf8 flag should not be set when the UTF-8 modifier (/u) is used"
- elsif regexp.ascii_8bit? && regexp.forced_binary_encoding?
- raise "the forced_ascii_8bit flag should not be set when the UTF-8 modifier (/u) is used"
- end
-
- actual_encoding
- else
- errors = result.errors.map(&:message)
-
- if errors.last&.include?("UTF-8 mixed within")
- nil
- else
- errors
- end
- end
- end
-
- # TODO (nirvdrum 22-Feb-2024): Remove this workaround once Prism better maps CRuby's error messages.
- # This class of error message is tricky. The part not being compared is a representation of the regexp.
- # Depending on the source encoding and any encoding modifiers being used, CRuby alters how the regexp is represented.
- # Sometimes it's an MBC string. Other times it uses hexadecimal character escapes. And in other cases it uses
- # the long-form Unicode escape sequences. This short-circuit checks that the error message is mostly correct.
- if expected.is_a?(Array) && actual.is_a?(Array)
- if expected.last.start_with?("/.../n has a non escaped non ASCII character in non ASCII-8BIT script:") &&
- actual.last.start_with?("/.../n has a non escaped non ASCII character in non ASCII-8BIT script:")
- expected.last.clear
- actual.last.clear
- end
- end
-
- assert_equal expected, actual
- end
- end
- end
-end
diff --git a/test/prism/errors_test.rb b/test/prism/errors_test.rb
index 5f4acb0120..8848ea5889 100644
--- a/test/prism/errors_test.rb
+++ b/test/prism/errors_test.rb
@@ -1246,8 +1246,7 @@ module Prism
end
def test_invalid_message_name
- result = Prism.parse("+.@foo,+=foo")
- assert_equal :"", result.value.statements.body.first.write_name
+ assert_equal :"", Prism.parse_statement("+.@foo,+=foo").write_name
end
def test_invalid_operator_write_fcall
diff --git a/test/prism/fixtures_test.rb b/test/prism/fixtures_test.rb
new file mode 100644
index 0000000000..7225b4ac66
--- /dev/null
+++ b/test/prism/fixtures_test.rb
@@ -0,0 +1,21 @@
+# frozen_string_literal: true
+
+return if RUBY_VERSION < "3.2.0"
+
+require_relative "test_helper"
+
+module Prism
+ class FixturesTest < TestCase
+ except = []
+
+ # Ruby < 3.3.0 cannot parse heredocs where there are leading whitespace
+ # characters in the heredoc start.
+ # Example: <<~' EOF' or <<-' EOF'
+ # https://bugs.ruby-lang.org/issues/19539
+ except << "heredocs_leading_whitespace.txt" if RUBY_VERSION < "3.3.0"
+
+ Fixture.each(except: except) do |fixture|
+ define_method(fixture.test_name) { assert_valid_syntax(fixture.read) }
+ end
+ end
+end
diff --git a/test/prism/fuzzer_test.rb b/test/prism/fuzzer_test.rb
index 511210e7ee..4927478bdc 100644
--- a/test/prism/fuzzer_test.rb
+++ b/test/prism/fuzzer_test.rb
@@ -1,7 +1,5 @@
# frozen_string_literal: true
-return if ENV["PRISM_BUILD_MINIMAL"]
-
require_relative "test_helper"
module Prism
@@ -9,7 +7,7 @@ module Prism
# invalid memory access.
class FuzzerTest < TestCase
def self.snippet(name, source)
- define_method(:"test_fuzzer_#{name}") { Prism.dump(source) }
+ define_method(:"test_fuzzer_#{name}") { Prism.profile(source) }
end
snippet "incomplete global variable", "$"
@@ -39,29 +37,31 @@ module Prism
snippet "escaped unicode at end of file 8", '"\\u33'
snippet "escaped unicode at end of file 9", '"\\u333'
snippet "float suffix at end of file", "1e"
+ snippet "parameter name that is zero length", "a { |b;"
snippet "statements node with multiple heredocs", <<~EOF
for <<A + <<B
A
B
EOF
+
snippet "create a binary call node with arg before receiver", <<~EOF
<<-A.g/{/
A
/, ""\\
EOF
+
snippet "regular expression with start and end out of order", <<~RUBY
<<-A.g//,
A
/{/, ''\\
RUBY
+
snippet "interpolated regular expression with start and end out of order", <<~RUBY
<<-A.g/{/,
A
a
/{/, ''\\
RUBY
-
- snippet "parameter name that is zero length", "a { |b;"
end
end
diff --git a/test/prism/heredoc_dedent_test.rb b/test/prism/heredoc_dedent_test.rb
index 9fbc4d936a..4e7a3c0a14 100644
--- a/test/prism/heredoc_dedent_test.rb
+++ b/test/prism/heredoc_dedent_test.rb
@@ -4,24 +4,131 @@ require_relative "test_helper"
module Prism
class HeredocDedentTest < TestCase
- filepath = File.expand_path("fixtures/tilde_heredocs.txt", __dir__)
+ def test_content_dedented_interpolation_content
+ assert_heredoc_dedent(
+ " a\n" "1\n" " a\n",
+ "<<~EOF\n" " a\n" "\#{1}\n" " a\n" "EOF\n"
+ )
+ end
+
+ def test_content
+ assert_heredoc_dedent(
+ "a\n",
+ "<<~EOF\n" " a\n" "EOF\n"
+ )
+ end
+
+ def test_tabs_dedent_spaces
+ assert_heredoc_dedent(
+ "\ta\n" "b\n" "\t\tc\n",
+ "<<~EOF\n" "\ta\n" " b\n" "\t\tc\n" "EOF\n"
+ )
+ end
+
+ def test_interpolation_then_content
+ assert_heredoc_dedent(
+ "1 a\n",
+ "<<~EOF\n" " \#{1} a\n" "EOF\n"
+ )
+ end
+
+ def test_content_then_interpolation
+ assert_heredoc_dedent(
+ "a 1\n",
+ "<<~EOF\n" " a \#{1}\n" "EOF\n"
+ )
+ end
+
+ def test_content_dedented_interpolation
+ assert_heredoc_dedent(
+ " a\n" "1\n",
+ "<<~EOF\n" " a\n" " \#{1}\n" "EOF\n"
+ )
+ end
+
+ def test_content_interpolation
+ assert_heredoc_dedent(
+ "a\n" "1\n",
+ "<<~EOF\n" " a\n" " \#{1}\n" "EOF\n"
+ )
+ end
- File.read(filepath).split(/(?=\n)\n(?=<)/).each_with_index do |heredoc, index|
- # The first example in this file has incorrect dedent calculated by
- # TruffleRuby so we skip it.
- next if index == 0 && RUBY_ENGINE == "truffleruby"
+ def test_content_content
+ assert_heredoc_dedent(
+ "a\n" "b\n",
+ "<<~EOF\n" " a\n" " b\n" "EOF\n"
+ )
+ end
- define_method "test_heredoc_#{index}" do
- node = Prism.parse(heredoc).value.statements.body.first
+ def test_content_indented_content
+ assert_heredoc_dedent(
+ "a\n" " b\n",
+ "<<~EOF\n" " a\n" " b\n" "EOF\n"
+ )
+ end
- if node.is_a?(StringNode)
- actual = node.unescaped
- else
- actual = node.parts.map { |part| part.is_a?(StringNode) ? part.unescaped : "1" }.join
- end
+ def test_content_dedented_content
+ assert_heredoc_dedent(
+ "\ta\n" "b\n",
+ "<<~EOF\n" "\t\t\ta\n" "\t\tb\n" "EOF\n"
+ )
+ end
- assert_equal(eval(heredoc), actual, "Expected heredocs to match.")
+ def test_single_quote
+ assert_heredoc_dedent(
+ "a \#{1}\n",
+ "<<~'EOF'\n" "a \#{1}\n" "EOF\n"
+ )
+ end
+
+ def test_mixed_indentation
+ assert_heredoc_dedent(
+ "a\n" " b\n",
+ "<<~EOF\n" "\ta\n" "\t b\n" "EOF\n"
+ )
+ end
+
+ def test_indented_content_content
+ assert_heredoc_dedent(
+ " a\n" "b\n",
+ "<<~EOF\n" "\t a\n" "\tb\n" "EOF\n"
+ )
+ end
+
+ def test_indent_size
+ assert_heredoc_dedent(
+ "a\n" " b\n",
+ "<<~EOF\n" "\ta\n" " b\n" "EOF\n"
+ )
+ end
+
+ def test_blank_lines
+ assert_heredoc_dedent(
+ "a\n" "\n" "b\n",
+ "<<~EOF\n" " a\n" "\n" " b\n" "EOF\n"
+ )
+ end
+
+ def test_many_blank_lines
+ assert_heredoc_dedent(
+ "a\n" "\n" "\n" "\n" "\n" "b\n",
+ "<<~EOF\n" " a\n" "\n" "\n" "\n" "\n" " b\n" "EOF\n"
+ )
+ end
+
+ private
+
+ def assert_heredoc_dedent(expected, source)
+ node = Prism.parse_statement(source)
+
+ if node.is_a?(StringNode)
+ actual = node.unescaped
+ else
+ actual = node.parts.map { |part| part.is_a?(StringNode) ? part.unescaped : "1" }.join
end
+
+ assert_equal(expected, actual)
+ assert_equal(eval(source), actual)
end
end
end
diff --git a/test/prism/lex_test.rb b/test/prism/lex_test.rb
new file mode 100644
index 0000000000..7eac677ef7
--- /dev/null
+++ b/test/prism/lex_test.rb
@@ -0,0 +1,90 @@
+# frozen_string_literal: true
+
+return if !(RUBY_ENGINE == "ruby" && RUBY_VERSION >= "3.2.0")
+
+require_relative "test_helper"
+
+module Prism
+ class LexTest < TestCase
+ except = [
+ # It seems like there are some oddities with nested heredocs and ripper.
+ # Waiting for feedback on https://bugs.ruby-lang.org/issues/19838.
+ "seattlerb/heredoc_nested.txt",
+ "whitequark/dedenting_heredoc.txt",
+ # Ripper seems to have a bug that the regex portions before and after
+ # the heredoc are combined into a single token. See
+ # https://bugs.ruby-lang.org/issues/19838.
+ "spanning_heredoc.txt",
+ "spanning_heredoc_newlines.txt"
+ ]
+
+ if RUBY_VERSION < "3.3.0"
+ # This file has changed behavior in Ripper in Ruby 3.3, so we skip it if
+ # we're on an earlier version.
+ except << "seattlerb/pct_w_heredoc_interp_nested.txt"
+
+ # Ruby < 3.3.0 cannot parse heredocs where there are leading whitespace
+ # characters in the heredoc start.
+ # Example: <<~' EOF' or <<-' EOF'
+ # https://bugs.ruby-lang.org/issues/19539
+ except << "heredocs_leading_whitespace.txt"
+ end
+
+ Fixture.each(except: except) do |fixture|
+ define_method(fixture.test_name) { assert_lex(fixture) }
+ end
+
+ def test_lex_file
+ assert_nothing_raised do
+ Prism.lex_file(__FILE__)
+ end
+
+ error = assert_raise Errno::ENOENT do
+ Prism.lex_file("idontexist.rb")
+ end
+
+ assert_equal "No such file or directory - idontexist.rb", error.message
+
+ assert_raise TypeError do
+ Prism.lex_file(nil)
+ end
+ end
+
+ def test_parse_lex
+ node, tokens = Prism.parse_lex("def foo; end").value
+
+ assert_kind_of ProgramNode, node
+ assert_equal 5, tokens.length
+ end
+
+ def test_parse_lex_file
+ node, tokens = Prism.parse_lex_file(__FILE__).value
+
+ assert_kind_of ProgramNode, node
+ refute_empty tokens
+
+ error = assert_raise Errno::ENOENT do
+ Prism.parse_lex_file("idontexist.rb")
+ end
+
+ assert_equal "No such file or directory - idontexist.rb", error.message
+
+ assert_raise TypeError do
+ Prism.parse_lex_file(nil)
+ end
+ end
+
+ private
+
+ def assert_lex(fixture)
+ source = fixture.read
+
+ result = Prism.lex_compat(source)
+ assert_equal [], result.errors
+
+ Prism.lex_ripper(source).zip(result.value).each do |(ripper, prism)|
+ assert_equal ripper, prism
+ end
+ end
+ end
+end
diff --git a/test/prism/library_symbols_test.rb b/test/prism/library_symbols_test.rb
index b10a367c18..44f225478b 100644
--- a/test/prism/library_symbols_test.rb
+++ b/test/prism/library_symbols_test.rb
@@ -3,8 +3,6 @@
require_relative "test_helper"
return if RUBY_PLATFORM !~ /linux/
-
-# TODO: determine why these symbols are incorrect on ppc64le
return if RUBY_PLATFORM =~ /powerpc64le/
module Prism
diff --git a/test/prism/locals_test.rb b/test/prism/locals_test.rb
index 0e57a9a80c..27fdfc90ef 100644
--- a/test/prism/locals_test.rb
+++ b/test/prism/locals_test.rb
@@ -17,14 +17,14 @@ require_relative "test_helper"
module Prism
class LocalsTest < TestCase
- base = File.join(__dir__, "fixtures")
- Dir["**/*.txt", base: base].each do |relative|
+ except = [
# Skip this fixture because it has a different number of locals because
# CRuby is eliminating dead code.
- next if relative == "whitequark/ruby_bug_10653.txt"
+ "whitequark/ruby_bug_10653.txt"
+ ]
- filepath = File.join(base, relative)
- define_method("test_#{relative}") { assert_locals(filepath) }
+ Fixture.each(except: except) do |fixture|
+ define_method(fixture.test_name) { assert_locals(fixture) }
end
def setup
@@ -38,8 +38,8 @@ module Prism
private
- def assert_locals(filepath)
- source = File.read(filepath)
+ def assert_locals(fixture)
+ source = fixture.read
expected = cruby_locals(source)
actual = prism_locals(source)
@@ -47,14 +47,6 @@ module Prism
assert_equal(expected, actual)
end
- def ignore_warnings
- previous_verbosity = $VERBOSE
- $VERBOSE = nil
- yield
- ensure
- $VERBOSE = previous_verbosity
- end
-
# A wrapper around a RubyVM::InstructionSequence that provides a more
# convenient interface for accessing parts of the iseq.
class ISeq
@@ -104,35 +96,29 @@ module Prism
# For the given source, compiles with CRuby and returns a list of all of the
# sets of local variables that were encountered.
def cruby_locals(source)
- verbose, $VERBOSE = $VERBOSE, nil
-
- begin
- locals = [] #: Array[Array[Symbol | Integer]]
- stack = [ISeq.new(RubyVM::InstructionSequence.compile(source).to_a)]
-
- while (iseq = stack.pop)
- names = [*iseq.local_table]
- names.map!.with_index do |name, index|
- # When an anonymous local variable is present in the iseq's local
- # table, it is represented as the stack offset from the top.
- # However, when these are dumped to binary and read back in, they
- # are replaced with the symbol :#arg_rest. To consistently handle
- # this, we replace them here with their index.
- if name == :"#arg_rest"
- names.length - index + 1
- else
- name
- end
+ locals = [] #: Array[Array[Symbol | Integer]]
+ stack = [ISeq.new(ignore_warnings { RubyVM::InstructionSequence.compile(source) }.to_a)]
+
+ while (iseq = stack.pop)
+ names = [*iseq.local_table]
+ names.map!.with_index do |name, index|
+ # When an anonymous local variable is present in the iseq's local
+ # table, it is represented as the stack offset from the top.
+ # However, when these are dumped to binary and read back in, they
+ # are replaced with the symbol :#arg_rest. To consistently handle
+ # this, we replace them here with their index.
+ if name == :"#arg_rest"
+ names.length - index + 1
+ else
+ name
end
-
- locals << names
- iseq.each_child { |child| stack << child }
end
- locals
- ensure
- $VERBOSE = verbose
+ locals << names
+ iseq.each_child { |child| stack << child }
end
+
+ locals
end
# For the given source, parses with prism and returns a list of all of the
diff --git a/test/prism/magic_comment_test.rb b/test/prism/magic_comment_test.rb
index 9e2e92af92..14653fb0f8 100644
--- a/test/prism/magic_comment_test.rb
+++ b/test/prism/magic_comment_test.rb
@@ -2,32 +2,109 @@
require_relative "test_helper"
-return if RUBY_ENGINE != "ruby"
-
module Prism
class MagicCommentTest < TestCase
- examples = [
- "# encoding: ascii",
- "# coding: ascii",
- "# eNcOdInG: ascii",
- "# CoDiNg: ascii",
- "# \s\t\v encoding \s\t\v : \s\t\v ascii \s\t\v",
- "# -*- encoding: ascii -*-",
- "# -*- coding: ascii -*-",
- "# -*- eNcOdInG: ascii -*-",
- "# -*- CoDiNg: ascii -*-",
- "# -*- \s\t\v encoding \s\t\v : \s\t\v ascii \s\t\v -*-",
- "# -*- foo: bar; encoding: ascii -*-",
- "# coding \t \r \v : \t \v \r ascii-8bit",
- "# vim: filetype=ruby, fileencoding=windows-31j, tabsize=3, shiftwidth=3"
- ]
-
- examples.each.with_index(1) do |example, index|
- define_method(:"test_magic_comment_#{index}") do
- expected = RubyVM::InstructionSequence.compile(%Q{#{example}\n""}).eval.encoding
- actual = Prism.parse(example).encoding
+ if RUBY_ENGINE == "ruby"
+ class MagicCommentRipper < Ripper
+ attr_reader :magic_comments
+
+ def initialize(*)
+ super
+ @magic_comments = []
+ end
+
+ def on_magic_comment(key, value)
+ @magic_comments << [key, value]
+ super
+ end
+ end
+
+ Fixture.each do |fixture|
+ define_method(fixture.test_name) { assert_magic_comments(fixture) }
+ end
+ end
+
+ def test_encoding
+ assert_magic_encoding(Encoding::US_ASCII, "# encoding: ascii")
+ end
+
+ def test_coding
+ assert_magic_encoding(Encoding::US_ASCII, "# coding: ascii")
+ end
+
+ def test_eNcOdInG
+ assert_magic_encoding(Encoding::US_ASCII, "# eNcOdInG: ascii")
+ end
+
+ def test_CoDiNg
+ assert_magic_encoding(Encoding::US_ASCII, "# CoDiNg: ascii")
+ end
+
+ def test_encoding_whitespace
+ assert_magic_encoding(Encoding::US_ASCII, "# \s\t\v encoding \s\t\v : \s\t\v ascii \s\t\v")
+ end
+
+ def test_emacs_encoding
+ assert_magic_encoding(Encoding::US_ASCII, "# -*- encoding: ascii -*-")
+ end
+
+ def test_emacs_coding
+ assert_magic_encoding(Encoding::US_ASCII, "# -*- coding: ascii -*-")
+ end
+
+ def test_emacs_eNcOdInG
+ assert_magic_encoding(Encoding::US_ASCII, "# -*- eNcOdInG: ascii -*-")
+ end
+
+ def test_emacs_CoDiNg
+ assert_magic_encoding(Encoding::US_ASCII, "# -*- CoDiNg: ascii -*-")
+ end
+
+ def test_emacs_whitespace
+ assert_magic_encoding(Encoding::US_ASCII, "# -*- \s\t\v encoding \s\t\v : \s\t\v ascii \s\t\v -*-")
+ end
+
+ def test_emacs_multiple
+ assert_magic_encoding(Encoding::US_ASCII, "# -*- foo: bar; encoding: ascii -*-")
+ end
+
+ def test_coding_whitespace
+ assert_magic_encoding(Encoding::ASCII_8BIT, "# coding \t \r \v : \t \v \r ascii-8bit")
+ end
+
+ def test_vim
+ assert_magic_encoding(Encoding::Windows_31J, "# vim: filetype=ruby, fileencoding=windows-31j, tabsize=3, shiftwidth=3")
+ end
+
+ private
+
+ def assert_magic_encoding(expected, line)
+ source = %Q{#{line}\n""}
+ actual = Prism.parse(source).encoding
+
+ # Compare against our expectation.
+ assert_equal expected, actual
+
+ # Compare against Ruby's expectation.
+ if defined?(RubyVM::InstructionSequence)
+ expected = RubyVM::InstructionSequence.compile(source).eval.encoding
assert_equal expected, actual
end
end
+
+ def assert_magic_comments(fixture)
+ source = fixture.read
+
+ # Check that we get the correct number of magic comments when lexing with
+ # ripper.
+ expected = MagicCommentRipper.new(source).tap(&:parse).magic_comments
+ actual = Prism.parse(source).magic_comments
+
+ assert_equal expected.length, actual.length
+ expected.zip(actual).each do |(expected_key, expected_value), magic_comment|
+ assert_equal expected_key, magic_comment.key
+ assert_equal expected_value, magic_comment.value
+ end
+ end
end
end
diff --git a/test/prism/newline_offsets_test.rb b/test/prism/newline_offsets_test.rb
new file mode 100644
index 0000000000..99b808b1df
--- /dev/null
+++ b/test/prism/newline_offsets_test.rb
@@ -0,0 +1,22 @@
+# frozen_string_literal: true
+
+require_relative "test_helper"
+
+module Prism
+ class NewlineOffsetsTest < TestCase
+ Fixture.each do |fixture|
+ define_method(fixture.test_name) { assert_newline_offsets(fixture) }
+ end
+
+ private
+
+ def assert_newline_offsets(fixture)
+ source = fixture.read
+
+ expected = [0]
+ source.b.scan("\n") { expected << $~.offset(0)[0] + 1 }
+
+ assert_equal expected, Prism.parse(source).source.offsets
+ end
+ end
+end
diff --git a/test/prism/newline_test.rb b/test/prism/newline_test.rb
index 75593d34bf..03d7df4c97 100644
--- a/test/prism/newline_test.rb
+++ b/test/prism/newline_test.rb
@@ -6,20 +6,23 @@ return unless defined?(RubyVM::InstructionSequence)
module Prism
class NewlineTest < TestCase
- base = File.expand_path("../", __FILE__)
- filepaths = Dir["*.rb", base: base] - %w[
- encoding_test.rb
+ skips = %w[
errors_test.rb
locals_test.rb
- parser_test.rb
regexp_test.rb
- static_literals_test.rb
+ test_helper.rb
unescape_test.rb
- warnings_test.rb
+ encoding/regular_expression_encoding_test.rb
+ encoding/string_encoding_test.rb
+ result/static_literals_test.rb
+ result/warnings_test.rb
+ ruby/parser_test.rb
+ ruby/ruby_parser_test.rb
]
- filepaths.each do |relative|
- define_method("test_newline_flags_#{relative}") do
+ base = __dir__
+ (Dir["{,api/,encoding/,result/,ruby/}*.rb", base: base] - skips).each do |relative|
+ define_method(:"test_#{relative}") do
assert_newlines(base, relative)
end
end
@@ -65,14 +68,6 @@ module Prism
assert_equal expected, actual
end
- def ignore_warnings
- previous_verbosity = $VERBOSE
- $VERBOSE = nil
- yield
- ensure
- $VERBOSE = previous_verbosity
- end
-
def rubyvm_lines(source)
queue = [ignore_warnings { RubyVM::InstructionSequence.compile(source) }]
lines = []
diff --git a/test/prism/parse_test.rb b/test/prism/parse_test.rb
deleted file mode 100644
index 5c66caebb9..0000000000
--- a/test/prism/parse_test.rb
+++ /dev/null
@@ -1,371 +0,0 @@
-# frozen_string_literal: true
-
-require_relative "test_helper"
-
-module Prism
- class ParseTest < TestCase
- # A subclass of Ripper that extracts out magic comments.
- class MagicCommentRipper < Ripper
- attr_reader :magic_comments
-
- def initialize(*)
- super
- @magic_comments = []
- end
-
- def on_magic_comment(key, value)
- @magic_comments << [key, value]
- super
- end
- end
-
- # When we pretty-print the trees to compare against the snapshots, we want to
- # be certain that we print with the same external encoding. This is because
- # methods like Symbol#inspect take into account external encoding and it could
- # change how the snapshot is generated. On machines with certain settings
- # (like LANG=C or -Eascii-8bit) this could have been changed. So here we're
- # going to force it to be UTF-8 to keep the snapshots consistent.
- def setup
- @previous_default_external = Encoding.default_external
- ignore_warnings { Encoding.default_external = Encoding::UTF_8 }
- end
-
- def teardown
- ignore_warnings { Encoding.default_external = @previous_default_external }
- end
-
- def test_empty_string
- result = Prism.parse("")
- assert_equal [], result.value.statements.body
- end
-
- def test_parse_takes_file_path
- filepath = "filepath.rb"
- result = Prism.parse("def foo; __FILE__; end", filepath: filepath)
-
- assert_equal filepath, find_source_file_node(result.value).filepath
- end
-
- def test_parse_takes_line
- line = 4
- result = Prism.parse("def foo\n __FILE__\nend", line: line)
-
- assert_equal line, result.value.location.start_line
- assert_equal line + 1, find_source_file_node(result.value).location.start_line
-
- result = Prism.parse_lex("def foo\n __FILE__\nend", line: line)
- assert_equal line, result.value.first.location.start_line
- end
-
- def test_parse_takes_negative_lines
- line = -2
- result = Prism.parse("def foo\n __FILE__\nend", line: line)
-
- assert_equal line, result.value.location.start_line
- assert_equal line + 1, find_source_file_node(result.value).location.start_line
-
- result = Prism.parse_lex("def foo\n __FILE__\nend", line: line)
- assert_equal line, result.value.first.location.start_line
- end
-
- def test_parse_lex
- node, tokens = Prism.parse_lex("def foo; end").value
-
- assert_kind_of ProgramNode, node
- assert_equal 5, tokens.length
- end
-
- if !ENV["PRISM_BUILD_MINIMAL"]
- def test_dump_file
- assert_nothing_raised do
- Prism.dump_file(__FILE__)
- end
-
- error = assert_raise Errno::ENOENT do
- Prism.dump_file("idontexist.rb")
- end
-
- assert_equal "No such file or directory - idontexist.rb", error.message
-
- assert_raise TypeError do
- Prism.dump_file(nil)
- end
- end
- end
-
- def test_lex_file
- assert_nothing_raised do
- Prism.lex_file(__FILE__)
- end
-
- error = assert_raise Errno::ENOENT do
- Prism.lex_file("idontexist.rb")
- end
-
- assert_equal "No such file or directory - idontexist.rb", error.message
-
- assert_raise TypeError do
- Prism.lex_file(nil)
- end
- end
-
- def test_parse_lex_file
- node, tokens = Prism.parse_lex_file(__FILE__).value
-
- assert_kind_of ProgramNode, node
- refute_empty tokens
-
- error = assert_raise Errno::ENOENT do
- Prism.parse_lex_file("idontexist.rb")
- end
-
- assert_equal "No such file or directory - idontexist.rb", error.message
-
- assert_raise TypeError do
- Prism.parse_lex_file(nil)
- end
- end
-
- def test_parse_file
- node = Prism.parse_file(__FILE__).value
- assert_kind_of ProgramNode, node
-
- error = assert_raise Errno::ENOENT do
- Prism.parse_file("idontexist.rb")
- end
-
- assert_equal "No such file or directory - idontexist.rb", error.message
-
- assert_raise TypeError do
- Prism.parse_file(nil)
- end
- end
-
- def test_parse_file_success
- assert_predicate Prism.parse_file_comments(__FILE__), :any?
-
- error = assert_raise Errno::ENOENT do
- Prism.parse_file_comments("idontexist.rb")
- end
-
- assert_equal "No such file or directory - idontexist.rb", error.message
-
- assert_raise TypeError do
- Prism.parse_file_comments(nil)
- end
- end
-
- def test_parse_file_comments
- assert_predicate Prism.parse_file_comments(__FILE__), :any?
-
- error = assert_raise Errno::ENOENT do
- Prism.parse_file_comments("idontexist.rb")
- end
-
- assert_equal "No such file or directory - idontexist.rb", error.message
-
- assert_raise TypeError do
- Prism.parse_file_comments(nil)
- end
- end
-
- # To accurately compare against Ripper, we need to make sure that we're
- # running on CRuby 3.2+.
- ripper_enabled = RUBY_ENGINE == "ruby" && RUBY_VERSION >= "3.2.0"
-
- # The FOCUS environment variable allows you to specify one particular fixture
- # to test, instead of all of them.
- base = File.join(__dir__, "fixtures")
- relatives = ENV["FOCUS"] ? [ENV["FOCUS"]] : Dir["**/*.txt", base: base]
-
- relatives.each do |relative|
- # These fail on TruffleRuby due to a difference in Symbol#inspect: :测试 vs :"测试"
- next if RUBY_ENGINE == "truffleruby" and %w[emoji_method_calls.txt seattlerb/bug202.txt seattlerb/magic_encoding_comment.txt].include?(relative)
-
- filepath = File.join(base, relative)
- snapshot = File.expand_path(File.join("snapshots", relative), __dir__)
-
- directory = File.dirname(snapshot)
- FileUtils.mkdir_p(directory) unless File.directory?(directory)
-
- ripper_should_match = ripper_enabled
- check_valid_syntax = RUBY_VERSION >= "3.2.0"
-
- case relative
- when "seattlerb/pct_w_heredoc_interp_nested.txt"
- # This file has changed behavior in Ripper in Ruby 3.3, so we skip it if
- # we're on an earlier version.
- ripper_should_match = false if RUBY_VERSION < "3.3.0"
- when "seattlerb/heredoc_nested.txt", "whitequark/dedenting_heredoc.txt"
- # It seems like there are some oddities with nested heredocs and ripper.
- # Waiting for feedback on https://bugs.ruby-lang.org/issues/19838.
- ripper_should_match = false
- when "spanning_heredoc.txt", "spanning_heredoc_newlines.txt"
- # Ripper seems to have a bug that the regex portions before and after
- # the heredoc are combined into a single token. See
- # https://bugs.ruby-lang.org/issues/19838.
- ripper_should_match = false
- when "heredocs_leading_whitespace.txt"
- # Ruby < 3.3.0 cannot parse heredocs where there are leading whitespace
- # characters in the heredoc start.
- # Example: <<~' EOF' or <<-' EOF'
- # https://bugs.ruby-lang.org/issues/19539
- if RUBY_VERSION < "3.3.0"
- ripper_should_match = false
- check_valid_syntax = false
- end
- end
-
- define_method "test_filepath_#{relative}" do
- # First, read the source from the filepath. Use binmode to avoid
- # converting CRLF on Windows, and explicitly set the external encoding
- # to UTF-8 to override the binmode default.
- source = File.read(filepath, binmode: true, external_encoding: Encoding::UTF_8)
-
- # Make sure that the given source is valid syntax, otherwise we have an
- # invalid fixture.
- assert_valid_syntax(source) if check_valid_syntax
-
- # Next, assert that there were no errors during parsing.
- result = Prism.parse(source, filepath: relative)
- assert_empty result.errors
-
- # Next, pretty print the source.
- printed = PP.pp(result.value, +"", 79)
-
- if File.exist?(snapshot)
- saved = File.read(snapshot)
-
- # If the snapshot file exists, but the printed value does not match the
- # snapshot, then update the snapshot file.
- if printed != saved
- File.write(snapshot, printed)
- warn("Updated snapshot at #{snapshot}.")
- end
-
- # If the snapshot file exists, then assert that the printed value
- # matches the snapshot.
- assert_equal(saved, printed)
- else
- # If the snapshot file does not yet exist, then write it out now.
- File.write(snapshot, printed)
- warn("Created snapshot at #{snapshot}.")
- end
-
- if !ENV["PRISM_BUILD_MINIMAL"]
- # Next, assert that the value can be serialized and deserialized
- # without changing the shape of the tree.
- assert_equal_nodes(result.value, Prism.load(source, Prism.dump(source, filepath: relative)).value)
- end
-
- # Next, check that the location ranges of each node in the tree are a
- # superset of their respective child nodes.
- assert_non_overlapping_locations(result.value)
-
- # Next, assert that the newlines are in the expected places.
- expected_newlines = [0]
- source.b.scan("\n") { expected_newlines << $~.offset(0)[0] + 1 }
- assert_equal expected_newlines, Prism.parse(source).source.offsets
-
- if ripper_should_match
- # Finally, assert that we can lex the source and get the same tokens as
- # Ripper.
- lex_result = Prism.lex_compat(source)
- assert_equal [], lex_result.errors
- tokens = lex_result.value
-
- begin
- Prism.lex_ripper(source).zip(tokens).each do |(ripper, prism)|
- assert_equal ripper, prism
- end
- rescue SyntaxError
- raise ArgumentError, "Test file has invalid syntax #{filepath}"
- end
-
- # Next, check that we get the correct number of magic comments when
- # lexing with ripper.
- expected = MagicCommentRipper.new(source).tap(&:parse).magic_comments
- actual = result.magic_comments
-
- assert_equal expected.length, actual.length
- expected.zip(actual).each do |(expected_key, expected_value), magic_comment|
- assert_equal expected_key, magic_comment.key
- assert_equal expected_value, magic_comment.value
- end
- end
- end
- end
-
- Dir["*.txt", base: base].each do |relative|
- next if relative == "newline_terminated.txt" || relative == "spanning_heredoc_newlines.txt"
-
- # We test every snippet (separated by \n\n) in isolation
- # to ensure the parser does not try to read bytes further than the end of each snippet
- define_method "test_individual_snippets_#{relative}" do
- filepath = File.join(base, relative)
-
- # First, read the source from the filepath. Use binmode to avoid converting CRLF on Windows,
- # and explicitly set the external encoding to UTF-8 to override the binmode default.
- file_contents = File.read(filepath, binmode: true, external_encoding: Encoding::UTF_8)
-
- file_contents.split(/(?<=\S)\n\n(?=\S)/).each do |snippet|
- snippet = snippet.rstrip
- result = Prism.parse(snippet, filepath: relative)
- assert_empty result.errors
-
- if !ENV["PRISM_BUILD_MINIMAL"]
- assert_equal_nodes(result.value, Prism.load(snippet, Prism.dump(snippet, filepath: relative)).value)
- end
- end
- end
- end
-
- private
-
- # Check that the location ranges of each node in the tree are a superset of
- # their respective child nodes.
- def assert_non_overlapping_locations(node)
- queue = [node]
-
- while (current = queue.shift)
- # We only want to compare parent/child location overlap in the case that
- # we are not looking at a heredoc. That's because heredoc locations are
- # special in that they only use the declaration of the heredoc.
- compare = !(current.is_a?(StringNode) ||
- current.is_a?(XStringNode) ||
- current.is_a?(InterpolatedStringNode) ||
- current.is_a?(InterpolatedXStringNode)) ||
- !current.opening&.start_with?("<<")
-
- current.child_nodes.each do |child|
- # child_nodes can return nil values, so we need to skip those.
- next unless child
-
- # Now that we know we have a child node, add that to the queue.
- queue << child
-
- if compare
- assert_operator current.location.start_offset, :<=, child.location.start_offset
- assert_operator current.location.end_offset, :>=, child.location.end_offset
- end
- end
- end
- end
-
- def find_source_file_node(program)
- queue = [program]
- while (node = queue.shift)
- return node if node.is_a?(SourceFileNode)
- queue.concat(node.compact_child_nodes)
- end
- end
-
- def ignore_warnings
- previous_verbosity = $VERBOSE
- $VERBOSE = nil
- yield
- ensure
- $VERBOSE = previous_verbosity
- end
- end
-end
diff --git a/test/prism/parser_test.rb b/test/prism/parser_test.rb
deleted file mode 100644
index 79b65cf75b..0000000000
--- a/test/prism/parser_test.rb
+++ /dev/null
@@ -1,186 +0,0 @@
-# frozen_string_literal: true
-
-require_relative "test_helper"
-
-begin
- verbose, $VERBOSE = $VERBOSE, nil
- require "parser/ruby33"
- require "prism/translation/parser33"
-rescue LoadError
- # In CRuby's CI, we're not going to test against the parser gem because we
- # don't want to have to install it. So in this case we'll just skip this test.
- return
-ensure
- $VERBOSE = verbose
-end
-
-# First, opt in to every AST feature.
-Parser::Builders::Default.modernize
-
-# Modify the source map == check so that it doesn't check against the node
-# itself so we don't get into a recursive loop.
-Parser::Source::Map.prepend(
- Module.new {
- def ==(other)
- self.class == other.class &&
- (instance_variables - %i[@node]).map do |ivar|
- instance_variable_get(ivar) == other.instance_variable_get(ivar)
- end.reduce(:&)
- end
- }
-)
-
-# Next, ensure that we're comparing the nodes and also comparing the source
-# ranges so that we're getting all of the necessary information.
-Parser::AST::Node.prepend(
- Module.new {
- def ==(other)
- super && (location == other.location)
- end
- }
-)
-
-module Prism
- class ParserTest < TestCase
- base = File.join(__dir__, "fixtures")
-
- # These files are erroring because of the parser gem being wrong.
- skip_incorrect = [
- "embdoc_no_newline_at_end.txt"
- ]
-
- # These files are either failing to parse or failing to translate, so we'll
- # skip them for now.
- skip_all = skip_incorrect | [
- "dash_heredocs.txt",
- "dos_endings.txt",
- "heredocs_with_ignored_newlines.txt",
- "regex.txt",
- "regex_char_width.txt",
- "spanning_heredoc.txt",
- "spanning_heredoc_newlines.txt",
- "unescaping.txt"
- ]
-
- # Not sure why these files are failing on JRuby, but skipping them for now.
- if RUBY_ENGINE == "jruby"
- skip_all.push("emoji_method_calls.txt", "symbols.txt")
- end
-
- # These files are failing to translate their lexer output into the lexer
- # output expected by the parser gem, so we'll skip them for now.
- skip_tokens = [
- "comments.txt",
- "heredoc_with_comment.txt",
- "indented_file_end.txt",
- "methods.txt",
- "strings.txt",
- "tilde_heredocs.txt",
- "xstring_with_backslash.txt"
- ]
-
- Dir["*.txt", base: base].each do |name|
- next if skip_all.include?(name)
-
- define_method("test_#{name}") do
- assert_equal_parses(File.join(base, name), compare_tokens: !skip_tokens.include?(name))
- end
- end
-
- private
-
- def assert_equal_parses(filepath, compare_tokens: true)
- buffer = Parser::Source::Buffer.new(filepath, 1)
- buffer.source = File.read(filepath)
-
- parser = Parser::Ruby33.new
- parser.diagnostics.consumer = ->(*) {}
- parser.diagnostics.all_errors_are_fatal = true
-
- expected_ast, expected_comments, expected_tokens =
- begin
- parser.tokenize(buffer)
- rescue ArgumentError, Parser::SyntaxError
- return
- end
-
- actual_ast, actual_comments, actual_tokens =
- Prism::Translation::Parser33.new.tokenize(buffer)
-
- assert_equal expected_ast, actual_ast, -> { assert_equal_asts_message(expected_ast, actual_ast) }
- assert_equal_tokens(expected_tokens, actual_tokens) if compare_tokens
- assert_equal_comments(expected_comments, actual_comments)
- end
-
- def assert_equal_asts_message(expected_ast, actual_ast)
- queue = [[expected_ast, actual_ast]]
-
- while (left, right = queue.shift)
- if left.type != right.type
- return "expected: #{left.type}\nactual: #{right.type}"
- end
-
- if left.location != right.location
- return "expected:\n#{left.inspect}\n#{left.location.inspect}\nactual:\n#{right.inspect}\n#{right.location.inspect}"
- end
-
- if left.type == :str && left.children[0] != right.children[0]
- return "expected: #{left.inspect}\nactual: #{right.inspect}"
- end
-
- left.children.zip(right.children).each do |left_child, right_child|
- queue << [left_child, right_child] if left_child.is_a?(Parser::AST::Node)
- end
- end
-
- "expected: #{expected_ast.inspect}\nactual: #{actual_ast.inspect}"
- end
-
- def assert_equal_tokens(expected_tokens, actual_tokens)
- if expected_tokens != actual_tokens
- expected_index = 0
- actual_index = 0
-
- while expected_index < expected_tokens.length
- expected_token = expected_tokens[expected_index]
- actual_token = actual_tokens[actual_index]
-
- expected_index += 1
- actual_index += 1
-
- # The parser gem always has a space before a string end in list
- # literals, but we don't. So we'll skip over the space.
- if expected_token[0] == :tSPACE && actual_token[0] == :tSTRING_END
- expected_index += 1
- next
- end
-
- # There are a lot of tokens that have very specific meaning according
- # to the context of the parser. We don't expose that information in
- # prism, so we need to normalize these tokens a bit.
- case actual_token[0]
- when :kDO
- actual_token[0] = expected_token[0] if %i[kDO_BLOCK kDO_LAMBDA].include?(expected_token[0])
- when :tLPAREN
- actual_token[0] = expected_token[0] if expected_token[0] == :tLPAREN2
- when :tPOW
- actual_token[0] = expected_token[0] if expected_token[0] == :tDSTAR
- end
-
- # Now we can assert that the tokens are actually equal.
- assert_equal expected_token, actual_token, -> {
- "expected: #{expected_token.inspect}\n" \
- "actual: #{actual_token.inspect}"
- }
- end
- end
- end
-
- def assert_equal_comments(expected_comments, actual_comments)
- assert_equal expected_comments, actual_comments, -> {
- "expected: #{expected_comments.inspect}\n" \
- "actual: #{actual_comments.inspect}"
- }
- end
- end
-end
diff --git a/test/prism/regexp_test.rb b/test/prism/regexp_test.rb
index 35be217f79..297020fc72 100644
--- a/test/prism/regexp_test.rb
+++ b/test/prism/regexp_test.rb
@@ -223,12 +223,12 @@ module Prism
def test_last_encoding_option_wins
regex = "/foo/nu"
- option = Prism.parse(regex).value.statements.body.first.options
+ option = Prism.parse_statement(regex).options
assert_equal Regexp::FIXEDENCODING, option
regex = "/foo/un"
- option = Prism.parse(regex).value.statements.body.first.options
+ option = Prism.parse_statement(regex).options
assert_equal Regexp::NOENCODING, option
end
@@ -246,7 +246,7 @@ module Prism
def options(flags)
options =
["/foo/#{flags}", "/foo\#{1}/#{flags}"].map do |source|
- Prism.parse(source).value.statements.body.first.options
+ Prism.parse_statement(source).options
end
# Check that we get the same set of options from both regular expressions
diff --git a/test/prism/attribute_write_test.rb b/test/prism/result/attribute_write_test.rb
index bd83d72da3..8f2e352738 100644
--- a/test/prism/attribute_write_test.rb
+++ b/test/prism/result/attribute_write_test.rb
@@ -1,6 +1,6 @@
# frozen_string_literal: true
-require_relative "test_helper"
+require_relative "../test_helper"
module Prism
class AttributeWriteTest < TestCase
@@ -41,18 +41,14 @@ module Prism
private
- def parse(source)
- Prism.parse(source).value.statements.body.first
- end
-
def assert_attribute_write(source)
- call = parse(source)
+ call = Prism.parse_statement(source)
assert(call.attribute_write?)
assert_equal(1, eval(source))
end
def refute_attribute_write(source)
- call = parse(source)
+ call = Prism.parse_statement(source)
refute(call.attribute_write?)
refute_equal(1, eval(source))
end
diff --git a/test/prism/comments_test.rb b/test/prism/result/comments_test.rb
index 952d03239c..178623a75f 100644
--- a/test/prism/comments_test.rb
+++ b/test/prism/result/comments_test.rb
@@ -1,6 +1,6 @@
# frozen_string_literal: true
-require_relative "test_helper"
+require_relative "../test_helper"
module Prism
class CommentsTest < TestCase
diff --git a/test/prism/constant_path_node_test.rb b/test/prism/result/constant_path_node_test.rb
index dffb55c0ff..75925600ca 100644
--- a/test/prism/constant_path_node_test.rb
+++ b/test/prism/result/constant_path_node_test.rb
@@ -1,6 +1,6 @@
# frozen_string_literal: true
-require_relative "test_helper"
+require_relative "../test_helper"
module Prism
class ConstantPathNodeTest < TestCase
@@ -11,7 +11,7 @@ module Prism
Qux
RUBY
- constant_path = Prism.parse(source).value.statements.body.first
+ constant_path = Prism.parse_statement(source)
assert_equal("Foo::Bar::Baz::Qux", constant_path.full_name)
end
@@ -22,7 +22,7 @@ module Prism
Qux
RUBY
- constant_path = Prism.parse(source).value.statements.body.first
+ constant_path = Prism.parse_statement(source)
assert_raise(ConstantPathNode::DynamicPartsInConstantPathError) do
constant_path.full_name
end
@@ -35,7 +35,7 @@ module Prism
Qux
RUBY
- constant_path = Prism.parse(source).value.statements.body.first
+ constant_path = Prism.parse_statement(source)
assert_raise(ConstantPathNode::DynamicPartsInConstantPathError) do
constant_path.full_name
@@ -49,7 +49,7 @@ module Prism
Qux, Something = [1, 2]
RUBY
- node = Prism.parse(source).value.statements.body.first
+ node = Prism.parse_statement(source)
assert_equal("Foo::Bar::Baz::Qux", node.lefts.first.full_name)
end
@@ -60,7 +60,7 @@ module Prism
Qux, Something = [1, 2]
RUBY
- node = Prism.parse(source).value.statements.body.first
+ node = Prism.parse_statement(source)
assert_equal("::Foo::Bar::Baz::Qux", node.lefts.first.full_name)
end
@@ -69,7 +69,7 @@ module Prism
self::Foo, Bar = [1, 2]
RUBY
- constant_target = Prism.parse(source).value.statements.body.first
+ constant_target = Prism.parse_statement(source)
dynamic, static = constant_target.lefts
assert_raise(ConstantPathNode::DynamicPartsInConstantPathError) do
@@ -84,7 +84,7 @@ module Prism
Bar
RUBY
- constant = Prism.parse(source).value.statements.body.first
+ constant = Prism.parse_statement(source)
assert_equal("Bar", constant.full_name)
end
end
diff --git a/test/prism/result/equality_test.rb b/test/prism/result/equality_test.rb
new file mode 100644
index 0000000000..4f6e665a88
--- /dev/null
+++ b/test/prism/result/equality_test.rb
@@ -0,0 +1,22 @@
+# frozen_string_literal: true
+
+require_relative "../test_helper"
+
+module Prism
+ class EqualityTest < TestCase
+ def test_equality
+ assert_operator Prism.parse_statement("1"), :===, Prism.parse_statement("1")
+ assert_operator Prism.parse("1").value, :===, Prism.parse("1").value
+
+ complex_source = "class Something; @var = something.else { _1 }; end"
+ assert_operator Prism.parse_statement(complex_source), :===, Prism.parse_statement(complex_source)
+
+ refute_operator Prism.parse_statement("1"), :===, Prism.parse_statement("2")
+ refute_operator Prism.parse_statement("1"), :===, Prism.parse_statement("0x1")
+
+ complex_source_1 = "class Something; @var = something.else { _1 }; end"
+ complex_source_2 = "class Something; @var = something.else { _2 }; end"
+ refute_operator Prism.parse_statement(complex_source_1), :===, Prism.parse_statement(complex_source_2)
+ end
+ end
+end
diff --git a/test/prism/result/heredoc_test.rb b/test/prism/result/heredoc_test.rb
new file mode 100644
index 0000000000..7913c04a88
--- /dev/null
+++ b/test/prism/result/heredoc_test.rb
@@ -0,0 +1,19 @@
+# frozen_string_literal: true
+
+require_relative "../test_helper"
+
+module Prism
+ class HeredocTest < TestCase
+ def test_heredoc?
+ refute Prism.parse_statement("\"foo\"").heredoc?
+ refute Prism.parse_statement("\"foo \#{1}\"").heredoc?
+ refute Prism.parse_statement("`foo`").heredoc?
+ refute Prism.parse_statement("`foo \#{1}`").heredoc?
+
+ assert Prism.parse_statement("<<~HERE\nfoo\nHERE\n").heredoc?
+ assert Prism.parse_statement("<<~HERE\nfoo \#{1}\nHERE\n").heredoc?
+ assert Prism.parse_statement("<<~`HERE`\nfoo\nHERE\n").heredoc?
+ assert Prism.parse_statement("<<~`HERE`\nfoo \#{1}\nHERE\n").heredoc?
+ end
+ end
+end
diff --git a/test/prism/index_write_test.rb b/test/prism/result/index_write_test.rb
index cf90eb082f..0d5383b601 100644
--- a/test/prism/index_write_test.rb
+++ b/test/prism/result/index_write_test.rb
@@ -1,6 +1,6 @@
# frozen_string_literal: true
-require_relative "test_helper"
+require_relative "../test_helper"
module Prism
class IndexWriteTest < TestCase
diff --git a/test/prism/result/integer_base_flags_test.rb b/test/prism/result/integer_base_flags_test.rb
new file mode 100644
index 0000000000..ef15fb437c
--- /dev/null
+++ b/test/prism/result/integer_base_flags_test.rb
@@ -0,0 +1,33 @@
+# frozen_string_literal: true
+
+require_relative "../test_helper"
+
+module Prism
+ class IntegerBaseFlagsTest < TestCase
+ # Through some bit hackery, we want to allow consumers to use the integer
+ # base flags as the base itself. It has a nice property that the current
+ # alignment provides them in the correct order. So here we test that our
+ # assumption holds so that it doesn't change out from under us.
+ #
+ # In C, this would look something like:
+ #
+ # ((flags & ~DECIMAL) << 1) || 10
+ #
+ # We have to do some other work in Ruby because 0 is truthy and ~ on an
+ # integer doesn't have a fixed width.
+ def test_flags
+ assert_equal 2, base("0b1")
+ assert_equal 8, base("0o1")
+ assert_equal 10, base("0d1")
+ assert_equal 16, base("0x1")
+ end
+
+ private
+
+ def base(source)
+ node = Prism.parse_statement(source)
+ value = (node.send(:flags) & (0b1111 - IntegerBaseFlags::DECIMAL)) << 1
+ value == 0 ? 10 : value
+ end
+ end
+end
diff --git a/test/prism/integer_parse_test.rb b/test/prism/result/integer_parse_test.rb
index 11aee174c5..7b5ce98bb6 100644
--- a/test/prism/integer_parse_test.rb
+++ b/test/prism/result/integer_parse_test.rb
@@ -1,6 +1,6 @@
# frozen_string_literal: true
-require_relative "test_helper"
+require_relative "../test_helper"
module Prism
class IntegerParseTest < TestCase
@@ -35,7 +35,7 @@ module Prism
private
def assert_integer_parse(expected, source = expected.to_s)
- assert_equal expected, Prism.parse(source).value.statements.body.first.value
+ assert_equal expected, Prism.parse_statement(source).value
end
end
end
diff --git a/test/prism/result/numeric_value_test.rb b/test/prism/result/numeric_value_test.rb
new file mode 100644
index 0000000000..5c89230a1f
--- /dev/null
+++ b/test/prism/result/numeric_value_test.rb
@@ -0,0 +1,21 @@
+# frozen_string_literal: true
+
+require_relative "../test_helper"
+
+module Prism
+ class NumericValueTest < TestCase
+ def test_numeric_value
+ assert_equal 123, Prism.parse_statement("123").value
+ assert_equal 3.14, Prism.parse_statement("3.14").value
+ assert_equal 42i, Prism.parse_statement("42i").value
+ assert_equal 42.1ri, Prism.parse_statement("42.1ri").value
+ assert_equal 3.14i, Prism.parse_statement("3.14i").value
+ assert_equal 42r, Prism.parse_statement("42r").value
+ assert_equal 0.5r, Prism.parse_statement("0.5r").value
+ assert_equal 42ri, Prism.parse_statement("42ri").value
+ assert_equal 0.5ri, Prism.parse_statement("0.5ri").value
+ assert_equal 0xFFr, Prism.parse_statement("0xFFr").value
+ assert_equal 0xFFri, Prism.parse_statement("0xFFri").value
+ end
+ end
+end
diff --git a/test/prism/result/overlap_test.rb b/test/prism/result/overlap_test.rb
new file mode 100644
index 0000000000..155bc870d3
--- /dev/null
+++ b/test/prism/result/overlap_test.rb
@@ -0,0 +1,43 @@
+# frozen_string_literal: true
+
+require_relative "../test_helper"
+
+module Prism
+ class OverlapTest < TestCase
+ Fixture.each do |fixture|
+ define_method(fixture.test_name) { assert_overlap(fixture) }
+ end
+
+ private
+
+ # Check that the location ranges of each node in the tree are a superset of
+ # their respective child nodes.
+ def assert_overlap(fixture)
+ queue = [Prism.parse_file(fixture.full_path).value]
+
+ while (current = queue.shift)
+ # We only want to compare parent/child location overlap in the case that
+ # we are not looking at a heredoc. That's because heredoc locations are
+ # special in that they only use the declaration of the heredoc.
+ compare = !(current.is_a?(StringNode) ||
+ current.is_a?(XStringNode) ||
+ current.is_a?(InterpolatedStringNode) ||
+ current.is_a?(InterpolatedXStringNode)) ||
+ !current.opening&.start_with?("<<")
+
+ current.child_nodes.each do |child|
+ # child_nodes can return nil values, so we need to skip those.
+ next unless child
+
+ # Now that we know we have a child node, add that to the queue.
+ queue << child
+
+ if compare
+ assert_operator current.location.start_offset, :<=, child.location.start_offset
+ assert_operator current.location.end_offset, :>=, child.location.end_offset
+ end
+ end
+ end
+ end
+ end
+end
diff --git a/test/prism/redundant_return_test.rb b/test/prism/result/redundant_return_test.rb
index c668169245..3b20aeba00 100644
--- a/test/prism/redundant_return_test.rb
+++ b/test/prism/result/redundant_return_test.rb
@@ -1,6 +1,6 @@
# frozen_string_literal: true
-require_relative "test_helper"
+require_relative "../test_helper"
module Prism
class RedundantReturnTest < TestCase
diff --git a/test/prism/result/regular_expression_options_test.rb b/test/prism/result/regular_expression_options_test.rb
new file mode 100644
index 0000000000..ff6e20526f
--- /dev/null
+++ b/test/prism/result/regular_expression_options_test.rb
@@ -0,0 +1,25 @@
+# frozen_string_literal: true
+
+require_relative "../test_helper"
+
+module Prism
+ # NOTE(review): despite the class name, the single test below exercises the
+ # keyword options accepted by Prism.parse_statement / Prism.parse (filepath,
+ # line, frozen_string_literal, scopes), not regular expression options.
+ # Confirm whether the file or class name is intended.
+ class RegularExpressionOptionsTest < TestCase
+ def test_options
+ # filepath: is what a __FILE__ node reports; it is "" when not given.
+ assert_equal "", Prism.parse_statement("__FILE__").filepath
+ assert_equal "foo.rb", Prism.parse_statement("__FILE__", filepath: "foo.rb").filepath
+
+ # line: sets the line number reported for the first line of the source.
+ assert_equal 1, Prism.parse_statement("foo").location.start_line
+ assert_equal 10, Prism.parse_statement("foo", line: 10).location.start_line
+
+ # frozen_string_literal: controls whether a string node reports frozen?.
+ refute Prism.parse_statement("\"foo\"").frozen?
+ assert Prism.parse_statement("\"foo\"", frozen_string_literal: true).frozen?
+ refute Prism.parse_statement("\"foo\"", frozen_string_literal: false).frozen?
+
+ # scopes: supplies local names, turning a bare identifier from a method
+ # call into a local variable read; with two scopes the read reports depth 1.
+ assert_kind_of CallNode, Prism.parse_statement("foo")
+ assert_kind_of LocalVariableReadNode, Prism.parse_statement("foo", scopes: [[:foo]])
+ assert_equal 1, Prism.parse_statement("foo", scopes: [[:foo], []]).depth
+
+ # The names from the given scope show up in the parsed program's locals.
+ assert_equal [:foo], Prism.parse("foo", scopes: [[:foo]]).value.locals
+ end
+ end
+end
diff --git a/test/prism/location_test.rb b/test/prism/result/source_location_test.rb
index 256e5b41e4..ca74b36e6f 100644
--- a/test/prism/location_test.rb
+++ b/test/prism/result/source_location_test.rb
@@ -1,9 +1,9 @@
# frozen_string_literal: true
-require_relative "test_helper"
+require_relative "../test_helper"
module Prism
- class LocationTest < TestCase
+ class SourceLocationTest < TestCase
def test_AliasGlobalVariableNode
assert_location(AliasGlobalVariableNode, "alias $foo $bar")
end
@@ -921,7 +921,7 @@ module Prism
def test_all_tested
expected = Prism.constants.grep(/.Node$/).sort - %i[MissingNode ProgramNode]
- actual = LocationTest.instance_methods(false).grep(/.Node$/).map { |name| name[5..].to_sym }.sort
+ actual = SourceLocationTest.instance_methods(false).grep(/.Node$/).map { |name| name[5..].to_sym }.sort
assert_equal expected, actual
end
diff --git a/test/prism/static_inspect_test.rb b/test/prism/result/static_inspect_test.rb
index cc8ed28c95..cf8cef3298 100644
--- a/test/prism/static_inspect_test.rb
+++ b/test/prism/result/static_inspect_test.rb
@@ -1,6 +1,6 @@
# frozen_string_literal: true
-require_relative "test_helper"
+require_relative "../test_helper"
module Prism
class StaticInspectTest < TestCase
diff --git a/test/prism/static_literals_test.rb b/test/prism/result/static_literals_test.rb
index 31c802bf90..dcfc692897 100644
--- a/test/prism/static_literals_test.rb
+++ b/test/prism/result/static_literals_test.rb
@@ -1,6 +1,6 @@
# frozen_string_literal: true
-require_relative "test_helper"
+require_relative "../test_helper"
module Prism
class StaticLiteralsTest < TestCase
diff --git a/test/prism/warnings_test.rb b/test/prism/result/warnings_test.rb
index 7ad704918a..ea062d4221 100644
--- a/test/prism/warnings_test.rb
+++ b/test/prism/result/warnings_test.rb
@@ -2,8 +2,7 @@
return if RUBY_VERSION < "3.1"
-require_relative "test_helper"
-require "stringio"
+require_relative "../test_helper"
module Prism
class WarningsTest < TestCase
diff --git a/test/prism/compiler_test.rb b/test/prism/ruby/compiler_test.rb
index 9a326eb8d6..35ccfd5950 100644
--- a/test/prism/compiler_test.rb
+++ b/test/prism/ruby/compiler_test.rb
@@ -1,7 +1,7 @@
# frozen_string_literal: true
# typed: ignore
-require_relative "test_helper"
+require_relative "../test_helper"
module Prism
class CompilerTest < TestCase
diff --git a/test/prism/desugar_compiler_test.rb b/test/prism/ruby/desugar_compiler_test.rb
index 1a1d580d2d..fe9a25e030 100644
--- a/test/prism/desugar_compiler_test.rb
+++ b/test/prism/ruby/desugar_compiler_test.rb
@@ -1,6 +1,6 @@
# frozen_string_literal: true
-require_relative "test_helper"
+require_relative "../test_helper"
module Prism
class DesugarCompilerTest < TestCase
diff --git a/test/prism/dispatcher_test.rb b/test/prism/ruby/dispatcher_test.rb
index 0d8a6d35e9..1b6d7f4117 100644
--- a/test/prism/dispatcher_test.rb
+++ b/test/prism/ruby/dispatcher_test.rb
@@ -1,6 +1,6 @@
# frozen_string_literal: true
-require_relative "test_helper"
+require_relative "../test_helper"
module Prism
class DispatcherTest < TestCase
diff --git a/test/prism/ruby/location_test.rb b/test/prism/ruby/location_test.rb
new file mode 100644
index 0000000000..fc80a5b875
--- /dev/null
+++ b/test/prism/ruby/location_test.rb
@@ -0,0 +1,173 @@
+# frozen_string_literal: true
+
+require_relative "../test_helper"
+
+module Prism
+ class LocationTest < TestCase
+ def test_join
+ call = Prism.parse_statement("1234 + 567")
+ receiver = call.receiver
+ argument = call.arguments.arguments.first
+
+ joined = receiver.location.join(argument.location)
+ assert_equal 0, joined.start_offset
+ assert_equal 10, joined.length
+
+ assert_raise(RuntimeError, "Incompatible locations") do
+ argument.location.join(receiver.location)
+ end
+
+ other_argument = Prism.parse_statement("1234 + 567").arguments.arguments.first
+
+ assert_raise(RuntimeError, "Incompatible sources") do
+ other_argument.location.join(receiver.location)
+ end
+
+ assert_raise(RuntimeError, "Incompatible sources") do
+ receiver.location.join(other_argument.location)
+ end
+ end
+
+ def test_character_offsets
+ program = Prism.parse("πŸ˜€ + πŸ˜€\n😍 ||= 😍").value
+
+ # first πŸ˜€
+ location = program.statements.body.first.receiver.location
+ assert_equal 0, location.start_character_offset
+ assert_equal 1, location.end_character_offset
+ assert_equal 0, location.start_character_column
+ assert_equal 1, location.end_character_column
+
+ # second πŸ˜€
+ location = program.statements.body.first.arguments.arguments.first.location
+ assert_equal 4, location.start_character_offset
+ assert_equal 5, location.end_character_offset
+ assert_equal 4, location.start_character_column
+ assert_equal 5, location.end_character_column
+
+ # first 😍
+ location = program.statements.body.last.name_loc
+ assert_equal 6, location.start_character_offset
+ assert_equal 7, location.end_character_offset
+ assert_equal 0, location.start_character_column
+ assert_equal 1, location.end_character_column
+
+ # second 😍
+ location = program.statements.body.last.value.location
+ assert_equal 12, location.start_character_offset
+ assert_equal 13, location.end_character_offset
+ assert_equal 6, location.start_character_column
+ assert_equal 7, location.end_character_column
+ end
+
+ def test_code_units
+ program = Prism.parse("πŸ˜€ + πŸ˜€\n😍 ||= 😍").value
+
+ # first πŸ˜€
+ location = program.statements.body.first.receiver.location
+
+ assert_equal 0, location.start_code_units_offset(Encoding::UTF_8)
+ assert_equal 0, location.start_code_units_offset(Encoding::UTF_16LE)
+ assert_equal 0, location.start_code_units_offset(Encoding::UTF_32LE)
+
+ assert_equal 1, location.end_code_units_offset(Encoding::UTF_8)
+ assert_equal 2, location.end_code_units_offset(Encoding::UTF_16LE)
+ assert_equal 1, location.end_code_units_offset(Encoding::UTF_32LE)
+
+ assert_equal 0, location.start_code_units_column(Encoding::UTF_8)
+ assert_equal 0, location.start_code_units_column(Encoding::UTF_16LE)
+ assert_equal 0, location.start_code_units_column(Encoding::UTF_32LE)
+
+ assert_equal 1, location.end_code_units_column(Encoding::UTF_8)
+ assert_equal 2, location.end_code_units_column(Encoding::UTF_16LE)
+ assert_equal 1, location.end_code_units_column(Encoding::UTF_32LE)
+
+ # second πŸ˜€
+ location = program.statements.body.first.arguments.arguments.first.location
+
+ assert_equal 4, location.start_code_units_offset(Encoding::UTF_8)
+ assert_equal 5, location.start_code_units_offset(Encoding::UTF_16LE)
+ assert_equal 4, location.start_code_units_offset(Encoding::UTF_32LE)
+
+ assert_equal 5, location.end_code_units_offset(Encoding::UTF_8)
+ assert_equal 7, location.end_code_units_offset(Encoding::UTF_16LE)
+ assert_equal 5, location.end_code_units_offset(Encoding::UTF_32LE)
+
+ assert_equal 4, location.start_code_units_column(Encoding::UTF_8)
+ assert_equal 5, location.start_code_units_column(Encoding::UTF_16LE)
+ assert_equal 4, location.start_code_units_column(Encoding::UTF_32LE)
+
+ assert_equal 5, location.end_code_units_column(Encoding::UTF_8)
+ assert_equal 7, location.end_code_units_column(Encoding::UTF_16LE)
+ assert_equal 5, location.end_code_units_column(Encoding::UTF_32LE)
+
+ # first 😍
+ location = program.statements.body.last.name_loc
+
+ assert_equal 6, location.start_code_units_offset(Encoding::UTF_8)
+ assert_equal 8, location.start_code_units_offset(Encoding::UTF_16LE)
+ assert_equal 6, location.start_code_units_offset(Encoding::UTF_32LE)
+
+ assert_equal 7, location.end_code_units_offset(Encoding::UTF_8)
+ assert_equal 10, location.end_code_units_offset(Encoding::UTF_16LE)
+ assert_equal 7, location.end_code_units_offset(Encoding::UTF_32LE)
+
+ assert_equal 0, location.start_code_units_column(Encoding::UTF_8)
+ assert_equal 0, location.start_code_units_column(Encoding::UTF_16LE)
+ assert_equal 0, location.start_code_units_column(Encoding::UTF_32LE)
+
+ assert_equal 1, location.end_code_units_column(Encoding::UTF_8)
+ assert_equal 2, location.end_code_units_column(Encoding::UTF_16LE)
+ assert_equal 1, location.end_code_units_column(Encoding::UTF_32LE)
+
+ # second 😍
+ location = program.statements.body.last.value.location
+
+ assert_equal 12, location.start_code_units_offset(Encoding::UTF_8)
+ assert_equal 15, location.start_code_units_offset(Encoding::UTF_16LE)
+ assert_equal 12, location.start_code_units_offset(Encoding::UTF_32LE)
+
+ assert_equal 13, location.end_code_units_offset(Encoding::UTF_8)
+ assert_equal 17, location.end_code_units_offset(Encoding::UTF_16LE)
+ assert_equal 13, location.end_code_units_offset(Encoding::UTF_32LE)
+
+ assert_equal 6, location.start_code_units_column(Encoding::UTF_8)
+ assert_equal 7, location.start_code_units_column(Encoding::UTF_16LE)
+ assert_equal 6, location.start_code_units_column(Encoding::UTF_32LE)
+
+ assert_equal 7, location.end_code_units_column(Encoding::UTF_8)
+ assert_equal 9, location.end_code_units_column(Encoding::UTF_16LE)
+ assert_equal 7, location.end_code_units_column(Encoding::UTF_32LE)
+ end
+
+ def test_chop
+ location = Prism.parse("foo").value.location
+
+ assert_equal "fo", location.chop.slice
+ assert_equal "", location.chop.chop.chop.slice
+
+ # Check that we don't go negative.
+ 10.times { location = location.chop }
+ assert_equal "", location.slice
+ end
+
+ def test_slice_lines
+ method = Prism.parse_statement("\nprivate def foo\nend\n").arguments.arguments.first
+
+ assert_equal "private def foo\nend\n", method.slice_lines
+ end
+
+ def test_adjoin
+ program = Prism.parse("foo.bar = 1").value
+
+ location = program.statements.body.first.message_loc
+ adjoined = location.adjoin("=")
+
+ assert_kind_of Location, adjoined
+ refute_equal location, adjoined
+
+ assert_equal 4, adjoined.start_offset
+ assert_equal 9, adjoined.end_offset
+ end
+ end
+end
diff --git a/test/prism/parameters_signature_test.rb b/test/prism/ruby/parameters_signature_test.rb
index 0eed8d993d..9256bcc070 100644
--- a/test/prism/parameters_signature_test.rb
+++ b/test/prism/ruby/parameters_signature_test.rb
@@ -1,9 +1,9 @@
# frozen_string_literal: true
-require_relative "test_helper"
-
return if RUBY_VERSION < "3.2"
+require_relative "../test_helper"
+
module Prism
class ParametersSignatureTest < TestCase
def test_req
@@ -56,7 +56,6 @@ module Prism
def test_key_ordering
omit("TruffleRuby returns keys in order they were declared") if RUBY_ENGINE == "truffleruby"
-
assert_parameters([[:keyreq, :a], [:keyreq, :b], [:key, :c], [:key, :d]], "a:, c: 1, b:, d: 2")
end
@@ -75,14 +74,13 @@ module Prism
private
def assert_parameters(expected, source)
- eval("def self.m(#{source}); end")
-
- begin
- assert_equal(expected, method(:m).parameters)
- assert_equal(expected, signature(source))
- ensure
- singleton_class.undef_method(:m)
- end
+ # Compare against our expectation.
+ assert_equal(expected, signature(source))
+
+ # Compare against Ruby's expectation.
+ object = Object.new
+ eval("def object.m(#{source}); end")
+ assert_equal(expected, object.method(:m).parameters)
end
def signature(source)
diff --git a/test/prism/ruby/parser_test.rb b/test/prism/ruby/parser_test.rb
new file mode 100644
index 0000000000..a76f193f52
--- /dev/null
+++ b/test/prism/ruby/parser_test.rb
@@ -0,0 +1,288 @@
+# frozen_string_literal: true
+
+require_relative "../test_helper"
+
+begin
+ # Set $VERBOSE to nil while the libraries load, remembering the previous
+ # value so the ensure clause can put it back.
+ verbose, $VERBOSE = $VERBOSE, nil
+ require "parser/ruby33"
+ require "prism/translation/parser33"
+rescue LoadError
+ # In CRuby's CI, we're not going to test against the parser gem because we
+ # don't want to have to install it. So in this case we'll just skip this test.
+ return
+ensure
+ # Runs on both the success path and the early return above.
+ $VERBOSE = verbose
+end
+
+# First, opt in to every AST feature.
+Parser::Builders::Default.modernize
+
+# Modify the source map == check so that it doesn't check against the node
+# itself so we don't get into a recursive loop.
+Parser::Source::Map.prepend(
+ Module.new {
+ def ==(other)
+ self.class == other.class &&
+ (instance_variables - %i[@node]).map do |ivar|
+ instance_variable_get(ivar) == other.instance_variable_get(ivar)
+ end.reduce(:&)
+ end
+ }
+)
+
+# Next, ensure that we're comparing the nodes and also comparing the source
+# ranges so that we're getting all of the necessary information.
+Parser::AST::Node.prepend(
+ Module.new {
+ # Defers to the existing == first; locations are only compared when that
+ # already succeeded, so `other.location` is not called otherwise.
+ def ==(other)
+ super && (location == other.location)
+ end
+ }
+)
+
+module Prism
+ class ParserTest < TestCase
+ # These files are erroring because of the parser gem being wrong.
+ skip_incorrect = [
+ "embdoc_no_newline_at_end.txt"
+ ]
+
+ # These files are either failing to parse or failing to translate, so we'll
+ # skip them for now.
+ skip_all = skip_incorrect | [
+ "dash_heredocs.txt",
+ "dos_endings.txt",
+ "heredocs_with_ignored_newlines.txt",
+ "regex.txt",
+ "regex_char_width.txt",
+ "spanning_heredoc.txt",
+ "spanning_heredoc_newlines.txt",
+ "unescaping.txt",
+ "seattlerb/backticks_interpolation_line.txt",
+ "seattlerb/block_decomp_anon_splat_arg.txt",
+ "seattlerb/block_decomp_arg_splat_arg.txt",
+ "seattlerb/block_decomp_arg_splat.txt",
+ "seattlerb/block_decomp_splat.txt",
+ "seattlerb/block_paren_splat.txt",
+ "seattlerb/bug190.txt",
+ "seattlerb/case_in_hash_pat_rest_solo.txt",
+ "seattlerb/case_in_hash_pat_rest.txt",
+ "seattlerb/case_in.txt",
+ "seattlerb/heredoc_nested.txt",
+ "seattlerb/heredoc_squiggly_blank_line_plus_interpolation.txt",
+ "seattlerb/heredoc_with_carriage_return_escapes_windows.txt",
+ "seattlerb/heredoc_with_carriage_return_escapes.txt",
+ "seattlerb/heredoc_with_extra_carriage_returns_windows.txt",
+ "seattlerb/heredoc_with_only_carriage_returns_windows.txt",
+ "seattlerb/heredoc_with_only_carriage_returns.txt",
+ "seattlerb/masgn_double_paren.txt",
+ "seattlerb/parse_line_heredoc_hardnewline.txt",
+ "seattlerb/parse_pattern_044.txt",
+ "seattlerb/parse_pattern_058_2.txt",
+ "seattlerb/parse_pattern_058.txt",
+ "seattlerb/pct_nl.txt",
+ "seattlerb/pctW_lineno.txt",
+ "seattlerb/regexp_esc_C_slash.txt",
+ "seattlerb/TestRubyParserShared.txt",
+ "unparser/corpus/literal/assignment.txt",
+ "unparser/corpus/literal/block.txt",
+ "unparser/corpus/literal/def.txt",
+ "unparser/corpus/literal/dstr.txt",
+ "unparser/corpus/literal/literal.txt",
+ "unparser/corpus/literal/pattern.txt",
+ "unparser/corpus/semantic/dstr.txt",
+ "unparser/corpus/semantic/opasgn.txt",
+ "whitequark/dedenting_interpolating_heredoc_fake_line_continuation.txt",
+ "whitequark/masgn_nested.txt",
+ "whitequark/newline_in_hash_argument.txt",
+ "whitequark/parser_bug_640.txt",
+ "whitequark/parser_slash_slash_n_escaping_in_literals.txt",
+ "whitequark/ruby_bug_11989.txt",
+ "whitequark/slash_newline_in_heredocs.txt",
+ "whitequark/unary_num_pow_precedence.txt"
+ ]
+
+ # Not sure why these files are failing on JRuby, but skipping them for now.
+ if RUBY_ENGINE == "jruby"
+ skip_all.push("emoji_method_calls.txt", "symbols.txt")
+ end
+
+ # These files are failing to translate their lexer output into the lexer
+ # output expected by the parser gem, so we'll skip them for now.
+ skip_tokens = [
+ "comments.txt",
+ "heredoc_with_comment.txt",
+ "indented_file_end.txt",
+ "methods.txt",
+ "strings.txt",
+ "tilde_heredocs.txt",
+ "xstring_with_backslash.txt",
+ "seattlerb/bug169.txt",
+ "seattlerb/class_comments.txt",
+ "seattlerb/difficult4__leading_dots2.txt",
+ "seattlerb/difficult6__7.txt",
+ "seattlerb/difficult6__8.txt",
+ "seattlerb/dsym_esc_to_sym.txt",
+ "seattlerb/heredoc__backslash_dos_format.txt",
+ "seattlerb/heredoc_backslash_nl.txt",
+ "seattlerb/heredoc_comma_arg.txt",
+ "seattlerb/heredoc_squiggly_blank_lines.txt",
+ "seattlerb/heredoc_squiggly_interp.txt",
+ "seattlerb/heredoc_squiggly_tabs_extra.txt",
+ "seattlerb/heredoc_squiggly_tabs.txt",
+ "seattlerb/heredoc_squiggly_visually_blank_lines.txt",
+ "seattlerb/heredoc_squiggly.txt",
+ "seattlerb/heredoc_unicode.txt",
+ "seattlerb/heredoc_with_interpolation_and_carriage_return_escapes_windows.txt",
+ "seattlerb/heredoc_with_interpolation_and_carriage_return_escapes.txt",
+ "seattlerb/interpolated_symbol_array_line_breaks.txt",
+ "seattlerb/interpolated_word_array_line_breaks.txt",
+ "seattlerb/label_vs_string.txt",
+ "seattlerb/module_comments.txt",
+ "seattlerb/non_interpolated_symbol_array_line_breaks.txt",
+ "seattlerb/non_interpolated_word_array_line_breaks.txt",
+ "seattlerb/parse_line_block_inline_comment_leading_newlines.txt",
+ "seattlerb/parse_line_block_inline_comment.txt",
+ "seattlerb/parse_line_block_inline_multiline_comment.txt",
+ "seattlerb/parse_line_dstr_escaped_newline.txt",
+ "seattlerb/parse_line_heredoc.txt",
+ "seattlerb/parse_line_multiline_str_literal_n.txt",
+ "seattlerb/parse_line_str_with_newline_escape.txt",
+ "seattlerb/pct_Q_backslash_nl.txt",
+ "seattlerb/pct_w_heredoc_interp_nested.txt",
+ "seattlerb/qsymbols_empty_space.txt",
+ "seattlerb/qw_escape_term.txt",
+ "seattlerb/qWords_space.txt",
+ "seattlerb/read_escape_unicode_curlies.txt",
+ "seattlerb/read_escape_unicode_h4.txt",
+ "seattlerb/required_kwarg_no_value.txt",
+ "seattlerb/slashy_newlines_within_string.txt",
+ "seattlerb/str_double_escaped_newline.txt",
+ "seattlerb/str_double_newline.txt",
+ "seattlerb/str_evstr_escape.txt",
+ "seattlerb/str_newline_hash_line_number.txt",
+ "seattlerb/str_single_newline.txt",
+ "seattlerb/symbol_empty.txt",
+ "seattlerb/symbols_empty_space.txt",
+ "whitequark/args.txt",
+ "whitequark/beginless_erange_after_newline.txt",
+ "whitequark/beginless_irange_after_newline.txt",
+ "whitequark/bug_ascii_8bit_in_literal.txt",
+ "whitequark/bug_def_no_paren_eql_begin.txt",
+ "whitequark/dedenting_heredoc.txt",
+ "whitequark/dedenting_non_interpolating_heredoc_line_continuation.txt",
+ "whitequark/forward_arg_with_open_args.txt",
+ "whitequark/interp_digit_var.txt",
+ "whitequark/lbrace_arg_after_command_args.txt",
+ "whitequark/multiple_pattern_matches.txt",
+ "whitequark/parser_drops_truncated_parts_of_squiggly_heredoc.txt",
+ "whitequark/ruby_bug_11990.txt",
+ "whitequark/ruby_bug_14690.txt",
+ "whitequark/ruby_bug_9669.txt",
+ "whitequark/space_args_arg_block.txt",
+ "whitequark/space_args_block.txt"
+ ]
+
+ Fixture.each(except: skip_all) do |fixture|
+ define_method(fixture.test_name) do
+ assert_equal_parses(fixture, compare_tokens: !skip_tokens.include?(fixture.path))
+ end
+ end
+
+ private
+
+ def assert_equal_parses(fixture, compare_tokens: true)
+ buffer = Parser::Source::Buffer.new(fixture.path, 1)
+ buffer.source = fixture.read
+
+ parser = Parser::Ruby33.new
+ parser.diagnostics.consumer = ->(*) {}
+ parser.diagnostics.all_errors_are_fatal = true
+
+ expected_ast, expected_comments, expected_tokens =
+ begin
+ ignore_warnings { parser.tokenize(buffer) }
+ rescue ArgumentError, Parser::SyntaxError
+ return
+ end
+
+ actual_ast, actual_comments, actual_tokens =
+ ignore_warnings { Prism::Translation::Parser33.new.tokenize(buffer) }
+
+ assert_equal expected_ast, actual_ast, -> { assert_equal_asts_message(expected_ast, actual_ast) }
+ assert_equal_tokens(expected_tokens, actual_tokens) if compare_tokens
+ assert_equal_comments(expected_comments, actual_comments)
+ end
+
+ def assert_equal_asts_message(expected_ast, actual_ast)
+ queue = [[expected_ast, actual_ast]]
+
+ while (left, right = queue.shift)
+ if left.type != right.type
+ return "expected: #{left.type}\nactual: #{right.type}"
+ end
+
+ if left.location != right.location
+ return "expected:\n#{left.inspect}\n#{left.location.inspect}\nactual:\n#{right.inspect}\n#{right.location.inspect}"
+ end
+
+ if left.type == :str && left.children[0] != right.children[0]
+ return "expected: #{left.inspect}\nactual: #{right.inspect}"
+ end
+
+ left.children.zip(right.children).each do |left_child, right_child|
+ queue << [left_child, right_child] if left_child.is_a?(Parser::AST::Node)
+ end
+ end
+
+ "expected: #{expected_ast.inspect}\nactual: #{actual_ast.inspect}"
+ end
+
+ def assert_equal_tokens(expected_tokens, actual_tokens)
+ if expected_tokens != actual_tokens
+ expected_index = 0
+ actual_index = 0
+
+ while expected_index < expected_tokens.length
+ expected_token = expected_tokens[expected_index]
+ actual_token = actual_tokens[actual_index]
+
+ expected_index += 1
+ actual_index += 1
+
+ # The parser gem always has a space before a string end in list
+ # literals, but we don't. So we'll skip over the space.
+ if expected_token[0] == :tSPACE && actual_token[0] == :tSTRING_END
+ expected_index += 1
+ next
+ end
+
+ # There are a lot of tokens that have very specific meaning according
+ # to the context of the parser. We don't expose that information in
+ # prism, so we need to normalize these tokens a bit.
+ case actual_token[0]
+ when :kDO
+ actual_token[0] = expected_token[0] if %i[kDO_BLOCK kDO_LAMBDA].include?(expected_token[0])
+ when :tLPAREN
+ actual_token[0] = expected_token[0] if expected_token[0] == :tLPAREN2
+ when :tPOW
+ actual_token[0] = expected_token[0] if expected_token[0] == :tDSTAR
+ end
+
+ # Now we can assert that the tokens are actually equal.
+ assert_equal expected_token, actual_token, -> {
+ "expected: #{expected_token.inspect}\n" \
+ "actual: #{actual_token.inspect}"
+ }
+ end
+ end
+ end
+
+ def assert_equal_comments(expected_comments, actual_comments)
+ assert_equal expected_comments, actual_comments, -> {
+ "expected: #{expected_comments.inspect}\n" \
+ "actual: #{actual_comments.inspect}"
+ }
+ end
+ end
+end
diff --git a/test/prism/pattern_test.rb b/test/prism/ruby/pattern_test.rb
index e0aa079cb9..23f512fc1c 100644
--- a/test/prism/pattern_test.rb
+++ b/test/prism/ruby/pattern_test.rb
@@ -1,6 +1,6 @@
# frozen_string_literal: true
-require_relative "test_helper"
+require_relative "../test_helper"
module Prism
class PatternTest < TestCase
diff --git a/test/prism/reflection_test.rb b/test/prism/ruby/reflection_test.rb
index 869b68b1f8..3ac462e1ac 100644
--- a/test/prism/reflection_test.rb
+++ b/test/prism/ruby/reflection_test.rb
@@ -1,6 +1,6 @@
# frozen_string_literal: true
-require_relative "test_helper"
+require_relative "../test_helper"
module Prism
class ReflectionTest < TestCase
diff --git a/test/prism/ripper_test.rb b/test/prism/ruby/ripper_test.rb
index 07238fc3d5..8db47da3d3 100644
--- a/test/prism/ripper_test.rb
+++ b/test/prism/ruby/ripper_test.rb
@@ -2,13 +2,11 @@
return if RUBY_VERSION < "3.3"
-require_relative "test_helper"
+require_relative "../test_helper"
module Prism
class RipperTest < TestCase
- base = File.join(__dir__, "fixtures")
- relatives = ENV["FOCUS"] ? [ENV["FOCUS"]] : Dir["**/*.txt", base: base]
-
+ # Skip these tests that Ripper is reporting the wrong results for.
incorrect = [
# Ripper incorrectly attributes the block to the keyword.
"seattlerb/block_break.txt",
@@ -31,6 +29,7 @@ module Prism
"spanning_heredoc.txt"
]
+ # Skip these tests that we haven't implemented yet.
omitted = [
"dos_endings.txt",
"heredocs_with_ignored_newlines.txt",
@@ -50,30 +49,8 @@ module Prism
"whitequark/slash_newline_in_heredocs.txt"
]
- relatives.each do |relative|
- # Skip the tests that Ripper is reporting the wrong results for.
- next if incorrect.include?(relative)
-
- # Skip the tests we haven't implemented yet.
- next if omitted.include?(relative)
-
- filepath = File.join(__dir__, "fixtures", relative)
-
- define_method "test_ripper_#{relative}" do
- source = File.read(filepath, binmode: true, external_encoding: Encoding::UTF_8)
-
- case relative
- when /break|next|redo|if|unless|rescue|control|keywords|retry/
- source = "-> do\nrescue\n#{source}\nend"
- end
-
- case source
- when /^ *yield/
- source = "def __invalid_yield__\n#{source}\nend"
- end
-
- assert_ripper(source)
- end
+ Fixture.each(except: incorrect | omitted) do |fixture|
+ define_method(fixture.test_name) { assert_ripper(fixture.read) }
end
private
diff --git a/test/prism/ruby/ruby_parser_test.rb b/test/prism/ruby/ruby_parser_test.rb
new file mode 100644
index 0000000000..a13daeeb84
--- /dev/null
+++ b/test/prism/ruby/ruby_parser_test.rb
@@ -0,0 +1,127 @@
+# frozen_string_literal: true
+
+return if RUBY_ENGINE == "jruby"
+
+require_relative "../test_helper"
+
+begin
+ require "ruby_parser"
+rescue LoadError
+ # In CRuby's CI, we're not going to test against the ruby_parser gem because
+ # we don't want to have to install it. So in this case we'll just skip this
+ # test.
+ return
+end
+
+# We want to also compare lines and files to make sure we're setting them
+# correctly.
+Sexp.prepend(
+ Module.new do
+ # Defers to the existing == first, then additionally requires the line and
+ # file to match. The line_max comparison is left commented out.
+ def ==(other)
+ super && line == other.line && file == other.file # && line_max == other.line_max
+ end
+ end
+)
+
+module Prism
+ class RubyParserTest < TestCase
+ todos = [
+ "newline_terminated.txt",
+ "regex_char_width.txt",
+ "seattlerb/bug169.txt",
+ "seattlerb/masgn_colon3.txt",
+ "seattlerb/messy_op_asgn_lineno.txt",
+ "seattlerb/op_asgn_primary_colon_const_command_call.txt",
+ "seattlerb/regexp_esc_C_slash.txt",
+ "seattlerb/str_lit_concat_bad_encodings.txt",
+ "unescaping.txt",
+ "unparser/corpus/literal/kwbegin.txt",
+ "unparser/corpus/literal/send.txt",
+ "whitequark/masgn_const.txt",
+ "whitequark/ruby_bug_12402.txt",
+ "whitequark/ruby_bug_14690.txt",
+ "whitequark/space_args_block.txt"
+ ]
+
+ # https://github.com/seattlerb/ruby_parser/issues/344
+ failures = [
+ "alias.txt",
+ "dos_endings.txt",
+ "heredocs_with_ignored_newlines.txt",
+ "method_calls.txt",
+ "methods.txt",
+ "multi_write.txt",
+ "not.txt",
+ "patterns.txt",
+ "regex.txt",
+ "seattlerb/and_multi.txt",
+ "seattlerb/heredoc__backslash_dos_format.txt",
+ "seattlerb/heredoc_bad_hex_escape.txt",
+ "seattlerb/heredoc_bad_oct_escape.txt",
+ "seattlerb/heredoc_with_extra_carriage_horrible_mix.txt",
+ "seattlerb/heredoc_with_extra_carriage_returns_windows.txt",
+ "seattlerb/heredoc_with_only_carriage_returns_windows.txt",
+ "seattlerb/heredoc_with_only_carriage_returns.txt",
+ "spanning_heredoc_newlines.txt",
+ "spanning_heredoc.txt",
+ "tilde_heredocs.txt",
+ "unparser/corpus/literal/literal.txt",
+ "while.txt",
+ "whitequark/cond_eflipflop.txt",
+ "whitequark/cond_iflipflop.txt",
+ "whitequark/cond_match_current_line.txt",
+ "whitequark/dedenting_heredoc.txt",
+ "whitequark/lvar_injecting_match.txt",
+ "whitequark/not.txt",
+ "whitequark/numparam_ruby_bug_19025.txt",
+ "whitequark/op_asgn_cmd.txt",
+ "whitequark/parser_bug_640.txt",
+ "whitequark/parser_slash_slash_n_escaping_in_literals.txt",
+ "whitequark/pattern_matching_single_line_allowed_omission_of_parentheses.txt",
+ "whitequark/pattern_matching_single_line.txt",
+ "whitequark/ruby_bug_11989.txt",
+ "whitequark/slash_newline_in_heredocs.txt"
+ ]
+
+ Fixture.each(except: failures) do |fixture|
+ define_method(fixture.test_name) do
+ assert_ruby_parser(fixture, todos.include?(fixture.path))
+ end
+ end
+
+ private
+
+ def assert_ruby_parser(fixture, allowed_failure)
+ source = fixture.read
+ expected = ignore_warnings { ::RubyParser.new.parse(source, fixture.path) }
+ actual = Prism::Translation::RubyParser.new.parse(source, fixture.path)
+
+ if !allowed_failure
+ assert_equal(expected, actual, -> { message(expected, actual) })
+ elsif expected == actual
+ puts "#{name} now passes"
+ end
+ end
+
+ def message(expected, actual)
+ if expected == actual
+ nil
+ elsif expected.is_a?(Sexp) && actual.is_a?(Sexp)
+ if expected.line != actual.line
+ "expected: (#{expected.inspect} line=#{expected.line}), actual: (#{actual.inspect} line=#{actual.line})"
+ elsif expected.file != actual.file
+ "expected: (#{expected.inspect} file=#{expected.file}), actual: (#{actual.inspect} file=#{actual.file})"
+ elsif expected.length != actual.length
+ "expected: (#{expected.inspect} length=#{expected.length}), actual: (#{actual.inspect} length=#{actual.length})"
+ else
+ expected.zip(actual).find do |expected_field, actual_field|
+ result = message(expected_field, actual_field)
+ break result if result
+ end
+ end
+ else
+ "expected: #{expected.inspect}, actual: #{actual.inspect}"
+ end
+ end
+ end
+end
diff --git a/test/prism/ruby/tunnel_test.rb b/test/prism/ruby/tunnel_test.rb
new file mode 100644
index 0000000000..0214681604
--- /dev/null
+++ b/test/prism/ruby/tunnel_test.rb
@@ -0,0 +1,26 @@
+# frozen_string_literal: true
+
+require_relative "../test_helper"
+
+module Prism
+ class TunnelTest < TestCase
+ def test_tunnel
+ program = Prism.parse("foo(1) +\n bar(2, 3) +\n baz(3, 4, 5)").value
+
+ tunnel = program.tunnel(1, 4).last
+ assert_kind_of IntegerNode, tunnel
+ assert_equal 1, tunnel.value
+
+ tunnel = program.tunnel(2, 6).last
+ assert_kind_of IntegerNode, tunnel
+ assert_equal 2, tunnel.value
+
+ tunnel = program.tunnel(3, 9).last
+ assert_kind_of IntegerNode, tunnel
+ assert_equal 4, tunnel.value
+
+ tunnel = program.tunnel(3, 8)
+ assert_equal [ProgramNode, StatementsNode, CallNode, ArgumentsNode, CallNode, ArgumentsNode], tunnel.map(&:class)
+ end
+ end
+end
diff --git a/test/prism/ruby_api_test.rb b/test/prism/ruby_api_test.rb
deleted file mode 100644
index a1e2592d3d..0000000000
--- a/test/prism/ruby_api_test.rb
+++ /dev/null
@@ -1,307 +0,0 @@
-# frozen_string_literal: true
-
-require_relative "test_helper"
-
-module Prism
- class RubyAPITest < TestCase
- if !ENV["PRISM_BUILD_MINIMAL"]
- def test_ruby_api
- filepath = __FILE__
- source = File.read(filepath, binmode: true, external_encoding: Encoding::UTF_8)
-
- assert_equal Prism.lex(source, filepath: filepath).value, Prism.lex_file(filepath).value
- assert_equal Prism.dump(source, filepath: filepath), Prism.dump_file(filepath)
-
- serialized = Prism.dump(source, filepath: filepath)
- ast1 = Prism.load(source, serialized).value
- ast2 = Prism.parse(source, filepath: filepath).value
- ast3 = Prism.parse_file(filepath).value
-
- assert_equal_nodes ast1, ast2
- assert_equal_nodes ast2, ast3
- end
- end
-
- def test_parse_success?
- assert Prism.parse_success?("1")
- refute Prism.parse_success?("<>")
- end
-
- def test_parse_file_success?
- assert Prism.parse_file_success?(__FILE__)
- end
-
- def test_options
- assert_equal "", Prism.parse("__FILE__").value.statements.body[0].filepath
- assert_equal "foo.rb", Prism.parse("__FILE__", filepath: "foo.rb").value.statements.body[0].filepath
-
- assert_equal 1, Prism.parse("foo").value.statements.body[0].location.start_line
- assert_equal 10, Prism.parse("foo", line: 10).value.statements.body[0].location.start_line
-
- refute Prism.parse("\"foo\"").value.statements.body[0].frozen?
- assert Prism.parse("\"foo\"", frozen_string_literal: true).value.statements.body[0].frozen?
- refute Prism.parse("\"foo\"", frozen_string_literal: false).value.statements.body[0].frozen?
-
- assert_kind_of Prism::CallNode, Prism.parse("foo").value.statements.body[0]
- assert_kind_of Prism::LocalVariableReadNode, Prism.parse("foo", scopes: [[:foo]]).value.statements.body[0]
- assert_equal 1, Prism.parse("foo", scopes: [[:foo], []]).value.statements.body[0].depth
-
- assert_equal [:foo], Prism.parse("foo", scopes: [[:foo]]).value.locals
- end
-
- def test_literal_value_method
- assert_equal 123, parse_expression("123").value
- assert_equal 3.14, parse_expression("3.14").value
- assert_equal 42i, parse_expression("42i").value
- assert_equal 42.1ri, parse_expression("42.1ri").value
- assert_equal 3.14i, parse_expression("3.14i").value
- assert_equal 42r, parse_expression("42r").value
- assert_equal 0.5r, parse_expression("0.5r").value
- assert_equal 42ri, parse_expression("42ri").value
- assert_equal 0.5ri, parse_expression("0.5ri").value
- assert_equal 0xFFr, parse_expression("0xFFr").value
- assert_equal 0xFFri, parse_expression("0xFFri").value
- end
-
- def test_location_join
- recv, args_node, _ = parse_expression("1234 + 567").child_nodes
- arg = args_node.arguments[0]
-
- joined = recv.location.join(arg.location)
- assert_equal 0, joined.start_offset
- assert_equal 10, joined.length
-
- assert_raise RuntimeError, "Incompatible locations" do
- arg.location.join(recv.location)
- end
-
- other_arg = parse_expression("1234 + 567").arguments.arguments[0]
-
- assert_raise RuntimeError, "Incompatible sources" do
- other_arg.location.join(recv.location)
- end
-
- assert_raise RuntimeError, "Incompatible sources" do
- recv.location.join(other_arg.location)
- end
- end
-
- def test_location_character_offsets
- program = Prism.parse("😀 + 😀\n😍 ||= 😍").value
-
- # first 😀
- location = program.statements.body.first.receiver.location
- assert_equal 0, location.start_character_offset
- assert_equal 1, location.end_character_offset
- assert_equal 0, location.start_character_column
- assert_equal 1, location.end_character_column
-
- # second 😀
- location = program.statements.body.first.arguments.arguments.first.location
- assert_equal 4, location.start_character_offset
- assert_equal 5, location.end_character_offset
- assert_equal 4, location.start_character_column
- assert_equal 5, location.end_character_column
-
- # first 😍
- location = program.statements.body.last.name_loc
- assert_equal 6, location.start_character_offset
- assert_equal 7, location.end_character_offset
- assert_equal 0, location.start_character_column
- assert_equal 1, location.end_character_column
-
- # second 😍
- location = program.statements.body.last.value.location
- assert_equal 12, location.start_character_offset
- assert_equal 13, location.end_character_offset
- assert_equal 6, location.start_character_column
- assert_equal 7, location.end_character_column
- end
-
- def test_location_code_units
- program = Prism.parse("😀 + 😀\n😍 ||= 😍").value
-
- # first 😀
- location = program.statements.body.first.receiver.location
-
- assert_equal 0, location.start_code_units_offset(Encoding::UTF_8)
- assert_equal 0, location.start_code_units_offset(Encoding::UTF_16LE)
- assert_equal 0, location.start_code_units_offset(Encoding::UTF_32LE)
-
- assert_equal 1, location.end_code_units_offset(Encoding::UTF_8)
- assert_equal 2, location.end_code_units_offset(Encoding::UTF_16LE)
- assert_equal 1, location.end_code_units_offset(Encoding::UTF_32LE)
-
- assert_equal 0, location.start_code_units_column(Encoding::UTF_8)
- assert_equal 0, location.start_code_units_column(Encoding::UTF_16LE)
- assert_equal 0, location.start_code_units_column(Encoding::UTF_32LE)
-
- assert_equal 1, location.end_code_units_column(Encoding::UTF_8)
- assert_equal 2, location.end_code_units_column(Encoding::UTF_16LE)
- assert_equal 1, location.end_code_units_column(Encoding::UTF_32LE)
-
- # second 😀
- location = program.statements.body.first.arguments.arguments.first.location
-
- assert_equal 4, location.start_code_units_offset(Encoding::UTF_8)
- assert_equal 5, location.start_code_units_offset(Encoding::UTF_16LE)
- assert_equal 4, location.start_code_units_offset(Encoding::UTF_32LE)
-
- assert_equal 5, location.end_code_units_offset(Encoding::UTF_8)
- assert_equal 7, location.end_code_units_offset(Encoding::UTF_16LE)
- assert_equal 5, location.end_code_units_offset(Encoding::UTF_32LE)
-
- assert_equal 4, location.start_code_units_column(Encoding::UTF_8)
- assert_equal 5, location.start_code_units_column(Encoding::UTF_16LE)
- assert_equal 4, location.start_code_units_column(Encoding::UTF_32LE)
-
- assert_equal 5, location.end_code_units_column(Encoding::UTF_8)
- assert_equal 7, location.end_code_units_column(Encoding::UTF_16LE)
- assert_equal 5, location.end_code_units_column(Encoding::UTF_32LE)
-
- # first 😍
- location = program.statements.body.last.name_loc
-
- assert_equal 6, location.start_code_units_offset(Encoding::UTF_8)
- assert_equal 8, location.start_code_units_offset(Encoding::UTF_16LE)
- assert_equal 6, location.start_code_units_offset(Encoding::UTF_32LE)
-
- assert_equal 7, location.end_code_units_offset(Encoding::UTF_8)
- assert_equal 10, location.end_code_units_offset(Encoding::UTF_16LE)
- assert_equal 7, location.end_code_units_offset(Encoding::UTF_32LE)
-
- assert_equal 0, location.start_code_units_column(Encoding::UTF_8)
- assert_equal 0, location.start_code_units_column(Encoding::UTF_16LE)
- assert_equal 0, location.start_code_units_column(Encoding::UTF_32LE)
-
- assert_equal 1, location.end_code_units_column(Encoding::UTF_8)
- assert_equal 2, location.end_code_units_column(Encoding::UTF_16LE)
- assert_equal 1, location.end_code_units_column(Encoding::UTF_32LE)
-
- # second 😍
- location = program.statements.body.last.value.location
-
- assert_equal 12, location.start_code_units_offset(Encoding::UTF_8)
- assert_equal 15, location.start_code_units_offset(Encoding::UTF_16LE)
- assert_equal 12, location.start_code_units_offset(Encoding::UTF_32LE)
-
- assert_equal 13, location.end_code_units_offset(Encoding::UTF_8)
- assert_equal 17, location.end_code_units_offset(Encoding::UTF_16LE)
- assert_equal 13, location.end_code_units_offset(Encoding::UTF_32LE)
-
- assert_equal 6, location.start_code_units_column(Encoding::UTF_8)
- assert_equal 7, location.start_code_units_column(Encoding::UTF_16LE)
- assert_equal 6, location.start_code_units_column(Encoding::UTF_32LE)
-
- assert_equal 7, location.end_code_units_column(Encoding::UTF_8)
- assert_equal 9, location.end_code_units_column(Encoding::UTF_16LE)
- assert_equal 7, location.end_code_units_column(Encoding::UTF_32LE)
- end
-
- def test_location_chop
- location = Prism.parse("foo").value.location
-
- assert_equal "fo", location.chop.slice
- assert_equal "", location.chop.chop.chop.slice
-
- # Check that we don't go negative.
- 10.times { location = location.chop }
- assert_equal "", location.slice
- end
-
- def test_location_slice_lines
- result = Prism.parse("\nprivate def foo\nend\n")
- method = result.value.statements.body.first.arguments.arguments.first
-
- assert_equal "private def foo\nend\n", method.slice_lines
- end
-
- def test_heredoc?
- refute parse_expression("\"foo\"").heredoc?
- refute parse_expression("\"foo \#{1}\"").heredoc?
- refute parse_expression("`foo`").heredoc?
- refute parse_expression("`foo \#{1}`").heredoc?
-
- assert parse_expression("<<~HERE\nfoo\nHERE\n").heredoc?
- assert parse_expression("<<~HERE\nfoo \#{1}\nHERE\n").heredoc?
- assert parse_expression("<<~`HERE`\nfoo\nHERE\n").heredoc?
- assert parse_expression("<<~`HERE`\nfoo \#{1}\nHERE\n").heredoc?
- end
-
- # Through some bit hackery, we want to allow consumers to use the integer
- # base flags as the base itself. It has a nice property that the current
- # alignment provides them in the correct order. So here we test that our
- # assumption holds so that it doesn't change out from under us.
- #
- # In C, this would look something like:
- #
- # ((flags & ~DECIMAL) << 1) || 10
- #
- # We have to do some other work in Ruby because 0 is truthy and ~ on an
- # integer doesn't have a fixed width.
- def test_integer_base_flags
- base = -> (node) do
- value = (node.send(:flags) & (0b1111 - IntegerBaseFlags::DECIMAL)) << 1
- value == 0 ? 10 : value
- end
-
- assert_equal 2, base[parse_expression("0b1")]
- assert_equal 8, base[parse_expression("0o1")]
- assert_equal 10, base[parse_expression("0d1")]
- assert_equal 16, base[parse_expression("0x1")]
- end
-
- def test_node_equality
- assert_operator parse_expression("1"), :===, parse_expression("1")
- assert_operator Prism.parse("1").value, :===, Prism.parse("1").value
-
- complex_source = "class Something; @var = something.else { _1 }; end"
- assert_operator parse_expression(complex_source), :===, parse_expression(complex_source)
-
- refute_operator parse_expression("1"), :===, parse_expression("2")
- refute_operator parse_expression("1"), :===, parse_expression("0x1")
-
- complex_source_1 = "class Something; @var = something.else { _1 }; end"
- complex_source_2 = "class Something; @var = something.else { _2 }; end"
- refute_operator parse_expression(complex_source_1), :===, parse_expression(complex_source_2)
- end
-
- def test_node_tunnel
- program = Prism.parse("foo(1) +\n  bar(2, 3) +\n  baz(3, 4, 5)").value
-
- tunnel = program.tunnel(1, 4).last
- assert_kind_of IntegerNode, tunnel
- assert_equal 1, tunnel.value
-
- tunnel = program.tunnel(2, 6).last
- assert_kind_of IntegerNode, tunnel
- assert_equal 2, tunnel.value
-
- tunnel = program.tunnel(3, 9).last
- assert_kind_of IntegerNode, tunnel
- assert_equal 4, tunnel.value
-
- tunnel = program.tunnel(3, 8)
- assert_equal [ProgramNode, StatementsNode, CallNode, ArgumentsNode, CallNode, ArgumentsNode], tunnel.map(&:class)
- end
-
- def test_location_adjoin
- program = Prism.parse("foo.bar = 1").value
-
- location = program.statements.body.first.message_loc
- adjoined = location.adjoin("=")
-
- assert_kind_of Location, adjoined
- refute_equal location, adjoined
-
- assert_equal 4, adjoined.start_offset
- assert_equal 9, adjoined.end_offset
- end
-
- private
-
- def parse_expression(source)
- Prism.parse(source).value.statements.body.first
- end
- end
-end
diff --git a/test/prism/ruby_parser_test.rb b/test/prism/ruby_parser_test.rb
deleted file mode 100644
index 0fd96d42b5..0000000000
--- a/test/prism/ruby_parser_test.rb
+++ /dev/null
@@ -1,135 +0,0 @@
-# frozen_string_literal: true
-
-return if RUBY_ENGINE == "jruby"
-
-require_relative "test_helper"
-
-begin
- require "ruby_parser"
-rescue LoadError
- # In CRuby's CI, we're not going to test against the ruby_parser gem because
- # we don't want to have to install it. So in this case we'll just skip this
- # test.
- return
-end
-
-# We want to also compare lines and files to make sure we're setting them
-# correctly.
-Sexp.prepend(
- Module.new do
- def ==(other)
- super && line == other.line && line_max == other.line_max && file == other.file
- end
- end
-)
-
-module Prism
- class RubyParserTest < TestCase
- base = File.join(__dir__, "fixtures")
-
- todos = %w[
- newline_terminated.txt
- regex_char_width.txt
- seattlerb/bug169.txt
- seattlerb/masgn_colon3.txt
- seattlerb/messy_op_asgn_lineno.txt
- seattlerb/op_asgn_primary_colon_const_command_call.txt
- seattlerb/regexp_esc_C_slash.txt
- seattlerb/str_lit_concat_bad_encodings.txt
- unescaping.txt
- unparser/corpus/literal/kwbegin.txt
- unparser/corpus/literal/send.txt
- whitequark/masgn_const.txt
- whitequark/ruby_bug_12402.txt
- whitequark/ruby_bug_14690.txt
- whitequark/space_args_block.txt
- ]
-
- # https://github.com/seattlerb/ruby_parser/issues/344
- failures = %w[
- alias.txt
- dos_endings.txt
- heredocs_with_ignored_newlines.txt
- method_calls.txt
- methods.txt
- multi_write.txt
- not.txt
- patterns.txt
- regex.txt
- seattlerb/and_multi.txt
- seattlerb/heredoc__backslash_dos_format.txt
- seattlerb/heredoc_bad_hex_escape.txt
- seattlerb/heredoc_bad_oct_escape.txt
- seattlerb/heredoc_with_extra_carriage_horrible_mix.txt
- seattlerb/heredoc_with_extra_carriage_returns_windows.txt
- seattlerb/heredoc_with_only_carriage_returns_windows.txt
- seattlerb/heredoc_with_only_carriage_returns.txt
- spanning_heredoc_newlines.txt
- spanning_heredoc.txt
- tilde_heredocs.txt
- unparser/corpus/literal/literal.txt
- while.txt
- whitequark/cond_eflipflop.txt
- whitequark/cond_iflipflop.txt
- whitequark/cond_match_current_line.txt
- whitequark/dedenting_heredoc.txt
- whitequark/lvar_injecting_match.txt
- whitequark/not.txt
- whitequark/numparam_ruby_bug_19025.txt
- whitequark/op_asgn_cmd.txt
- whitequark/parser_bug_640.txt
- whitequark/parser_slash_slash_n_escaping_in_literals.txt
- whitequark/pattern_matching_single_line_allowed_omission_of_parentheses.txt
- whitequark/pattern_matching_single_line.txt
- whitequark/ruby_bug_11989.txt
- whitequark/slash_newline_in_heredocs.txt
- ]
-
- Dir["**/*.txt", base: base].each do |name|
- next if failures.include?(name)
-
- define_method("test_#{name}") do
- begin
- # Parsing with ruby parser tends to be noisy with warnings, so we're
- # turning those off.
- previous_verbose, $VERBOSE = $VERBOSE, nil
- assert_parse_file(base, name, todos.include?(name))
- ensure
- $VERBOSE = previous_verbose
- end
- end
- end
-
- private
-
- def assert_parse_file(base, name, allowed_failure)
- filepath = File.join(base, name)
- expected = ::RubyParser.new.parse(File.read(filepath), filepath)
- actual = Prism::Translation::RubyParser.parse_file(filepath)
-
- if !allowed_failure
- assert_equal_nodes expected, actual
- elsif expected == actual
- puts "#{name} now passes"
- end
- end
-
- def assert_equal_nodes(left, right)
- return if left == right
-
- if left.is_a?(Sexp) && right.is_a?(Sexp)
- if left.line != right.line
- assert_equal "(#{left.inspect} line=#{left.line})", "(#{right.inspect} line=#{right.line})"
- elsif left.file != right.file
- assert_equal "(#{left.inspect} file=#{left.file})", "(#{right.inspect} file=#{right.file})"
- elsif left.length != right.length
- assert_equal "(#{left.inspect} length=#{left.length})", "(#{right.inspect} length=#{right.length})"
- else
- left.zip(right).each { |l, r| assert_equal_nodes(l, r) }
- end
- else
- assert_equal left, right
- end
- end
- end
-end
diff --git a/test/prism/snapshots_test.rb b/test/prism/snapshots_test.rb
new file mode 100644
index 0000000000..0744eafad3
--- /dev/null
+++ b/test/prism/snapshots_test.rb
@@ -0,0 +1,73 @@
+# frozen_string_literal: true
+
+require_relative "test_helper"
+
+module Prism
+  class SnapshotsTest < TestCase
+    # Snapshots are produced by pretty-printing the tree, and methods such as
+    # Symbol#inspect consult the default external encoding. A machine running
+    # with LANG=C or -Eascii-8bit could therefore print a different snapshot,
+    # so the default external encoding is pinned to UTF-8 for each test.
+    def setup
+      @previous_default_external = Encoding.default_external
+      ignore_warnings { Encoding.default_external = Encoding::UTF_8 }
+    end
+
+    # Puts back the default external encoding that was saved in #setup.
+    def teardown
+      ignore_warnings { Encoding.default_external = @previous_default_external }
+    end
+
+    # These fail on TruffleRuby due to a difference in Symbol#inspect:
+    # :测试 vs :"测试"
+    except =
+      if RUBY_ENGINE == "truffleruby"
+        [
+          "emoji_method_calls.txt",
+          "seattlerb/bug202.txt",
+          "seattlerb/magic_encoding_comment.txt"
+        ]
+      else
+        []
+      end
+
+    Fixture.each(except: except) do |fixture|
+      define_method(fixture.test_name) { assert_snapshot(fixture) }
+    end
+
+    private
+
+    # Parses the fixture, pretty-prints the resulting tree and compares it with
+    # the snapshot stored on disk. A missing snapshot is created. A stale one
+    # is rewritten first and then still reported as a failure by the
+    # comparison against the previously saved content.
+    def assert_snapshot(fixture)
+      result = Prism.parse(fixture.read, filepath: fixture.path)
+      assert result.success?
+
+      printed = PP.pp(result.value, +"", 79)
+      snapshot = fixture.snapshot_path
+
+      unless File.exist?(snapshot)
+        # No snapshot yet: write it out now, creating the directory if needed.
+        directory = File.dirname(snapshot)
+        FileUtils.mkdir_p(directory) unless File.directory?(directory)
+
+        File.write(snapshot, printed)
+        warn("Created snapshot at #{snapshot}.")
+        return
+      end
+
+      saved = File.read(snapshot)
+
+      # Refresh the file on disk when the printed tree has drifted from it.
+      if saved != printed
+        File.write(snapshot, printed)
+        warn("Updated snapshot at #{snapshot}.")
+      end
+
+      # Compare against what was on disk before any refresh.
+      assert_equal(saved, printed)
+    end
+  end
+end
diff --git a/test/prism/snippets_test.rb b/test/prism/snippets_test.rb
new file mode 100644
index 0000000000..26847da184
--- /dev/null
+++ b/test/prism/snippets_test.rb
@@ -0,0 +1,42 @@
+# frozen_string_literal: true
+
+require_relative "test_helper"
+
+module Prism
+  class SnippetsTest < TestCase
+    # Fixtures that are left out of the snippet tests.
+    except = %w[
+      newline_terminated.txt
+      seattlerb/begin_rescue_else_ensure_no_bodies.txt
+      seattlerb/case_in.txt
+      seattlerb/parse_line_defn_no_parens.txt
+      seattlerb/pct_nl.txt
+      seattlerb/str_heredoc_interp.txt
+      spanning_heredoc_newlines.txt
+      unparser/corpus/semantic/dstr.txt
+      whitequark/dedenting_heredoc.txt
+      whitequark/multiple_pattern_matches.txt
+    ]
+
+    Fixture.each(except: except) do |fixture|
+      define_method(fixture.test_name) { assert_snippets(fixture) }
+    end
+
+    private
+
+    # Each fixture is cut into snippets at blank lines (\n\n between
+    # non-whitespace characters) and every snippet is parsed on its own, to
+    # make sure the parser never reads bytes beyond the end of a snippet.
+    def assert_snippets(fixture)
+      snippets = fixture.read.split(/(?<=\S)\n\n(?=\S)/).map(&:rstrip)
+
+      snippets.each do |snippet|
+        result = Prism.parse(snippet, filepath: fixture.path)
+        assert result.success?
+
+        # The dump/load round trip is skipped when PRISM_BUILD_MINIMAL is set.
+        next if ENV["PRISM_BUILD_MINIMAL"]
+
+        dumped = Prism.dump(snippet, filepath: fixture.path)
+        assert_equal_nodes(result.value, Prism.load(snippet, dumped).value)
+      end
+    end
+  end
+end
diff --git a/test/prism/test_helper.rb b/test/prism/test_helper.rb
index 77af7e7b45..d6d0abf548 100644
--- a/test/prism/test_helper.rb
+++ b/test/prism/test_helper.rb
@@ -1,8 +1,9 @@
# frozen_string_literal: true
require "prism"
-require "ripper"
require "pp"
+require "ripper"
+require "stringio"
require "test/unit"
require "tempfile"
@@ -16,19 +17,202 @@ if defined?(Test::Unit::Assertions::AssertionMessage)
end
module Prism
+ # Test shorthand: parses the source string with Prism, forwarding any
+ # options, and returns the first statement of the resulting program.
+ def self.parse_statement(source, **options)
+   program = parse(source, **options).value
+   program.statements.body.first
+ end
+
+ class ParseResult < Result
+   # The first statement found in the body of the parsed program.
+   def statement
+     statements = value.statements
+     statements.body.first
+   end
+ end
+
class TestCase < ::Test::Unit::TestCase
+ # Fixtures are the .txt files kept under the test/prism/fixtures directory
+ # and are used to test various aspects of the parser. Test files usually
+ # call Fixture.each, which yields one Fixture object per file, and use the
+ # yielded objects to define a test method for each fixture.
+ class Fixture
+   # Directory that holds every fixture file.
+   BASE = File.join(__dir__, "fixtures")
+
+   # Path of the fixture relative to BASE.
+   attr_reader :path
+
+   def initialize(path)
+     @path = path
+   end
+
+   # Contents of the fixture, read in binary mode and tagged as UTF-8.
+   def read
+     File.read(full_path, binmode: true, external_encoding: Encoding::UTF_8)
+   end
+
+   # Location of the fixture file underneath BASE.
+   def full_path
+     File.join(BASE, path)
+   end
+
+   # Location of the snapshot that corresponds to this fixture.
+   def snapshot_path
+     File.join(__dir__, "snapshots", path)
+   end
+
+   # Name of the test method generated for this fixture.
+   def test_name
+     "test_#{path}".to_sym
+   end
+
+   # Yields a Fixture for every path matching the glob below BASE, minus the
+   # paths listed in +except+. The glob is taken from the FOCUS environment
+   # variable when it is set and otherwise matches every .txt file.
+   def self.each(except: [], &block)
+     pattern = ENV.fetch("FOCUS") { File.join("**", "*.txt") }
+
+     (Dir[pattern, base: BASE] - except).each do |path|
+       yield Fixture.new(path)
+     end
+   end
+ end
+
+ # Yield each encoding that we want to test, along with the codepoints that
+ # should be tested for it (a range or an array of integers).
+ #
+ # ASCII-8BIT and US-ASCII are always yielded, and Windows-1253 is yielded
+ # unless PRISM_BUILD_MINIMAL is set. All remaining encodings are yielded
+ # only when PRISM_TEST_ALL_ENCODINGS is set.
+ def self.each_encoding
+   codepoints_1byte = 0...0x100
+
+   yield Encoding::ASCII_8BIT, codepoints_1byte
+   yield Encoding::US_ASCII, codepoints_1byte
+
+   if !ENV["PRISM_BUILD_MINIMAL"]
+     yield Encoding::Windows_1253, codepoints_1byte
+   end
+
+   # By default we don't test every codepoint in these encodings because it
+   # takes a very long time.
+   return unless ENV["PRISM_TEST_ALL_ENCODINGS"]
+
+   yield Encoding::CP850, codepoints_1byte
+   yield Encoding::CP852, codepoints_1byte
+   yield Encoding::CP855, codepoints_1byte
+   yield Encoding::GB1988, codepoints_1byte
+   yield Encoding::IBM437, codepoints_1byte
+   yield Encoding::IBM720, codepoints_1byte
+   yield Encoding::IBM737, codepoints_1byte
+   yield Encoding::IBM775, codepoints_1byte
+   yield Encoding::IBM852, codepoints_1byte
+   yield Encoding::IBM855, codepoints_1byte
+   yield Encoding::IBM857, codepoints_1byte
+   yield Encoding::IBM860, codepoints_1byte
+   yield Encoding::IBM861, codepoints_1byte
+   yield Encoding::IBM862, codepoints_1byte
+   yield Encoding::IBM863, codepoints_1byte
+   yield Encoding::IBM864, codepoints_1byte
+   yield Encoding::IBM865, codepoints_1byte
+   yield Encoding::IBM866, codepoints_1byte
+   yield Encoding::IBM869, codepoints_1byte
+   yield Encoding::ISO_8859_1, codepoints_1byte
+   yield Encoding::ISO_8859_2, codepoints_1byte
+   yield Encoding::ISO_8859_3, codepoints_1byte
+   yield Encoding::ISO_8859_4, codepoints_1byte
+   yield Encoding::ISO_8859_5, codepoints_1byte
+   yield Encoding::ISO_8859_6, codepoints_1byte
+   yield Encoding::ISO_8859_7, codepoints_1byte
+   yield Encoding::ISO_8859_8, codepoints_1byte
+   yield Encoding::ISO_8859_9, codepoints_1byte
+   yield Encoding::ISO_8859_10, codepoints_1byte
+   yield Encoding::ISO_8859_11, codepoints_1byte
+   yield Encoding::ISO_8859_13, codepoints_1byte
+   yield Encoding::ISO_8859_14, codepoints_1byte
+   yield Encoding::ISO_8859_15, codepoints_1byte
+   yield Encoding::ISO_8859_16, codepoints_1byte
+   yield Encoding::KOI8_R, codepoints_1byte
+   yield Encoding::KOI8_U, codepoints_1byte
+   yield Encoding::MACCENTEURO, codepoints_1byte
+   yield Encoding::MACCROATIAN, codepoints_1byte
+   yield Encoding::MACCYRILLIC, codepoints_1byte
+   yield Encoding::MACGREEK, codepoints_1byte
+   yield Encoding::MACICELAND, codepoints_1byte
+   yield Encoding::MACROMAN, codepoints_1byte
+   yield Encoding::MACROMANIA, codepoints_1byte
+   yield Encoding::MACTHAI, codepoints_1byte
+   yield Encoding::MACTURKISH, codepoints_1byte
+   yield Encoding::MACUKRAINE, codepoints_1byte
+   yield Encoding::TIS_620, codepoints_1byte
+   yield Encoding::Windows_1250, codepoints_1byte
+   yield Encoding::Windows_1251, codepoints_1byte
+   yield Encoding::Windows_1252, codepoints_1byte
+   yield Encoding::Windows_1254, codepoints_1byte
+   yield Encoding::Windows_1255, codepoints_1byte
+   yield Encoding::Windows_1256, codepoints_1byte
+   yield Encoding::Windows_1257, codepoints_1byte
+   yield Encoding::Windows_1258, codepoints_1byte
+   yield Encoding::Windows_874, codepoints_1byte
+
+   codepoints_2bytes = 0...0x10000
+
+   yield Encoding::Big5, codepoints_2bytes
+   yield Encoding::Big5_HKSCS, codepoints_2bytes
+   yield Encoding::Big5_UAO, codepoints_2bytes
+   yield Encoding::CP949, codepoints_2bytes
+   yield Encoding::CP950, codepoints_2bytes
+   yield Encoding::CP951, codepoints_2bytes
+   yield Encoding::EUC_KR, codepoints_2bytes
+   yield Encoding::GBK, codepoints_2bytes
+   yield Encoding::GB12345, codepoints_2bytes
+   yield Encoding::GB2312, codepoints_2bytes
+   yield Encoding::MACJAPANESE, codepoints_2bytes
+   yield Encoding::Shift_JIS, codepoints_2bytes
+   yield Encoding::SJIS_DoCoMo, codepoints_2bytes
+   yield Encoding::SJIS_KDDI, codepoints_2bytes
+   yield Encoding::SJIS_SoftBank, codepoints_2bytes
+   yield Encoding::Windows_31J, codepoints_2bytes
+
+   codepoints_unicode = (0...0x110000)
+
+   yield Encoding::UTF_8, codepoints_unicode
+   yield Encoding::UTF8_MAC, codepoints_unicode
+   yield Encoding::UTF8_DoCoMo, codepoints_unicode
+   yield Encoding::UTF8_KDDI, codepoints_unicode
+   yield Encoding::UTF8_SoftBank, codepoints_unicode
+   yield Encoding::CESU_8, codepoints_unicode
+
+   # One- and two-byte values, plus the same values prefixed with 0x8F.
+   codepoints_eucjp = [
+     *(0...0x10000),
+     *(0...0x10000).map { |bytes| bytes | 0x8F0000 }
+   ]
+
+   yield Encoding::CP51932, codepoints_eucjp
+   yield Encoding::EUC_JP, codepoints_eucjp
+   yield Encoding::EUCJP_MS, codepoints_eucjp
+   yield Encoding::EUC_JIS_2004, codepoints_eucjp
+
+   # Single bytes, then two-, three- and four-byte sequences. The four-byte
+   # form is 0x9C followed by byte2, byte3 and byte4, each shifted into its
+   # own position so that every combination is produced exactly once.
+   codepoints_emacs_mule = [
+     *(0...0x80),
+     *((0x81...0x90).flat_map { |byte1| (0x90...0x100).map { |byte2| byte1 << 8 | byte2 } }),
+     *((0x90...0x9C).flat_map { |byte1| (0xA0...0x100).flat_map { |byte2| (0xA0...0x100).flat_map { |byte3| byte1 << 16 | byte2 << 8 | byte3 } } }),
+     *((0xF0...0xF5).flat_map { |byte2| (0xA0...0x100).flat_map { |byte3| (0xA0...0x100).flat_map { |byte4| 0x9C << 24 | byte2 << 16 | byte3 << 8 | byte4 } } }),
+   ]
+
+   yield Encoding::EMACS_MULE, codepoints_emacs_mule
+   yield Encoding::STATELESS_ISO_2022_JP, codepoints_emacs_mule
+   yield Encoding::STATELESS_ISO_2022_JP_KDDI, codepoints_emacs_mule
+
+   # Single bytes, then two-byte and four-byte sequences.
+   codepoints_gb18030 = [
+     *(0...0x80),
+     *((0x81..0xFE).flat_map { |byte1| (0x40...0x100).map { |byte2| byte1 << 8 | byte2 } }),
+     *((0x81..0xFE).flat_map { |byte1| (0x30...0x40).flat_map { |byte2| (0x81..0xFE).flat_map { |byte3| (0x2F...0x41).map { |byte4| byte1 << 24 | byte2 << 16 | byte3 << 8 | byte4 } } } }),
+   ]
+
+   yield Encoding::GB18030, codepoints_gb18030
+
+   # Single bytes, two-byte sequences, and four-byte sequences led by 0x8E.
+   codepoints_euc_tw = [
+     *(0..0x7F),
+     *(0xA1..0xFF).flat_map { |byte1| (0xA1..0xFF).map { |byte2| (byte1 << 8) | byte2 } },
+     *(0xA1..0xB0).flat_map { |byte2| (0xA1..0xFF).flat_map { |byte3| (0xA1..0xFF).flat_map { |byte4| 0x8E << 24 | byte2 << 16 | byte3 << 8 | byte4 } } }
+   ]
+
+   yield Encoding::EUC_TW, codepoints_euc_tw
+ end
+
private
if RUBY_ENGINE == "ruby"
# Check that the given source is valid syntax by compiling it with RubyVM.
def check_syntax(source)
- $VERBOSE, previous = nil, $VERBOSE
-
- begin
- RubyVM::InstructionSequence.compile(source)
- ensure
- $VERBOSE = previous
- end
+ ignore_warnings { RubyVM::InstructionSequence.compile(source) }
end
# Assert that the given source is valid Ruby syntax by attempting to
@@ -51,6 +235,8 @@ module Prism
end
end
+ # CRuby has this same method, so define it so that we don't accidentally
+ # break CRuby CI.
def assert_raises(*args, &block)
raise "Use assert_raise instead"
end
@@ -122,5 +308,16 @@ module Prism
assert_equal expected, actual
end
end
+
+ # Runs the given block with $VERBOSE set to nil and puts the previous value
+ # back afterwards, even when the block raises. Returns the block's result.
+ def ignore_warnings
+   previous, $VERBOSE = $VERBOSE, nil
+   yield
+ ensure
+   $VERBOSE = previous
+ end
end
end
diff --git a/test/prism/unescape_test.rb b/test/prism/unescape_test.rb
index 3f78a59b11..35e1952cb2 100644
--- a/test/prism/unescape_test.rb
+++ b/test/prism/unescape_test.rb
@@ -2,7 +2,7 @@
require_relative "test_helper"
-return if RUBY_VERSION < "3.1.0" || Prism::BACKEND == :FFI
+return if RUBY_VERSION < "3.1.0"
module Prism
class UnescapeTest < TestCase
@@ -41,7 +41,7 @@ module Prism
result = Prism.parse(code(escape), encoding: "binary")
if result.success?
- yield result.value.statements.body.first
+ yield result.statement
else
:error
end