summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNoah Gibbs <[email protected]>2024-02-15 12:10:36 +0000
committergit <[email protected]>2024-02-15 20:26:31 +0000
commita5cee8fa79422a295ce200a92b901c4182995800 (patch)
tree394bcca583d2a868f54008b0f8b852f2b33c5b62
parentfa334ecd40d66fa1aa1a344afdcf776d5a8c541c (diff)
[ruby/prism] Handle more aliases. Better testing of prism ripper CLI and a test for it.
https://github.com/ruby/prism/commit/cfd4f28cb3
-rw-r--r-- lib/prism/translation/ripper.rb 169
-rw-r--r-- test/prism/ripper_test.rb 101
2 files changed, 228 insertions, 42 deletions
diff --git a/lib/prism/translation/ripper.rb b/lib/prism/translation/ripper.rb
index e0b76abded..f5db4628bf 100644
--- a/lib/prism/translation/ripper.rb
+++ b/lib/prism/translation/ripper.rb
@@ -222,6 +222,44 @@ module Prism
on_break(on_args_add_block(args_val, false))
end
+ # Visit an AliasMethodNode.
+ def visit_alias_method_node(node)
+ # For both the old and new name, if there is a colon in the symbol
+ # name (e.g. 'alias :foo :bar') then we *do* emit the [:symbol] wrapper around
+ # the lexer token (e.g. :@ident) inside [:symbol_literal]. But if there
+ # is no colon (e.g. 'alias foo bar') then we do *not* emit the [:symbol] wrapper.
+
+ if node.new_name.is_a?(SymbolNode) && !node.new_name.opening
+ new_name_val = visit_symbol_literal_node(node.new_name, no_symbol_wrapper: true)
+ else
+ new_name_val = visit(node.new_name)
+ end
+ if node.old_name.is_a?(SymbolNode) && !node.old_name.opening
+ old_name_val = visit_symbol_literal_node(node.old_name, no_symbol_wrapper: true)
+ else
+ old_name_val = visit(node.old_name)
+ end
+
+ on_alias(new_name_val, old_name_val)
+ end
+
+ # Visit an AliasGlobalVariableNode.
+ def visit_alias_global_variable_node(node)
+ on_var_alias(visit(node.new_name), visit(node.old_name))
+ end
+
+ # Visit a GlobalVariableReadNode.
+ def visit_global_variable_read_node(node)
+ bounds(node.location)
+ on_gvar(node.name.to_s)
+ end
+
+ # Visit a BackReferenceReadNode.
+ def visit_back_reference_read_node(node)
+ bounds(node.location)
+ on_backref(node.name.to_s)
+ end
+
# Visit an AndNode.
def visit_and_node(node)
visit_binary_operator(node)
@@ -326,23 +364,7 @@ module Prism
# Visit an InterpolatedStringNode node.
def visit_interpolated_string_node(node)
- parts = node.parts.map do |part|
- case part
- when StringNode
- bounds(part.content_loc)
- on_tstring_content(part.content)
- when EmbeddedStatementsNode
- on_string_embexpr(visit(part))
- else
- raise NotImplementedError, "Unexpected node type in InterpolatedStringNode"
- end
- end
-
- string_list = parts.inject(on_string_content) do |items, item|
- on_string_add(items, item)
- end
-
- on_string_literal(string_list)
+ on_string_literal(visit_enumerated_node(node))
end
# Visit an EmbeddedStatementsNode node.
@@ -352,15 +374,12 @@ module Prism
# Visit a SymbolNode node.
def visit_symbol_node(node)
- if (opening = node.opening) && (['"', "'"].include?(opening[-1]) || opening.start_with?("%s"))
- bounds(node.value_loc)
- tstring_val = on_tstring_content(node.value.to_s)
- return on_dyna_symbol(on_string_add(on_string_content, tstring_val))
- end
+ visit_symbol_literal_node(node)
+ end
- bounds(node.value_loc)
- ident_val = on_ident(node.value.to_s)
- on_symbol_literal(on_symbol(ident_val))
+ # Visit an InterpolatedSymbolNode node.
+ def visit_interpolated_symbol_node(node)
+ on_dyna_symbol(visit_enumerated_node(node))
end
# Visit a StatementsNode node.
@@ -459,6 +478,25 @@ module Prism
end
end
+ # Visit an InterpolatedStringNode or an InterpolatedSymbolNode node.
+ def visit_enumerated_node(node)
+ parts = node.parts.map do |part|
+ case part
+ when StringNode
+ bounds(part.content_loc)
+ on_tstring_content(part.content)
+ when EmbeddedStatementsNode
+ on_string_embexpr(visit(part))
+ else
+ raise NotImplementedError, "Unexpected node type in visit_enumerated_node"
+ end
+ end
+
+ parts.inject(on_string_content) do |items, item|
+ on_string_add(items, item)
+ end
+ end
+
# Visit an operation-and-assign node, such as +=.
def visit_binary_op_assign(node, operator: node.operator)
bounds(node.name_loc)
@@ -487,6 +525,87 @@ module Prism
on_assign(on_aref_field(visit(node.receiver), args_val), assign_val)
end
+ # In an alias statement Ripper will emit @kw instead of @ident if the object
+ # being aliased is a Ruby keyword. For instance, in the line "alias :foo :if",
+ # the :if is treated as a lexer keyword. So we need to know what symbols are
+ # also keywords.
+ RUBY_KEYWORDS = [
+ "alias",
+ "and",
+ "begin",
+ "BEGIN",
+ "break",
+ "case",
+ "class",
+ "def",
+ "defined?",
+ "do",
+ "else",
+ "elsif",
+ "end",
+ "END",
+ "ensure",
+ "false",
+ "for",
+ "if",
+ "in",
+ "module",
+ "next",
+ "nil",
+ "not",
+ "or",
+ "redo",
+ "rescue",
+ "retry",
+ "return",
+ "self",
+ "super",
+ "then",
+ "true",
+ "undef",
+ "unless",
+ "until",
+ "when",
+ "while",
+ "yield",
+ "__ENCODING__",
+ "__FILE__",
+ "__LINE__",
+ ]
+
+ # Ripper has several methods of emitting a symbol literal. Inside an alias
+ # sometimes it suppresses the [:symbol] wrapper around ident. If the symbol
+ # is also the name of a keyword (e.g. :if) it will emit a :@kw wrapper, not
+ # an :@ident wrapper, with similar treatment for constants and operators.
+ def visit_symbol_literal_node(node, no_symbol_wrapper: false)
+ if (opening = node.opening) && (['"', "'"].include?(opening[-1]) || opening.start_with?("%s"))
+ bounds(node.value_loc)
+ str_val = node.value.to_s
+ if str_val == ""
+ return on_dyna_symbol(on_string_content)
+ else
+ tstring_val = on_tstring_content(str_val)
+ return on_dyna_symbol(on_string_add(on_string_content, tstring_val))
+ end
+ end
+
+ bounds(node.value_loc)
+ node_name = node.value.to_s
+ if RUBY_KEYWORDS.include?(node_name)
+ token_val = on_kw(node_name)
+ elsif node_name.length == 0
+ raise NotImplementedError
+ elsif /[[:upper:]]/.match(node_name[0])
+ token_val = on_const(node_name)
+ elsif /[[:punct:]]/.match(node_name[0])
+ token_val = on_op(node_name)
+ else
+ token_val = on_ident(node_name)
+ end
+ sym_val = no_symbol_wrapper ? token_val : on_symbol(token_val)
+ on_symbol_literal(sym_val)
+ end
+
# Visit a node that represents a number. We need to explicitly handle the
# unary - operator.
def visit_number(node)
diff --git a/test/prism/ripper_test.rb b/test/prism/ripper_test.rb
index 7abb78c723..8a9af18a13 100644
--- a/test/prism/ripper_test.rb
+++ b/test/prism/ripper_test.rb
@@ -4,6 +4,50 @@ require_relative "test_helper"
module Prism
class RipperTest < TestCase
+ def truffleruby?
+ RUBY_ENGINE == "truffleruby"
+ end
+
+ def windows?
+ Gem.win_platform?
+ end
+
+ # Ripper produces certain ambiguous structures. For instance, it often
+ # adds an :args_add_block with "false" as the block meaning there is
+ # no block call. It can be hard to tell which of multiple equivalent
+ # structures it will produce. This method attempts to return a normalized
+ # comparable structure.
+ def normalized_sexp(parsed)
+ if parsed.is_a?(Array)
+ # For args_add_block, if the third entry (the block) is nil or false,
+ # replace the whole node with its normalized arguments. CRuby Ripper uses
+ # false for no block, while older JRuby uses nil, so handle both.
+ return normalized_sexp(parsed[1]) if parsed[0] == :args_add_block && !parsed[2]
+
+ parsed.each.with_index do |item, idx|
+ if item.is_a?(Array)
+ parsed[idx] = normalized_sexp(parsed[idx])
+ end
+ end
+ end
+
+ parsed
+ end
+
+ def assert_ripper_equivalent(source, path: "inline source code")
+ expected = Ripper.sexp_raw(source)
+
+ refute_nil expected, "Could not parse #{path} with Ripper!"
+ expected = normalized_sexp(expected)
+ actual = Prism::Translation::Ripper.sexp_raw(source)
+ refute_nil actual, "Could not parse #{path} with Prism!"
+ actual = normalized_sexp(actual)
+ assert_equal expected, actual, "Expected Ripper and Prism to give equivalent output for #{path}!"
+ end
+
+ end
+
+ class RipperShortSourceTest < RipperTest
def test_binary
assert_equivalent("1 + 2")
assert_equivalent("3 - 4 * 5")
@@ -36,7 +80,7 @@ module Prism
assert_equivalent("foo.bar")
# TruffleRuby prints emoji symbols differently in a way that breaks here.
- if RUBY_ENGINE != "truffleruby"
+ unless truffleruby?
assert_equivalent("🗻")
assert_equivalent("🗻.location")
assert_equivalent("foo.🗻")
@@ -57,9 +101,9 @@ module Prism
def test_method_call_blocks
assert_equivalent("foo { |a| a }")
- # assert_equivalent("foo(bar 1)")
- # assert_equivalent("foo bar 1")
- # assert_equivalent("foo(bar 1) { 7 }")
+ assert_equivalent("foo(bar 1)")
+ assert_equivalent("foo bar 1")
+ assert_equivalent("foo(bar 1) { 7 }")
end
def test_method_calls_on_immediate_values
@@ -69,7 +113,7 @@ module Prism
assert_equivalent("7 and 7")
assert_equivalent("7 || 7")
assert_equivalent("7 or 7")
- #assert_equivalent("'racecar'.reverse")
+ assert_equivalent("'racecar'.reverse")
end
def test_range
@@ -142,20 +186,49 @@ module Prism
assert_equivalent("a = 1")
end
+ def test_alias
+ assert_equivalent("alias :foo :bar")
+ assert_equivalent("alias $a $b")
+ assert_equivalent("alias $a $'")
+ assert_equivalent("alias foo bar")
+ assert_equivalent("alias foo if")
+ assert_equivalent("alias :'def' :\"abc\#{1}\"")
+ assert_equivalent("alias :\"abc\#{1}\" :'def'")
+
+ unless truffleruby?
+ assert_equivalent("alias :foo :Ę") # Uppercase Unicode character is a constant
+ assert_equivalent("alias :Ę :foo")
+ end
+
+ assert_equivalent("alias foo +")
+ assert_equivalent("alias foo :+")
+ assert_equivalent("alias :foo :''")
+ assert_equivalent("alias :'' :foo")
+ end
+
+ # This is *exactly* the kind of thing where Ripper would have a weird
+ # special case we didn't handle correctly. We're still testing with
+ # a leading colon since putting random keywords there will often get
+ # parse errors. Mostly we want to know that Ripper will use :@kw
+ # instead of :@ident for the lexer symbol for all of these.
+ def test_keyword_aliases
+ Prism::Translation::Ripper::RUBY_KEYWORDS.each do |keyword|
+ assert_equivalent("alias :foo :#{keyword}")
+ end
+ end
+
private
def assert_equivalent(source)
- expected = Ripper.sexp_raw(source)
-
- refute_nil expected
- assert_equal expected, Prism::Translation::Ripper.sexp_raw(source)
+ assert_ripper_equivalent(source)
end
end
- class RipperFixturesTest < TestCase
+ class RipperFixturesTest < RipperTest
#base = File.join(__dir__, "fixtures")
#relatives = ENV["FOCUS"] ? [ENV["FOCUS"]] : Dir["**/*.txt", base: base]
relatives = [
+ "alias.txt",
"arithmetic.txt",
"booleans.txt",
"boolean_operators.txt",
@@ -172,14 +245,8 @@ module Prism
# and explicitly set the external encoding to UTF-8 to override the binmode default.
source = File.read(path, binmode: true, external_encoding: Encoding::UTF_8)
- expected = Ripper.sexp_raw(source)
- if expected.nil?
- puts "Could not parse #{path.inspect}!"
- end
- refute_nil expected
- assert_equal expected, Translation::Ripper.sexp_raw(source)
+ assert_ripper_equivalent(source, path: path)
end
end
-
end
end