diff options
author | Noah Gibbs <[email protected]> | 2024-02-15 12:10:36 +0000 |
---|---|---|
committer | git <[email protected]> | 2024-02-15 20:26:31 +0000 |
commit | a5cee8fa79422a295ce200a92b901c4182995800 (patch) | |
tree | 394bcca583d2a868f54008b0f8b852f2b33c5b62 | |
parent | fa334ecd40d66fa1aa1a344afdcf776d5a8c541c (diff) |
[ruby/prism] Handle more aliases. Better testing of prism ripper CLI and a test for it.
https://github.com/ruby/prism/commit/cfd4f28cb3
-rw-r--r-- | lib/prism/translation/ripper.rb | 169 | ||||
-rw-r--r-- | test/prism/ripper_test.rb | 101 |
2 files changed, 228 insertions, 42 deletions
diff --git a/lib/prism/translation/ripper.rb b/lib/prism/translation/ripper.rb index e0b76abded..f5db4628bf 100644 --- a/lib/prism/translation/ripper.rb +++ b/lib/prism/translation/ripper.rb @@ -222,6 +222,44 @@ module Prism on_break(on_args_add_block(args_val, false)) end + # Visit an AliasMethodNode. + def visit_alias_method_node(node) + # For both the old and new name, if there is a colon in the symbol + # name (e.g. 'alias :foo :bar') then we do *not* emit the [:symbol] wrapper around + # the lexer token (e.g. :@ident) inside [:symbol_literal]. But if there + # is no colon (e.g. 'alias foo bar') then we *do* still emit the [:symbol] wrapper. + + if node.new_name.is_a?(SymbolNode) && !node.new_name.opening + new_name_val = visit_symbol_literal_node(node.new_name, no_symbol_wrapper: true) + else + new_name_val = visit(node.new_name) + end + if node.old_name.is_a?(SymbolNode) && !node.old_name.opening + old_name_val = visit_symbol_literal_node(node.old_name, no_symbol_wrapper: true) + else + old_name_val = visit(node.old_name) + end + + on_alias(new_name_val, old_name_val) + end + + # Visit an AliasGlobalVariableNode. + def visit_alias_global_variable_node(node) + on_var_alias(visit(node.new_name), visit(node.old_name)) + end + + # Visit a GlobalVariableReadNode. + def visit_global_variable_read_node(node) + bounds(node.location) + on_gvar(node.name.to_s) + end + + # Visit a BackReferenceReadNode. + def visit_back_reference_read_node(node) + bounds(node.location) + on_backref(node.name.to_s) + end + # Visit an AndNode. def visit_and_node(node) visit_binary_operator(node) @@ -326,23 +364,7 @@ module Prism # Visit an InterpolatedStringNode node. def visit_interpolated_string_node(node) - parts = node.parts.map do |part| - case part - when StringNode - bounds(part.content_loc) - on_tstring_content(part.content) - when EmbeddedStatementsNode - on_string_embexpr(visit(part)) - else - raise NotImplementedError, "Unexpected node type in InterpolatedStringNode" - end - end - - string_list = parts.inject(on_string_content) do |items, item| - on_string_add(items, item) - end - - on_string_literal(string_list) + on_string_literal(visit_enumerated_node(node)) end # Visit an EmbeddedStatementsNode node. @@ -352,15 +374,12 @@ module Prism # Visit a SymbolNode node. def visit_symbol_node(node) - if (opening = node.opening) && (['"', "'"].include?(opening[-1]) || opening.start_with?("%s")) - bounds(node.value_loc) - tstring_val = on_tstring_content(node.value.to_s) - return on_dyna_symbol(on_string_add(on_string_content, tstring_val)) - end + visit_symbol_literal_node(node) + end - bounds(node.value_loc) - ident_val = on_ident(node.value.to_s) - on_symbol_literal(on_symbol(ident_val)) + # Visit an InterpolatedSymbolNode node. + def visit_interpolated_symbol_node(node) + on_dyna_symbol(visit_enumerated_node(node)) end # Visit a StatementsNode node. @@ -459,6 +478,25 @@ module Prism end end + # Visit an InterpolatedStringNode or an InterpolatedSymbolNode node. + def visit_enumerated_node(node) + parts = node.parts.map do |part| + case part + when StringNode + bounds(part.content_loc) + on_tstring_content(part.content) + when EmbeddedStatementsNode + on_string_embexpr(visit(part)) + else + raise NotImplementedError, "Unexpected node type in visit_enumerated_node" + end + end + + parts.inject(on_string_content) do |items, item| + on_string_add(items, item) + end + end + # Visit an operation-and-assign node, such as +=. def visit_binary_op_assign(node, operator: node.operator) bounds(node.name_loc) @@ -487,6 +525,87 @@ module Prism on_assign(on_aref_field(visit(node.receiver), args_val), assign_val) end + # In an alias statement Ripper will emit @kw instead of @ident if the object + # being aliased is a Ruby keyword. For instance, in the line "alias :foo :if", + # the :if is treated as a lexer keyword. So we need to know what symbols are + # also keywords. + RUBY_KEYWORDS = [ + "alias", + "and", + "begin", + "BEGIN", + "break", + "case", + "class", + "def", + "defined?", + "do", + "else", + "elsif", + "end", + "END", + "ensure", + "false", + "for", + "if", + "in", + "module", + "next", + "nil", + "not", + "or", + "redo", + "rescue", + "retry", + "return", + "self", + "super", + "then", + "true", + "undef", + "unless", + "until", + "when", + "while", + "yield", + "__ENCODING__", + "__FILE__", + "__LINE__", + ] + + # Ripper has several methods of emitting a symbol literal. Inside an alias + # sometimes it suppresses the [:symbol] wrapper around ident. If the symbol + # is also the name of a keyword (e.g. :if) it will emit a :@kw wrapper, not + # an :@ident wrapper, with similar treatment for constants and operators. + def visit_symbol_literal_node(node, no_symbol_wrapper: false) + if (opening = node.opening) && (['"', "'"].include?(opening[-1]) || opening.start_with?("%s")) + bounds(node.value_loc) + str_val = node.value.to_s + if str_val == "" + return on_dyna_symbol(on_string_content) + else + tstring_val = on_tstring_content(str_val) + return on_dyna_symbol(on_string_add(on_string_content, tstring_val)) + end + end + + bounds(node.value_loc) + node_name = node.value.to_s + if RUBY_KEYWORDS.include?(node_name) + token_val = on_kw(node_name) + elsif node_name.length == 0 + raise NotImplementedError + elsif /[[:upper:]]/.match(node_name[0]) + token_val = on_const(node_name) + elsif /[[:punct:]]/.match(node_name[0]) + token_val = on_op(node_name) + else + token_val = on_ident(node_name) + end + sym_val = no_symbol_wrapper ? token_val : on_symbol(token_val) + on_symbol_literal(sym_val) + end + # Visit a node that represents a number. We need to explicitly handle the # unary - operator. def visit_number(node) diff --git a/test/prism/ripper_test.rb b/test/prism/ripper_test.rb index 7abb78c723..8a9af18a13 100644 --- a/test/prism/ripper_test.rb +++ b/test/prism/ripper_test.rb @@ -4,6 +4,50 @@ require_relative "test_helper" module Prism class RipperTest < TestCase + def truffleruby? + RUBY_ENGINE == "truffleruby" + end + + def windows? + Gem.win_platform? + end + + # Ripper produces certain ambiguous structures. For instance, it often + # adds an :args_add_block with "false" as the block meaning there is + # no block call. It can be hard to tell which of multiple equivalent + # structures it will produce. This method attempts to return a normalized + # comparable structure. + def normalized_sexp(parsed) + if parsed.is_a?(Array) + # For args_add_block, if the third entry is nil or false, remove it. + # Note that CRuby Ripper uses false for no block, while older JRuby + # uses nil. We need to do this for both. + return normalized_sexp(parsed[1]) if parsed[0] == :args_add_block && !parsed[2] + + parsed.each.with_index do |item, idx| + if item.is_a?(Array) + parsed[idx] = normalized_sexp(parsed[idx]) + end + end + end + + parsed + end + + def assert_ripper_equivalent(source, path: "inline source code") + expected = Ripper.sexp_raw(source) + + refute_nil expected, "Could not parse #{path} with Ripper!" + expected = normalized_sexp(expected) + actual = Prism::Translation::Ripper.sexp_raw(source) + refute_nil actual, "Could not parse #{path} with Prism!" + actual = normalized_sexp(actual) + assert_equal expected, actual, "Expected Ripper and Prism to give equivalent output for #{path}!" + end + + end + + class RipperShortSourceTest < RipperTest def test_binary assert_equivalent("1 + 2") assert_equivalent("3 - 4 * 5") @@ -36,7 +80,7 @@ module Prism assert_equivalent("foo.bar") # TruffleRuby prints emoji symbols differently in a way that breaks here. - if RUBY_ENGINE != "truffleruby" + unless truffleruby? assert_equivalent("🗻") assert_equivalent("🗻.location") assert_equivalent("foo.🗻") @@ -57,9 +101,9 @@ module Prism def test_method_call_blocks assert_equivalent("foo { |a| a }") - # assert_equivalent("foo(bar 1)") - # assert_equivalent("foo bar 1") - # assert_equivalent("foo(bar 1) { 7 }") + assert_equivalent("foo(bar 1)") + assert_equivalent("foo bar 1") + assert_equivalent("foo(bar 1) { 7 }") end def test_method_calls_on_immediate_values @@ -69,7 +113,7 @@ module Prism assert_equivalent("7 and 7") assert_equivalent("7 || 7") assert_equivalent("7 or 7") - #assert_equivalent("'racecar'.reverse") + assert_equivalent("'racecar'.reverse") end def test_range @@ -142,20 +186,49 @@ module Prism assert_equivalent("a = 1") end + def test_alias + assert_equivalent("alias :foo :bar") + assert_equivalent("alias $a $b") + assert_equivalent("alias $a $'") + assert_equivalent("alias foo bar") + assert_equivalent("alias foo if") + assert_equivalent("alias :'def' :\"abc\#{1}\"") + assert_equivalent("alias :\"abc\#{1}\" :'def'") + + unless truffleruby? + assert_equivalent("alias :foo :Ę") # Uppercase Unicode character is a constant + assert_equivalent("alias :Ę :foo") + end + + assert_equivalent("alias foo +") + assert_equivalent("alias foo :+") + assert_equivalent("alias :foo :''") + assert_equivalent("alias :'' :foo") + end + + # This is *exactly* the kind of thing where Ripper would have a weird + # special case we didn't handle correctly. We're still testing with + # a leading colon since putting random keywords there will often get + # parse errors. Mostly we want to know that Ripper will use :@kw + # instead of :@ident for the lexer symbol for all of these. + def test_keyword_aliases + Prism::Translation::Ripper::RUBY_KEYWORDS.each do |keyword| + assert_equivalent("alias :foo :#{keyword}") + end + end + private def assert_equivalent(source) - expected = Ripper.sexp_raw(source) - - refute_nil expected - assert_equal expected, Prism::Translation::Ripper.sexp_raw(source) + assert_ripper_equivalent(source) end end - class RipperFixturesTest < TestCase + class RipperFixturesTest < RipperTest #base = File.join(__dir__, "fixtures") #relatives = ENV["FOCUS"] ? [ENV["FOCUS"]] : Dir["**/*.txt", base: base] relatives = [ + "alias.txt", "arithmetic.txt", "booleans.txt", "boolean_operators.txt", @@ -172,14 +245,8 @@ module Prism # and explicitly set the external encoding to UTF-8 to override the binmode default. source = File.read(path, binmode: true, external_encoding: Encoding::UTF_8) - expected = Ripper.sexp_raw(source) - if expected.nil? - puts "Could not parse #{path.inspect}!" - end - refute_nil expected - assert_equal expected, Translation::Ripper.sexp_raw(source) + assert_ripper_equivalent(source, path: path) end end - end end |