test/prism/ruby/parser_test.rb


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291

# frozen_string_literal: true

require_relative "../test_helper"

# Load the parser gem and Prism's translation layer. $VERBOSE is set to nil
# for the duration of the requires and restored afterwards.
begin
  # Remember the current $VERBOSE while switching it off in the same statement.
  verbose, $VERBOSE = $VERBOSE, nil
  require "parser/ruby33"
  require "prism/translation/parser33"
  require "prism/translation/parser34"
rescue LoadError
  # In CRuby's CI, we're not going to test against the parser gem because we
  # don't want to have to install it. So in this case we'll just skip this test.
  # The top-level return stops evaluating the rest of this file.
  return
ensure
  # Runs on both the success path and the LoadError path.
  $VERBOSE = verbose
end

# First, opt in to every AST feature. This is done for both the parser gem's
# default builder and Prism's translation builder.
Parser::Builders::Default.modernize
Prism::Translation::Parser::Builder.modernize

# The parser gem rejects some strings that would most likely lead to errors
# in consumers due to encoding problems. RuboCop however monkey-patches this
# method out in order to accept such code.
# https://github.com/whitequark/parser/blob/v3.3.6.0/lib/parser/builders/default.rb#L2289-L2295
string_value_passthrough = Module.new do
  # Returns value(token) directly instead of running the parser gem's own
  # string_value implementation (there is no call to super).
  def string_value(token)
    value(token)
  end
end
Parser::Builders::Default.prepend(string_value_passthrough)

# Modify the source map == check so that it doesn't check against the node
# itself so we don't get into a recursive loop.
source_map_equality = Module.new do
  # Two maps are equal when they have the same class and every instance
  # variable other than @node compares equal. @node is excluded so that
  # comparing maps does not loop back into comparing nodes.
  def ==(other)
    return false unless self.class == other.class

    compared_ivars = instance_variables - %i[@node]
    compared_ivars
      .map { |ivar| instance_variable_get(ivar) == other.instance_variable_get(ivar) }
      .reduce(:&)
  end
end
Parser::Source::Map.prepend(source_map_equality)

# Next, ensure that we're comparing the nodes and also comparing the source
# ranges so that we're getting all of the necessary information.
node_location_equality = Module.new do
  # Nodes are equal only when the original == holds and their locations
  # compare equal as well.
  def ==(other)
    structurally_equal = super
    structurally_equal && location == other.location
  end
end
Parser::AST::Node.prepend(node_location_equality)

module Prism
  class ParserTest < TestCase
    # These files contain code with valid syntax that can't be parsed. They are
    # passed as `except:` to Fixture.each below, so no test is defined for them.
    # NOTE(review): presumably "can't be parsed" refers to the parser gem — confirm.
    skip_syntax_error = [
      # alias/undef with %s(abc) symbol literal
      "alias.txt",
      "seattlerb/bug_215.txt",

      # 1.. && 2
      "ranges.txt",
    ]

    # These files contain code that is being parsed incorrectly by the parser
    # gem, and therefore we don't want to compare against our translation.
    skip_incorrect = [
      # https://github.com/whitequark/parser/issues/1017
      "spanning_heredoc.txt",
      "spanning_heredoc_newlines.txt",

      # https://github.com/whitequark/parser/issues/1021
      "seattlerb/heredoc_nested.txt",

      # https://github.com/whitequark/parser/issues/1016
      "whitequark/unary_num_pow_precedence.txt",

      # https://github.com/whitequark/parser/issues/950
      "whitequark/dedenting_interpolating_heredoc_fake_line_continuation.txt",

      # Contains an escaped multibyte character. This is supposed to drop the backslash
      "seattlerb/regexp_escape_extended.txt",

      # https://github.com/whitequark/parser/issues/1020
      # These contain consecutive \r characters, followed by \n. Prism only receives
      # the already modified source buffer which dropped one \r but must know the
      # original code to parse it correctly.
      "seattlerb/heredoc_with_extra_carriage_returns_windows.txt",
      "seattlerb/heredoc_with_only_carriage_returns_windows.txt",
      "seattlerb/heredoc_with_only_carriage_returns.txt",

      # https://github.com/whitequark/parser/issues/1026
      # Regex with \c escape
      "unescaping.txt",
      "seattlerb/regexp_esc_C_slash.txt",
    ]

    # Fixtures that are either failing to parse or failing to translate and are
    # skipped for now: everything in skip_incorrect, plus any extra entries
    # added to the (currently empty) list on the right-hand side.
    skip_all = skip_incorrect | []

    # Not sure why these files are failing on JRuby, but skipping them for now.
    skip_all.push("emoji_method_calls.txt", "symbols.txt") if RUBY_ENGINE == "jruby"

    # Fixtures whose lexer output does not yet translate into the token stream
    # the parser gem expects, so their token comparison is skipped for now.
    skip_tokens = %w[
      dash_heredocs.txt
      embdoc_no_newline_at_end.txt
      methods.txt
      seattlerb/bug169.txt
      seattlerb/case_in.txt
      seattlerb/difficult4__leading_dots2.txt
      seattlerb/difficult6__7.txt
      seattlerb/difficult6__8.txt
      seattlerb/heredoc_unicode.txt
      seattlerb/parse_line_heredoc.txt
      seattlerb/pct_w_heredoc_interp_nested.txt
      seattlerb/required_kwarg_no_value.txt
      seattlerb/TestRubyParserShared.txt
      unparser/corpus/literal/assignment.txt
      unparser/corpus/literal/literal.txt
      whitequark/args.txt
      whitequark/beginless_erange_after_newline.txt
      whitequark/beginless_irange_after_newline.txt
      whitequark/forward_arg_with_open_args.txt
      whitequark/kwarg_no_paren.txt
      whitequark/lbrace_arg_after_command_args.txt
      whitequark/multiple_pattern_matches.txt
      whitequark/newline_in_hash_argument.txt
      whitequark/pattern_matching_expr_in_paren.txt
      whitequark/pattern_matching_hash.txt
      whitequark/ruby_bug_14690.txt
      whitequark/ruby_bug_9669.txt
      whitequark/space_args_arg_block.txt
      whitequark/space_args_block.txt
    ]

    # Define one test method per fixture, excluding those in skip_syntax_error.
    # The skip lists decide which comparisons each generated test enforces.
    Fixture.each(except: skip_syntax_error) do |fixture|
      define_method(fixture.test_name) do
        path = fixture.path
        checks = {
          compare_asts: !skip_all.include?(path),
          compare_tokens: !skip_tokens.include?(path),
          compare_comments: path != "embdoc_no_newline_at_end.txt"
        }

        assert_equal_parses(fixture, **checks)
      end
    end

    # Checks that constructing Prism::Translation::Parser33 with the parser
    # gem's default builder emits a warning that names this file and line, and
    # that constructing it without an explicit builder emits no warning.
    def test_non_prism_builder_class_deprecated
      # NOTE: `__LINE__ - 2` below refers to the capture_warnings line that
      # follows, so no line may be inserted between it and the first assertion.
      warnings = capture_warnings { Prism::Translation::Parser33.new(Parser::Builders::Default.new) }

      assert_include(warnings, "#{__FILE__}:#{__LINE__ - 2}")
      assert_include(warnings, "is not a `Prism::Translation::Parser::Builder` subclass")

      # With no builder argument, nothing should be captured.
      warnings = capture_warnings { Prism::Translation::Parser33.new }
      assert_empty(warnings)
    end

    # Only defined on Ruby 3.3 and newer. Versions are compared as Gem::Version
    # objects because comparing the raw strings is lexicographic and would
    # treat e.g. "3.10" or "10.0" as older than "3.3".
    if Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("3.3")
      # Checks that Translation::ParserCurrent reports the running Ruby's
      # version encoded as major * 10 + minor.
      def test_current_parser_for_current_ruby
        major, minor = Gem::Version.new(RUBY_VERSION).segments
        # Let's just hope there never is a Ruby 3.10 or similar
        expected = major * 10 + minor
        assert_equal(expected, Translation::ParserCurrent.new.version)
      end
    end

    # Translates the it.txt fixture with Prism::Translation::Parser34 and checks
    # that the resulting AST is an :itblock wrapping a call to `x` whose body
    # reads the `it` local.
    def test_it_block_parameter_syntax
      fixture_path = Pathname(__dir__).join("../../../test/prism/fixtures/it.txt")

      source_buffer = Parser::Source::Buffer.new(fixture_path)
      source_buffer.source = fixture_path.read
      translated_ast = Prism::Translation::Parser34.new.tokenize(source_buffer).first

      expected_sexp = parse_sexp do
        s(:itblock,
          s(:send, nil, :x), :it,
          s(:lvar, :it))
      end

      assert_equal(expected_sexp, translated_ast.to_sexp)
    end

    private

    # Parses the fixture with both the parser gem (Parser::Ruby33) and Prism's
    # translation layer (Prism::Translation::Parser33) and compares the results.
    #
    # When the ASTs are equal, the tokens are compared too (a token failure is
    # only re-raised if compare_tokens is set) and the comments are compared if
    # compare_comments is set. If a comparison that was flagged as skipped
    # passes anyway, a "now passing" note is printed unless
    # Fixture.custom_base_path? is true. When the ASTs differ, the test only
    # fails if compare_asts is set.
    def assert_equal_parses(fixture, compare_asts: true, compare_tokens: true, compare_comments: true)
      source_buffer = Parser::Source::Buffer.new(fixture.path, 1)
      source_buffer.source = fixture.read

      # Diagnostics are swallowed by a no-op consumer, and every error is fatal.
      reference_parser = Parser::Ruby33.new.tap do |ruby33|
        ruby33.diagnostics.consumer = ->(*) {}
        ruby33.diagnostics.all_errors_are_fatal = true
      end

      expected_ast, expected_comments, expected_tokens =
        ignore_warnings { reference_parser.tokenize(source_buffer) }

      actual_ast, actual_comments, actual_tokens =
        ignore_warnings { Prism::Translation::Parser33.new.tokenize(source_buffer) }

      ast_message = -> { assert_equal_asts_message(expected_ast, actual_ast) }

      # Differing ASTs: fail only when this fixture is expected to match.
      unless expected_ast == actual_ast
        assert_equal(expected_ast, actual_ast, ast_message) if compare_asts
        return
      end

      puts "#{fixture.path} is now passing" unless compare_asts || Fixture.custom_base_path?

      assert_equal(expected_ast, actual_ast, ast_message)

      begin
        assert_equal_tokens(expected_tokens, actual_tokens)
      rescue Test::Unit::AssertionFailedError
        # A token mismatch is tolerated for fixtures listed as skipped.
        raise if compare_tokens
      else
        puts "#{fixture.path} is now passing" unless compare_tokens || Fixture.custom_base_path?
      end

      assert_equal_comments(expected_comments, actual_comments) if compare_comments
    end

    # Builds the failure message shown when the expected and actual ASTs differ.
    #
    # Walks both trees breadth-first in lockstep and reports the first pair that
    # diverges: one side is not a node, the types differ, the locations differ,
    # the contents of a :str node differ, or the child counts differ. If nothing
    # more specific is found, falls back to inspecting both whole trees.
    #
    # expected_ast:: root of the expected tree (may be nil)
    # actual_ast::   root of the actual tree (may be nil)
    #
    # Returns a String.
    def assert_equal_asts_message(expected_ast, actual_ast)
      queue = [[expected_ast, actual_ast]]

      until queue.empty?
        left, right = queue.shift

        # A node paired with nil or a plain value cannot be walked any further.
        # Report the pair instead of raising NoMethodError while building the
        # message, which would hide the real assertion failure.
        unless left.is_a?(Parser::AST::Node) && right.is_a?(Parser::AST::Node)
          return "expected: #{left.inspect}\nactual: #{right.inspect}"
        end

        if left.type != right.type
          return "expected: #{left.type}\nactual: #{right.type}"
        end

        if left.location != right.location
          return "expected:\n#{left.inspect}\n#{left.location.inspect}\nactual:\n#{right.inspect}\n#{right.location.inspect}"
        end

        if left.type == :str && left.children[0] != right.children[0]
          return "expected: #{left.inspect}\nactual: #{right.inspect}"
        end

        # zip would silently drop or nil-pad surplus children, so a differing
        # child count is reported on the parent pair.
        if left.children.size != right.children.size
          return "expected: #{left.inspect}\nactual: #{right.inspect}"
        end

        left.children.zip(right.children).each do |left_child, right_child|
          next unless left_child.is_a?(Parser::AST::Node) || right_child.is_a?(Parser::AST::Node)

          queue << [left_child, right_child]
        end
      end

      "expected: #{expected_ast.inspect}\nactual: #{actual_ast.inspect}"
    end

    # Asserts that two token lists are equal, token by token.
    #
    # Does nothing when the lists already compare equal. Otherwise each position
    # up to the longer list's length is compared, with a missing token on either
    # side standing in as an empty array, so the first differing position fails
    # with a message showing both tokens.
    #
    # An actual :kDO token is rewritten in place to :kDO_BLOCK when that is what
    # the expected token has at the same position.
    def assert_equal_tokens(expected_tokens, actual_tokens)
      return if expected_tokens == actual_tokens

      token_count = [expected_tokens.size, actual_tokens.size].max

      # Iterates indexes 0...token_count, so no trailing comparison of two
      # placeholder tokens past the end of both lists.
      token_count.times do |index|
        expected_token = expected_tokens.fetch(index, [])
        actual_token = actual_tokens.fetch(index, [])

        # There are a lot of tokens that have very specific meaning according
        # to the context of the parser. We don't expose that information in
        # prism, so we need to normalize these tokens a bit.
        if expected_token[0] == :kDO_BLOCK && actual_token[0] == :kDO
          actual_token[0] = expected_token[0]
        end

        # Now we can assert that the tokens are actually equal.
        assert_equal expected_token, actual_token, -> {
          "expected: #{expected_token.inspect}\n" \
          "actual: #{actual_token.inspect}"
        }
      end
    end

    # Asserts that the two comment lists are equal; on failure the message shows
    # the inspected form of both lists.
    def assert_equal_comments(expected_comments, actual_comments)
      failure_message = lambda do
        "expected: #{expected_comments.inspect}\nactual: #{actual_comments.inspect}"
      end

      assert_equal(expected_comments, actual_comments, failure_message)
    end

    # Evaluates the block with an anonymous class extended by AST::Sexp as self,
    # so the block can call the helpers AST::Sexp provides, and returns the
    # result of calling to_sexp on the block's value.
    def parse_sexp(&block)
      sexp_context = Class.new
      sexp_context.extend(AST::Sexp)
      sexp_context.instance_eval(&block).to_sexp
    end
  end
end