1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
|
# frozen_string_literal: true
require_relative "../test_helper"
# Load the parser gem and prism's translation layers for it. $VERBOSE is set
# to nil for the duration of the requires (which silences Ruby warnings) and
# the previous value is restored in the ensure clause whether or not the
# requires succeed.
begin
verbose, $VERBOSE = $VERBOSE, nil
require "parser/ruby33"
require "prism/translation/parser33"
require "prism/translation/parser34"
rescue LoadError
# In CRuby's CI, we're not going to test against the parser gem because we
# don't want to have to install it. So in this case we'll just skip this test.
# The top-level return stops evaluating the rest of this file.
return
ensure
$VERBOSE = verbose
end
# First, opt in to every AST feature. This is done on both the parser gem's
# default builder and prism's translation builder (presumably so that both
# sides emit the same node shapes when they are compared below; confirm
# against each class's `modernize` documentation).
Parser::Builders::Default.modernize
Prism::Translation::Parser::Builder.modernize
# By default the parser gem refuses some strings that would most likely cause
# encoding-related errors in consumers. RuboCop monkey-patches that check away
# so that such code is accepted; do the same here by making string_value
# return the plain token value.
# https://github.com/whitequark/parser/blob/v3.3.6.0/lib/parser/builders/default.rb#L2289-L2295
Parser::Builders::Default.prepend(
  Module.new do
    def string_value(token)
      value(token)
    end
  end
)
# Modify the source map == check so that it doesn't check against the node
# itself so we don't get into a recursive loop.
Parser::Source::Map.prepend(
  Module.new {
    # Two maps are equal when they are instances of the same class and every
    # instance variable of the receiver other than @node compares equal to the
    # same instance variable on +other+.
    #
    # Always returns true or false. `all?` is used rather than folding the
    # individual results with `&` so that the result is a boolean even when
    # there is nothing to compare, does not depend on each `==` returning a
    # strict boolean, and stops at the first difference.
    def ==(other)
      self.class == other.class &&
        (instance_variables - %i[@node]).all? { |ivar|
          instance_variable_get(ivar) == other.instance_variable_get(ivar)
        }
    end
  }
)
# Node equality is tightened here: on top of whatever the original == checks,
# the source locations of both nodes have to match as well, so a comparison
# covers the source ranges and not only the tree shape.
Parser::AST::Node.prepend(
  Module.new do
    def ==(other)
      same_node = super
      same_node && location == other.location
    end
  end
)
module Prism
class ParserTest < TestCase
# These files contain code with valid syntax that can't be parsed.
# The list is handed to Fixture.each as `except:` below (presumably so that no
# test is generated for these fixtures at all; confirm against Fixture.each).
skip_syntax_error = [
# alias/undef with %s(abc) symbol literal
"alias.txt",
"seattlerb/bug_215.txt",
# 1.. && 2
"ranges.txt",
]
# These files contain code that is being parsed incorrectly by the parser
# gem, and therefore we don't want to compare against our translation.
# Every entry becomes part of skip_all below, which turns off the AST
# comparison for the corresponding fixture.
skip_incorrect = [
# https://github.com/whitequark/parser/issues/1017
"spanning_heredoc.txt",
"spanning_heredoc_newlines.txt",
# https://github.com/whitequark/parser/issues/1021
"seattlerb/heredoc_nested.txt",
# https://github.com/whitequark/parser/issues/1016
"whitequark/unary_num_pow_precedence.txt",
# https://github.com/whitequark/parser/issues/950
"whitequark/dedenting_interpolating_heredoc_fake_line_continuation.txt",
# Contains an escaped multibyte character. This is supposed to drop to backslash
"seattlerb/regexp_escape_extended.txt",
# https://github.com/whitequark/parser/issues/1020
# These contain consecutive \r characters, followed by \n. Prism only receives
# the already modified source buffer which dropped one \r but must know the
# original code to parse it correctly.
"seattlerb/heredoc_with_extra_carriage_returns_windows.txt",
"seattlerb/heredoc_with_only_carriage_returns_windows.txt",
"seattlerb/heredoc_with_only_carriage_returns.txt",
# https://github.com/whitequark/parser/issues/1026
# Regex with \c escape
"unescaping.txt",
"seattlerb/regexp_esc_C_slash.txt",
]
# Fixtures whose ASTs are not compared: everything the parser gem itself gets
# wrong, plus any file that currently fails to parse or to translate (none at
# the moment, hence the empty list).
skip_all = skip_incorrect | []
# Not sure why these files are failing on JRuby, but skipping them for now.
skip_all.push("emoji_method_calls.txt", "symbols.txt") if RUBY_ENGINE == "jruby"
# These files are failing to translate their lexer output into the lexer
# output expected by the parser gem, so we'll skip them for now.
# For these fixtures the generated test passes `compare_tokens: false` to
# assert_equal_parses, which still runs the token comparison but does not
# treat a mismatch as a failure.
skip_tokens = [
"dash_heredocs.txt",
"embdoc_no_newline_at_end.txt",
"methods.txt",
"seattlerb/bug169.txt",
"seattlerb/case_in.txt",
"seattlerb/difficult4__leading_dots2.txt",
"seattlerb/difficult6__7.txt",
"seattlerb/difficult6__8.txt",
"seattlerb/heredoc_unicode.txt",
"seattlerb/parse_line_heredoc.txt",
"seattlerb/pct_w_heredoc_interp_nested.txt",
"seattlerb/required_kwarg_no_value.txt",
"seattlerb/TestRubyParserShared.txt",
"unparser/corpus/literal/assignment.txt",
"unparser/corpus/literal/literal.txt",
"whitequark/args.txt",
"whitequark/beginless_erange_after_newline.txt",
"whitequark/beginless_irange_after_newline.txt",
"whitequark/forward_arg_with_open_args.txt",
"whitequark/kwarg_no_paren.txt",
"whitequark/lbrace_arg_after_command_args.txt",
"whitequark/multiple_pattern_matches.txt",
"whitequark/newline_in_hash_argument.txt",
"whitequark/pattern_matching_expr_in_paren.txt",
"whitequark/pattern_matching_hash.txt",
"whitequark/ruby_bug_14690.txt",
"whitequark/ruby_bug_9669.txt",
"whitequark/space_args_arg_block.txt",
"whitequark/space_args_block.txt"
]
# Define one test method per fixture yielded by Fixture.each. Which parts of
# the parse result are compared depends on the skip lists above; comments are
# compared for every fixture except "embdoc_no_newline_at_end.txt".
Fixture.each(except: skip_syntax_error) do |fixture|
  define_method(fixture.test_name) do
    compare_asts = !skip_all.include?(fixture.path)
    compare_tokens = !skip_tokens.include?(fixture.path)
    compare_comments = fixture.path != "embdoc_no_newline_at_end.txt"

    assert_equal_parses(
      fixture,
      compare_asts: compare_asts,
      compare_tokens: compare_tokens,
      compare_comments: compare_comments
    )
  end
end
# Constructing the translation parser with a builder that is not a
# Prism::Translation::Parser::Builder must emit a warning that mentions this
# file and the line of the offending call; constructing it with the default
# builder must emit nothing.
def test_non_prism_builder_class_deprecated
  # Record the line number on the very line that makes the call instead of
  # deriving it from a later __LINE__ with a fixed offset, so the location
  # check stays correct however the surrounding lines are laid out.
  call_line = nil
  warnings = capture_warnings { call_line = __LINE__; Prism::Translation::Parser33.new(Parser::Builders::Default.new) }

  assert_include(warnings, "#{__FILE__}:#{call_line}")
  assert_include(warnings, "is not a `Prism::Translation::Parser::Builder` subclass")

  warnings = capture_warnings { Prism::Translation::Parser33.new }
  assert_empty(warnings)
end
# Translation::ParserCurrent is expected to report the version of the running
# Ruby as major * 10 + minor (for example 33 on Ruby 3.3). The test is only
# defined on Ruby 3.3 and newer.
#
# The guard compares Gem::Version objects because comparing RUBY_VERSION
# strings is lexicographic ("3.10" sorts before "3.3").
if Gem::Version.new(RUBY_VERSION) >= Gem::Version.new("3.3")
  def test_current_parser_for_current_ruby
    major, minor, _patch = Gem::Version.new(RUBY_VERSION).segments
    # Let's just hope there never is a Ruby 3.10 or similar
    expected = major * 10 + minor
    assert_equal(expected, Translation::ParserCurrent.new.version)
  end
end
# Translating the `it.txt` fixture with the 3.4 translation parser must yield
# an :itblock node whose parameter and body both refer to `it`.
def test_it_block_parameter_syntax
  fixture_file = Pathname(__dir__).join("../../../test/prism/fixtures/it.txt")

  source_buffer = Parser::Source::Buffer.new(fixture_file)
  source_buffer.source = fixture_file.read

  # tokenize returns several values; only the first one (the AST) is needed.
  translated_ast = Prism::Translation::Parser34.new.tokenize(source_buffer).first

  expected_sexp = parse_sexp do
    s(:itblock, s(:send, nil, :x), :it, s(:lvar, :it))
  end

  assert_equal(expected_sexp, translated_ast.to_sexp)
end
private
# Tokenize the fixture with the parser gem and with prism's translation
# parser, then compare the results.
#
# fixture          - object responding to #path and #read.
# compare_asts     - when false, an AST mismatch is tolerated.
# compare_tokens   - when false, a token mismatch is tolerated.
# compare_comments - when false, comments are not compared.
#
# Tokens and comments are only looked at when the ASTs are equal. Whenever a
# tolerated comparison unexpectedly succeeds, "<path> is now passing" is
# printed, unless Fixture.custom_base_path? is true.
def assert_equal_parses(fixture, compare_asts: true, compare_tokens: true, compare_comments: true)
  buffer = Parser::Source::Buffer.new(fixture.path, 1)
  buffer.source = fixture.read

  parser = Parser::Ruby33.new
  # Swallow diagnostics, but let every error abort the parse.
  parser.diagnostics.consumer = ->(*) {}
  parser.diagnostics.all_errors_are_fatal = true

  expected_ast, expected_comments, expected_tokens = ignore_warnings { parser.tokenize(buffer) }
  actual_ast, actual_comments, actual_tokens = ignore_warnings { Prism::Translation::Parser33.new.tokenize(buffer) }

  ast_message = -> { assert_equal_asts_message(expected_ast, actual_ast) }
  announce_passing = -> { puts "#{fixture.path} is now passing" unless Fixture.custom_base_path? }

  if expected_ast == actual_ast
    announce_passing.call unless compare_asts
    assert_equal expected_ast, actual_ast, ast_message

    begin
      assert_equal_tokens(expected_tokens, actual_tokens)
    rescue Test::Unit::AssertionFailedError
      # A token mismatch only matters when tokens are being compared.
      raise if compare_tokens
    else
      announce_passing.call unless compare_tokens
    end

    assert_equal_comments(expected_comments, actual_comments) if compare_comments
  elsif compare_asts
    assert_equal expected_ast, actual_ast, ast_message
  end
end
# Build a failure message that points at the first difference found between
# two ASTs. The trees are walked breadth-first in parallel.
#
# expected_ast - AST produced by the parser gem.
# actual_ast   - AST produced by the translation layer.
#
# Returns a String describing the first differing pair of nodes (by type,
# then location, then string value), or both whole trees when no specific
# difference is found.
def assert_equal_asts_message(expected_ast, actual_ast)
  queue = [[expected_ast, actual_ast]]

  while (left, right = queue.shift)
    # Either side may be missing or may not be a node at all (a nil root, or
    # a child present on one side only). Report that pair instead of raising
    # NoMethodError from inside the message builder, which would hide the
    # real assertion failure.
    unless left.is_a?(Parser::AST::Node) && right.is_a?(Parser::AST::Node)
      return "expected: #{left.inspect}\nactual: #{right.inspect}"
    end

    if left.type != right.type
      return "expected: #{left.type}\nactual: #{right.type}"
    end

    if left.location != right.location
      return "expected:\n#{left.inspect}\n#{left.location.inspect}\nactual:\n#{right.inspect}\n#{right.location.inspect}"
    end

    if left.type == :str && left.children[0] != right.children[0]
      return "expected: #{left.inspect}\nactual: #{right.inspect}"
    end

    # Only node children are descended into; other children are skipped.
    left.children.zip(right.children).each do |left_child, right_child|
      queue << [left_child, right_child] if left_child.is_a?(Parser::AST::Node)
    end
  end

  "expected: #{expected_ast.inspect}\nactual: #{actual_ast.inspect}"
end
# Assert that two token lists are equal, token by token, so that a failure
# names the first differing token rather than dumping both lists.
#
# expected_tokens - tokens produced by the parser gem.
# actual_tokens   - tokens produced by the translation layer.
#
# Nothing is asserted when the lists are already equal. When one list is
# shorter, its missing tokens are compared as []. The given lists are not
# modified.
def assert_equal_tokens(expected_tokens, actual_tokens)
  return if expected_tokens == actual_tokens

  token_count = [expected_tokens.size, actual_tokens.size].max

  # Visit each index exactly once; there is nothing to compare past the end
  # of the longer list.
  token_count.times do |index|
    expected_token = expected_tokens.fetch(index, [])
    actual_token = actual_tokens.fetch(index, [])

    # There are a lot of tokens that have very specific meaning according
    # to the context of the parser. We don't expose that information in
    # prism, so we need to normalize these tokens a bit. A normalized copy is
    # compared so that the caller's token is left untouched.
    if expected_token[0] == :kDO_BLOCK && actual_token[0] == :kDO
      actual_token = [expected_token[0], *actual_token.drop(1)]
    end

    # Now we can assert that the tokens are actually equal.
    assert_equal expected_token, actual_token, -> {
      "expected: #{expected_token.inspect}\n" \
      "actual: #{actual_token.inspect}"
    }
  end

  nil
end
# Assert that both parsers produced the same comments. On failure the message
# shows the inspected expected and actual comment lists on separate lines.
def assert_equal_comments(expected_comments, actual_comments)
  failure_message = lambda do
    ["expected: #{expected_comments.inspect}", "actual: #{actual_comments.inspect}"].join("\n")
  end

  assert_equal(expected_comments, actual_comments, failure_message)
end
# Evaluate the block in a throwaway context that has the AST::Sexp helpers
# available, and return the result of calling to_sexp on the block's value.
def parse_sexp(&block)
  sexp_context = Class.new
  sexp_context.extend(AST::Sexp)
  sexp_context.instance_eval(&block).to_sexp
end
end
end
|