summary | refs | log | tree | commit | diff
path: root/lib/prism/translation/parser
diff options
context:
space:
mode:
author: Kevin Newton <[email protected]>, 2025-03-18 13:02:56 -0400
committer: Kevin Newton <[email protected]>, 2025-03-18 13:36:53 -0400
commit: b003d4019421e004460a8c947db2695d71ad0b8c (patch)
tree: 387fa0cc390aaa90f008c4f95069fd11b28429e3 /lib/prism/translation/parser
parent: 33aaa069a4e7b405e6d7ec5fcbf04a487e36e345 (diff)
Fix up merge conflicts for prism sync
Diffstat (limited to 'lib/prism/translation/parser')
-rw-r--r-- lib/prism/translation/parser/compiler.rb | 142
-rw-r--r-- lib/prism/translation/parser/lexer.rb | 169
2 files changed, 14 insertions, 297 deletions
diff --git a/lib/prism/translation/parser/compiler.rb b/lib/prism/translation/parser/compiler.rb
index 338c916eec..aa1cb5d20b 100644
--- a/lib/prism/translation/parser/compiler.rb
+++ b/lib/prism/translation/parser/compiler.rb
@@ -1100,7 +1100,7 @@ module Prism
def visit_interpolated_regular_expression_node(node)
builder.regexp_compose(
token(node.opening_loc),
- visit_all(node.parts),
+ string_nodes_from_interpolation(node, node.opening),
[node.closing[0], srange_offsets(node.closing_loc.start_offset, node.closing_loc.start_offset + 1)],
builder.regexp_options([node.closing[1..], srange_offsets(node.closing_loc.start_offset + 1, node.closing_loc.end_offset)])
)
@@ -1117,45 +1117,6 @@ module Prism
return visit_heredoc(node) { |children, closing| builder.string_compose(token(node.opening_loc), children, closing) }
end
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
- parts = if node.parts.one? { |part| part.type == :string_node }
- node.parts.flat_map do |node|
- if node.type == :string_node && node.unescaped.lines.count >= 2
- start_offset = node.content_loc.start_offset
-
- node.unescaped.lines.map do |line|
- end_offset = start_offset + line.bytesize
- offsets = srange_offsets(start_offset, end_offset)
- start_offset = end_offset
-
- builder.string_internal([line, offsets])
- end
- else
- visit(node)
- end
-=======
- parts = node.parts.flat_map do |part|
- # When the content of a string node is split across multiple lines, the
- # parser gem creates individual string nodes for each line the content is part of.
- if part.type == :string_node && part.content.include?("\n") && part.opening_loc.nil?
- string_nodes_from_line_continuations(part.unescaped, part.content, part.content_loc.start_offset, node.opening)
- else
- visit(part)
->>>>>>> 2637007929 (Better handle all kinds of multiline strings in the parser translator)
- end
- else
- visit_all(node.parts)
- end
-
->>>>>>> a651126458 (Fix an incompatibility with the parser translator)
-=======
->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
-=======
->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
builder.string_compose(
token(node.opening_loc),
string_nodes_from_interpolation(node, node.opening),
@@ -1739,23 +1700,7 @@ module Prism
if node.content.include?("\n")
string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening)
else
-<<<<<<< HEAD
-<<<<<<< HEAD
- [builder.string_internal([node.unescaped, srange(node.content_loc)])]
-=======
- start_offset = node.content_loc.start_offset
-
- [content_lines, unescaped_lines].transpose.map do |content_line, unescaped_line|
- end_offset = start_offset + content_line.bytesize
- offsets = srange_offsets(start_offset, end_offset)
- start_offset = end_offset
-
- builder.string_internal([unescaped_line, offsets])
- end
->>>>>>> a651126458 (Fix an incompatibility with the parser translator)
-=======
[builder.string_internal([node.unescaped, srange(node.content_loc)])]
->>>>>>> 2637007929 (Better handle all kinds of multiline strings in the parser translator)
end
builder.string_compose(
@@ -1799,10 +1744,6 @@ module Prism
builder.symbol([node.unescaped, srange(node.location)])
end
else
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 2637007929 (Better handle all kinds of multiline strings in the parser translator)
parts =
if node.value == ""
[]
@@ -1810,22 +1751,6 @@ module Prism
string_nodes_from_line_continuations(node.unescaped, node.value, node.value_loc.start_offset, node.opening)
else
[builder.string_internal([node.unescaped, srange(node.value_loc)])]
-<<<<<<< HEAD
-=======
- parts = if node.value.lines.one?
- [builder.string_internal([node.unescaped, srange(node.value_loc)])]
- else
- start_offset = node.value_loc.start_offset
-
- node.value.lines.map do |line|
- end_offset = start_offset + line.bytesize
- offsets = srange_offsets(start_offset, end_offset)
- start_offset = end_offset
-
- builder.string_internal([line, offsets])
->>>>>>> a651126458 (Fix an incompatibility with the parser translator)
-=======
->>>>>>> 2637007929 (Better handle all kinds of multiline strings in the parser translator)
end
builder.symbol_compose(
@@ -1964,23 +1889,7 @@ module Prism
elsif node.content.include?("\n")
string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening)
else
-<<<<<<< HEAD
-<<<<<<< HEAD
- [builder.string_internal([node.unescaped, srange(node.content_loc)])]
-=======
- start_offset = node.content_loc.start_offset
-
- node.unescaped.lines.map do |line|
- end_offset = start_offset + line.bytesize
- offsets = srange_offsets(start_offset, end_offset)
- start_offset = end_offset
-
- builder.string_internal([line, offsets])
- end
->>>>>>> a651126458 (Fix an incompatibility with the parser translator)
-=======
[builder.string_internal([node.unescaped, srange(node.content_loc)])]
->>>>>>> 2637007929 (Better handle all kinds of multiline strings in the parser translator)
end
builder.xstring_compose(
@@ -2219,31 +2128,14 @@ module Prism
def string_nodes_from_line_continuations(unescaped, escaped, start_offset, opening)
unescaped = unescaped.lines
escaped = escaped.lines
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
- percent_array = opening&.start_with?("%w", "%W", "%i", "%I")
-=======
->>>>>>> 2637007929 (Better handle all kinds of multiline strings in the parser translator)
-=======
percent_array = opening&.start_with?("%w", "%W", "%i", "%I")
->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
-=======
- percent_array = opening&.start_with?("%w", "%W", "%i", "%I")
->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
+ regex = opening == "/" || opening&.start_with?("%r")
# Non-interpolating strings
if opening&.end_with?("'") || opening&.start_with?("%q", "%s", "%w", "%i")
current_length = 0
current_line = +""
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
-=======
->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
escaped.filter_map.with_index do |escaped_line, index|
unescaped_line = unescaped.fetch(index, "")
current_length += escaped_line.bytesize
@@ -2258,19 +2150,6 @@ module Prism
current_line = +""
current_length = 0
s
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
- if opening&.end_with?("'")
- escaped.each do |line|
- escaped_lengths << line.bytesize
- normalized_lengths << chomped_bytesize(line)
- do_next_tokens << true
->>>>>>> 2637007929 (Better handle all kinds of multiline strings in the parser translator)
-=======
->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
-=======
->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
end
else
escaped_lengths = []
@@ -2285,11 +2164,18 @@ module Prism
.chunk_while { |before, after| before[/(\\*)\r?\n$/, 1]&.length&.odd? || false }
.each do |lines|
escaped_lengths << lines.sum(&:bytesize)
- unescaped_lines_count = lines.sum do |line|
- count = line.scan(/(\\*)n/).count { |(backslashes)| backslashes&.length&.odd? }
- count -= 1 if !line.end_with?("\n") && count > 0
- count
- end
+
+ unescaped_lines_count =
+ if regex
+ 0 # Will always be preserved as is
+ else
+ lines.sum do |line|
+ count = line.scan(/(\\*)n/).count { |(backslashes)| backslashes&.length&.odd? }
+ count -= 1 if !line.end_with?("\n") && count > 0
+ count
+ end
+ end
+
extra = 1
extra = lines.count if percent_array # Account for line continuations in percent arrays
diff --git a/lib/prism/translation/parser/lexer.rb b/lib/prism/translation/parser/lexer.rb
index 39eb9943d7..7db519499f 100644
--- a/lib/prism/translation/parser/lexer.rb
+++ b/lib/prism/translation/parser/lexer.rb
@@ -10,15 +10,7 @@ module Prism
# format for the parser gem.
class Lexer
# These tokens are always skipped
-<<<<<<< HEAD
-<<<<<<< HEAD
TYPES_ALWAYS_SKIP = Set.new(%i[IGNORED_NEWLINE __END__ EOF])
-=======
- TYPES_ALWAYS_SKIP = %i[IGNORED_NEWLINE __END__ EOF].to_set
->>>>>>> ca9500a3fc (Optimize array inclusion checks in the parser translator)
-=======
- TYPES_ALWAYS_SKIP = Set.new(%i[IGNORED_NEWLINE __END__ EOF])
->>>>>>> 422d5c4c64 (Use Set.new over to_set)
private_constant :TYPES_ALWAYS_SKIP
# The direct translating of types between the two lexers.
@@ -203,42 +195,18 @@ module Prism
#
# NOTE: In edge cases like `-> (foo = -> (bar) {}) do end`, please note that `kDO` is still returned
# instead of `kDO_LAMBDA`, which is expected: https://github.com/ruby/prism/pull/3046
-<<<<<<< HEAD
-<<<<<<< HEAD
- LAMBDA_TOKEN_TYPES = Set.new([:kDO_LAMBDA, :tLAMBDA, :tLAMBEG])
-=======
- LAMBDA_TOKEN_TYPES = [:kDO_LAMBDA, :tLAMBDA, :tLAMBEG].to_set
->>>>>>> ca9500a3fc (Optimize array inclusion checks in the parser translator)
-=======
LAMBDA_TOKEN_TYPES = Set.new([:kDO_LAMBDA, :tLAMBDA, :tLAMBEG])
->>>>>>> 422d5c4c64 (Use Set.new over to_set)
# The `PARENTHESIS_LEFT` token in Prism is classified as either `tLPAREN` or `tLPAREN2` in the Parser gem.
# The following token types are listed as those classified as `tLPAREN`.
LPAREN_CONVERSION_TOKEN_TYPES = Set.new([
:kBREAK, :kCASE, :tDIVIDE, :kFOR, :kIF, :kNEXT, :kRETURN, :kUNTIL, :kWHILE, :tAMPER, :tANDOP, :tBANG, :tCOMMA, :tDOT2, :tDOT3,
:tEQL, :tLPAREN, :tLPAREN2, :tLPAREN_ARG, :tLSHFT, :tNL, :tOP_ASGN, :tOROP, :tPIPE, :tSEMI, :tSTRING_DBEG, :tUMINUS, :tUPLUS
-<<<<<<< HEAD
-<<<<<<< HEAD
- ])
-
- # Types of tokens that are allowed to continue a method call with comments in-between.
- # For these, the parser gem doesn't emit a newline token after the last comment.
- COMMENT_CONTINUATION_TYPES = Set.new([:COMMENT, :AMPERSAND_DOT, :DOT])
-=======
- ].to_set
-
- # Types of tokens that are allowed to continue a method call with comments in-between.
- # For these, the parser gem doesn't emit a newline token after the last comment.
- COMMENT_CONTINUATION_TYPES = [:COMMENT, :AMPERSAND_DOT, :DOT].to_set
->>>>>>> ca9500a3fc (Optimize array inclusion checks in the parser translator)
-=======
])
# Types of tokens that are allowed to continue a method call with comments in-between.
# For these, the parser gem doesn't emit a newline token after the last comment.
COMMENT_CONTINUATION_TYPES = Set.new([:COMMENT, :AMPERSAND_DOT, :DOT])
->>>>>>> 422d5c4c64 (Use Set.new over to_set)
private_constant :COMMENT_CONTINUATION_TYPES
# Heredocs are complex and require us to keep track of a bit of info to refer to later
@@ -435,62 +403,8 @@ module Prism
end
when :tSTRING_CONTENT
is_percent_array = percent_array?(quote_stack.last)
-<<<<<<< HEAD
-=======
if (lines = token.value.lines).one?
-<<<<<<< HEAD
-<<<<<<< HEAD
- # Heredoc interpolation can have multiple STRING_CONTENT nodes on the same line.
- is_first_token_on_line = lexed[index - 1] && token.location.start_line != lexed[index - 2][0].location&.start_line
- # The parser gem only removes indentation when the heredoc is not nested
- not_nested = heredoc_stack.size == 1
- if is_percent_array
- value = percent_array_unescape(value)
- elsif is_first_token_on_line && not_nested && (current_heredoc = heredoc_stack.last).common_whitespace > 0
- value = trim_heredoc_whitespace(value, current_heredoc)
- end
->>>>>>> bd3dd2b62a (Fix parser translator tokens for %-arrays with whitespace escapes)
-
- if (lines = token.value.lines).one?
- # Prism usually emits a single token for strings with line continuations.
- # For squiggly heredocs they are not joined so we do that manually here.
- current_string = +""
- current_length = 0
- start_offset = token.location.start_offset
- while token.type == :STRING_CONTENT
- current_length += token.value.bytesize
- # Heredoc interpolation can have multiple STRING_CONTENT nodes on the same line.
- is_first_token_on_line = lexed[index - 1] && token.location.start_line != lexed[index - 2][0].location&.start_line
- # The parser gem only removes indentation when the heredoc is not nested
- not_nested = heredoc_stack.size == 1
- if is_percent_array
- value = percent_array_unescape(token.value)
- elsif is_first_token_on_line && not_nested && (current_heredoc = heredoc_stack.last).common_whitespace > 0
- value = trim_heredoc_whitespace(token.value, current_heredoc)
- end
-
-<<<<<<< HEAD
-=======
- # Prism usually emits a single token for strings with line continuations.
- # For squiggly heredocs they are not joined so we do that manually here.
- current_string = +""
- current_length = 0
- start_offset = token.location.start_offset
- while token.type == :STRING_CONTENT
- current_length += token.value.bytesize
- # Heredoc interpolation can have multiple STRING_CONTENT nodes on the same line.
- is_first_token_on_line = lexed[index - 1] && token.location.start_line != lexed[index - 2][0].location&.start_line
- # The parser gem only removes indentation when the heredoc is not nested
- not_nested = heredoc_stack.size == 1
- if is_percent_array
- value = percent_array_unescape(token.value)
- elsif is_first_token_on_line && not_nested && (current_heredoc = heredoc_stack.last).common_whitespace > 0
- value = trim_heredoc_whitespace(token.value, current_heredoc)
- end
-
->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
-=======
# Prism usually emits a single token for strings with line continuations.
# For squiggly heredocs they are not joined so we do that manually here.
current_string = +""
@@ -508,7 +422,6 @@ module Prism
value = trim_heredoc_whitespace(token.value, current_heredoc)
end
->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
current_string << unescape_string(value, quote_stack.last)
if (backslash_count = token.value[/(\\{1,})\n/, 1]&.length).nil? || backslash_count.even? || !interpolation?(quote_stack.last)
tokens << [:tSTRING_CONTENT, [current_string, range(start_offset, start_offset + current_length)]]
@@ -553,11 +466,6 @@ module Prism
current_line = +""
adjustment = 0
end
-=======
- end_offset = start_offset + adjusted_line.bytesize + adjustment
- tokens << [:tSTRING_CONTENT, [adjusted_line, Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])]]
- start_offset = end_offset
->>>>>>> a651126458 (Fix an incompatibility with the parser translator)
end
end
next
@@ -786,44 +694,7 @@ module Prism
while (skipped = scanner.skip_until(/\\/))
# Append what was just skipped over, excluding the found backslash.
result.append_as_bytes(string.byteslice(scanner.pos - skipped, skipped - 1))
-<<<<<<< HEAD
-<<<<<<< HEAD
- escape_read(result, scanner, false, false)
-=======
-
- if scanner.peek(1) == "\n"
- # Line continuation
- scanner.pos += 1
- elsif (replacement = ESCAPES[scanner.peek(1)])
- # Simple single-character escape sequences like \n
- result.append_as_bytes(replacement)
- scanner.pos += 1
- elsif (octal = scanner.check(/[0-7]{1,3}/))
- # \nnn
- result.append_as_bytes(octal.to_i(8).chr)
- scanner.pos += octal.bytesize
- elsif (hex = scanner.check(/x([0-9a-fA-F]{1,2})/))
- # \xnn
- result.append_as_bytes(hex[1..].to_i(16).chr)
- scanner.pos += hex.bytesize
- elsif (unicode = scanner.check(/u([0-9a-fA-F]{4})/))
- # \unnnn
- result.append_as_bytes(unicode[1..].hex.chr(Encoding::UTF_8))
- scanner.pos += unicode.bytesize
- elsif scanner.peek(3) == "u{}"
- # https://github.com/whitequark/parser/issues/856
- scanner.pos += 3
- elsif (unicode_parts = scanner.check(/u{.*}/))
- # \u{nnnn ...}
- unicode_parts[2..-2].split.each do |unicode|
- result.append_as_bytes(unicode.hex.chr(Encoding::UTF_8))
- end
- scanner.pos += unicode_parts.bytesize
- end
->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
-=======
escape_read(result, scanner, false, false)
->>>>>>> 09c59a3aa5 (Handle control and meta escapes in parser translation)
end
# Add remaining chars
@@ -835,13 +706,6 @@ module Prism
end
end
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
-=======
->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
# Certain strings are merged into a single string token.
def simplify_string?(value, quote)
case quote
@@ -859,24 +723,11 @@ module Prism
end
end
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 09c59a3aa5 (Handle control and meta escapes in parser translation)
# Escape a byte value, given the control and meta flags.
def escape_build(value, control, meta)
value &= 0x9f if control
value |= 0x80 if meta
-<<<<<<< HEAD
-<<<<<<< HEAD
- value
-=======
- value.chr
->>>>>>> 09c59a3aa5 (Handle control and meta escapes in parser translation)
-=======
value
->>>>>>> 161c606b1f (Fix parser translator crash for certain octal escapes)
end
# Read an escape out of the string scanner, given the control and meta
@@ -920,15 +771,6 @@ module Prism
end
end
-<<<<<<< HEAD
-=======
->>>>>>> bd3dd2b62a (Fix parser translator tokens for %-arrays with whitespace escapes)
-=======
->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
-=======
->>>>>>> 09c59a3aa5 (Handle control and meta escapes in parser translation)
-=======
->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
# In a percent array, certain whitespace can be preceded with a backslash,
# causing the following characters to be part of the previous element.
def percent_array_unescape(string)
@@ -953,17 +795,6 @@ module Prism
# Determine if characters preceded by a backslash should be escaped or not
def interpolation?(quote)
!quote.end_with?("'") && !quote.start_with?("%q", "%w", "%i", "%s")
-<<<<<<< HEAD
-<<<<<<< HEAD
- end
-
- # Regexp allow interpolation but are handled differently during unescaping
- def regexp?(quote)
- quote == "/" || quote.start_with?("%r")
-=======
->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
-=======
->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
end
# Regexp allow interpolation but are handled differently during unescaping