diff options
author | Kevin Newton <[email protected]> | 2025-03-18 13:02:56 -0400 |
---|---|---|
committer | Kevin Newton <[email protected]> | 2025-03-18 13:36:53 -0400 |
commit | b003d4019421e004460a8c947db2695d71ad0b8c (patch) | |
tree | 387fa0cc390aaa90f008c4f95069fd11b28429e3 /lib/prism/translation/parser | |
parent | 33aaa069a4e7b405e6d7ec5fcbf04a487e36e345 (diff) |
Fix up merge conflicts for prism sync
Diffstat (limited to 'lib/prism/translation/parser')
-rw-r--r-- | lib/prism/translation/parser/compiler.rb | 142 | ||||
-rw-r--r-- | lib/prism/translation/parser/lexer.rb | 169 |
2 files changed, 14 insertions, 297 deletions
diff --git a/lib/prism/translation/parser/compiler.rb b/lib/prism/translation/parser/compiler.rb index 338c916eec..aa1cb5d20b 100644 --- a/lib/prism/translation/parser/compiler.rb +++ b/lib/prism/translation/parser/compiler.rb @@ -1100,7 +1100,7 @@ module Prism def visit_interpolated_regular_expression_node(node) builder.regexp_compose( token(node.opening_loc), - visit_all(node.parts), + string_nodes_from_interpolation(node, node.opening), [node.closing[0], srange_offsets(node.closing_loc.start_offset, node.closing_loc.start_offset + 1)], builder.regexp_options([node.closing[1..], srange_offsets(node.closing_loc.start_offset + 1, node.closing_loc.end_offset)]) ) @@ -1117,45 +1117,6 @@ module Prism return visit_heredoc(node) { |children, closing| builder.string_compose(token(node.opening_loc), children, closing) } end -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -======= - parts = if node.parts.one? { |part| part.type == :string_node } - node.parts.flat_map do |node| - if node.type == :string_node && node.unescaped.lines.count >= 2 - start_offset = node.content_loc.start_offset - - node.unescaped.lines.map do |line| - end_offset = start_offset + line.bytesize - offsets = srange_offsets(start_offset, end_offset) - start_offset = end_offset - - builder.string_internal([line, offsets]) - end - else - visit(node) - end -======= - parts = node.parts.flat_map do |part| - # When the content of a string node is split across multiple lines, the - # parser gem creates individual string nodes for each line the content is part of. - if part.type == :string_node && part.content.include?("\n") && part.opening_loc.nil? 
- string_nodes_from_line_continuations(part.unescaped, part.content, part.content_loc.start_offset, node.opening) - else - visit(part) ->>>>>>> 2637007929 (Better handle all kinds of multiline strings in the parser translator) - end - else - visit_all(node.parts) - end - ->>>>>>> a651126458 (Fix an incompatibility with the parser translator) -======= ->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator) -======= ->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator) builder.string_compose( token(node.opening_loc), string_nodes_from_interpolation(node, node.opening), @@ -1739,23 +1700,7 @@ module Prism if node.content.include?("\n") string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening) else -<<<<<<< HEAD -<<<<<<< HEAD - [builder.string_internal([node.unescaped, srange(node.content_loc)])] -======= - start_offset = node.content_loc.start_offset - - [content_lines, unescaped_lines].transpose.map do |content_line, unescaped_line| - end_offset = start_offset + content_line.bytesize - offsets = srange_offsets(start_offset, end_offset) - start_offset = end_offset - - builder.string_internal([unescaped_line, offsets]) - end ->>>>>>> a651126458 (Fix an incompatibility with the parser translator) -======= [builder.string_internal([node.unescaped, srange(node.content_loc)])] ->>>>>>> 2637007929 (Better handle all kinds of multiline strings in the parser translator) end builder.string_compose( @@ -1799,10 +1744,6 @@ module Prism builder.symbol([node.unescaped, srange(node.location)]) end else -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 2637007929 (Better handle all kinds of multiline strings in the parser translator) parts = if node.value == "" [] @@ -1810,22 +1751,6 @@ module Prism string_nodes_from_line_continuations(node.unescaped, node.value, node.value_loc.start_offset, node.opening) else [builder.string_internal([node.unescaped, srange(node.value_loc)])] -<<<<<<< 
HEAD -======= - parts = if node.value.lines.one? - [builder.string_internal([node.unescaped, srange(node.value_loc)])] - else - start_offset = node.value_loc.start_offset - - node.value.lines.map do |line| - end_offset = start_offset + line.bytesize - offsets = srange_offsets(start_offset, end_offset) - start_offset = end_offset - - builder.string_internal([line, offsets]) ->>>>>>> a651126458 (Fix an incompatibility with the parser translator) -======= ->>>>>>> 2637007929 (Better handle all kinds of multiline strings in the parser translator) end builder.symbol_compose( @@ -1964,23 +1889,7 @@ module Prism elsif node.content.include?("\n") string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening) else -<<<<<<< HEAD -<<<<<<< HEAD - [builder.string_internal([node.unescaped, srange(node.content_loc)])] -======= - start_offset = node.content_loc.start_offset - - node.unescaped.lines.map do |line| - end_offset = start_offset + line.bytesize - offsets = srange_offsets(start_offset, end_offset) - start_offset = end_offset - - builder.string_internal([line, offsets]) - end ->>>>>>> a651126458 (Fix an incompatibility with the parser translator) -======= [builder.string_internal([node.unescaped, srange(node.content_loc)])] ->>>>>>> 2637007929 (Better handle all kinds of multiline strings in the parser translator) end builder.xstring_compose( @@ -2219,31 +2128,14 @@ module Prism def string_nodes_from_line_continuations(unescaped, escaped, start_offset, opening) unescaped = unescaped.lines escaped = escaped.lines -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD - percent_array = opening&.start_with?("%w", "%W", "%i", "%I") -======= ->>>>>>> 2637007929 (Better handle all kinds of multiline strings in the parser translator) -======= percent_array = opening&.start_with?("%w", "%W", "%i", "%I") ->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator) -======= - percent_array = opening&.start_with?("%w", "%W", "%i", 
"%I") ->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator) + regex = opening == "/" || opening&.start_with?("%r") # Non-interpolating strings if opening&.end_with?("'") || opening&.start_with?("%q", "%s", "%w", "%i") current_length = 0 current_line = +"" -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator) -======= ->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator) escaped.filter_map.with_index do |escaped_line, index| unescaped_line = unescaped.fetch(index, "") current_length += escaped_line.bytesize @@ -2258,19 +2150,6 @@ module Prism current_line = +"" current_length = 0 s -<<<<<<< HEAD -<<<<<<< HEAD -======= - if opening&.end_with?("'") - escaped.each do |line| - escaped_lengths << line.bytesize - normalized_lengths << chomped_bytesize(line) - do_next_tokens << true ->>>>>>> 2637007929 (Better handle all kinds of multiline strings in the parser translator) -======= ->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator) -======= ->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator) end else escaped_lengths = [] @@ -2285,11 +2164,18 @@ module Prism .chunk_while { |before, after| before[/(\\*)\r?\n$/, 1]&.length&.odd? || false } .each do |lines| escaped_lengths << lines.sum(&:bytesize) - unescaped_lines_count = lines.sum do |line| - count = line.scan(/(\\*)n/).count { |(backslashes)| backslashes&.length&.odd? } - count -= 1 if !line.end_with?("\n") && count > 0 - count - end + + unescaped_lines_count = + if regex + 0 # Will always be preserved as is + else + lines.sum do |line| + count = line.scan(/(\\*)n/).count { |(backslashes)| backslashes&.length&.odd? 
} + count -= 1 if !line.end_with?("\n") && count > 0 + count + end + end + extra = 1 extra = lines.count if percent_array # Account for line continuations in percent arrays diff --git a/lib/prism/translation/parser/lexer.rb b/lib/prism/translation/parser/lexer.rb index 39eb9943d7..7db519499f 100644 --- a/lib/prism/translation/parser/lexer.rb +++ b/lib/prism/translation/parser/lexer.rb @@ -10,15 +10,7 @@ module Prism # format for the parser gem. class Lexer # These tokens are always skipped -<<<<<<< HEAD -<<<<<<< HEAD TYPES_ALWAYS_SKIP = Set.new(%i[IGNORED_NEWLINE __END__ EOF]) -======= - TYPES_ALWAYS_SKIP = %i[IGNORED_NEWLINE __END__ EOF].to_set ->>>>>>> ca9500a3fc (Optimize array inclusion checks in the parser translator) -======= - TYPES_ALWAYS_SKIP = Set.new(%i[IGNORED_NEWLINE __END__ EOF]) ->>>>>>> 422d5c4c64 (Use Set.new over to_set) private_constant :TYPES_ALWAYS_SKIP # The direct translating of types between the two lexers. @@ -203,42 +195,18 @@ module Prism # # NOTE: In edge cases like `-> (foo = -> (bar) {}) do end`, please note that `kDO` is still returned # instead of `kDO_LAMBDA`, which is expected: https://github.com/ruby/prism/pull/3046 -<<<<<<< HEAD -<<<<<<< HEAD - LAMBDA_TOKEN_TYPES = Set.new([:kDO_LAMBDA, :tLAMBDA, :tLAMBEG]) -======= - LAMBDA_TOKEN_TYPES = [:kDO_LAMBDA, :tLAMBDA, :tLAMBEG].to_set ->>>>>>> ca9500a3fc (Optimize array inclusion checks in the parser translator) -======= LAMBDA_TOKEN_TYPES = Set.new([:kDO_LAMBDA, :tLAMBDA, :tLAMBEG]) ->>>>>>> 422d5c4c64 (Use Set.new over to_set) # The `PARENTHESIS_LEFT` token in Prism is classified as either `tLPAREN` or `tLPAREN2` in the Parser gem. # The following token types are listed as those classified as `tLPAREN`.
LPAREN_CONVERSION_TOKEN_TYPES = Set.new([ :kBREAK, :kCASE, :tDIVIDE, :kFOR, :kIF, :kNEXT, :kRETURN, :kUNTIL, :kWHILE, :tAMPER, :tANDOP, :tBANG, :tCOMMA, :tDOT2, :tDOT3, :tEQL, :tLPAREN, :tLPAREN2, :tLPAREN_ARG, :tLSHFT, :tNL, :tOP_ASGN, :tOROP, :tPIPE, :tSEMI, :tSTRING_DBEG, :tUMINUS, :tUPLUS -<<<<<<< HEAD -<<<<<<< HEAD - ]) - - # Types of tokens that are allowed to continue a method call with comments in-between. - # For these, the parser gem doesn't emit a newline token after the last comment. - COMMENT_CONTINUATION_TYPES = Set.new([:COMMENT, :AMPERSAND_DOT, :DOT]) -======= - ].to_set - - # Types of tokens that are allowed to continue a method call with comments in-between. - # For these, the parser gem doesn't emit a newline token after the last comment. - COMMENT_CONTINUATION_TYPES = [:COMMENT, :AMPERSAND_DOT, :DOT].to_set ->>>>>>> ca9500a3fc (Optimize array inclusion checks in the parser translator) -======= ]) # Types of tokens that are allowed to continue a method call with comments in-between. # For these, the parser gem doesn't emit a newline token after the last comment. COMMENT_CONTINUATION_TYPES = Set.new([:COMMENT, :AMPERSAND_DOT, :DOT]) ->>>>>>> 422d5c4c64 (Use Set.new over to_set) private_constant :COMMENT_CONTINUATION_TYPES # Heredocs are complex and require us to keep track of a bit of info to refer to later @@ -435,62 +403,8 @@ module Prism end when :tSTRING_CONTENT is_percent_array = percent_array?(quote_stack.last) -<<<<<<< HEAD -======= if (lines = token.value.lines).one? -<<<<<<< HEAD -<<<<<<< HEAD - # Heredoc interpolation can have multiple STRING_CONTENT nodes on the same line. 
- is_first_token_on_line = lexed[index - 1] && token.location.start_line != lexed[index - 2][0].location&.start_line - # The parser gem only removes indentation when the heredoc is not nested - not_nested = heredoc_stack.size == 1 - if is_percent_array - value = percent_array_unescape(value) - elsif is_first_token_on_line && not_nested && (current_heredoc = heredoc_stack.last).common_whitespace > 0 - value = trim_heredoc_whitespace(value, current_heredoc) - end ->>>>>>> bd3dd2b62a (Fix parser translator tokens for %-arrays with whitespace escapes) - - if (lines = token.value.lines).one? - # Prism usually emits a single token for strings with line continuations. - # For squiggly heredocs they are not joined so we do that manually here. - current_string = +"" - current_length = 0 - start_offset = token.location.start_offset - while token.type == :STRING_CONTENT - current_length += token.value.bytesize - # Heredoc interpolation can have multiple STRING_CONTENT nodes on the same line. - is_first_token_on_line = lexed[index - 1] && token.location.start_line != lexed[index - 2][0].location&.start_line - # The parser gem only removes indentation when the heredoc is not nested - not_nested = heredoc_stack.size == 1 - if is_percent_array - value = percent_array_unescape(token.value) - elsif is_first_token_on_line && not_nested && (current_heredoc = heredoc_stack.last).common_whitespace > 0 - value = trim_heredoc_whitespace(token.value, current_heredoc) - end - -<<<<<<< HEAD -======= - # Prism usually emits a single token for strings with line continuations. - # For squiggly heredocs they are not joined so we do that manually here. - current_string = +"" - current_length = 0 - start_offset = token.location.start_offset - while token.type == :STRING_CONTENT - current_length += token.value.bytesize - # Heredoc interpolation can have multiple STRING_CONTENT nodes on the same line. 
- is_first_token_on_line = lexed[index - 1] && token.location.start_line != lexed[index - 2][0].location&.start_line - # The parser gem only removes indentation when the heredoc is not nested - not_nested = heredoc_stack.size == 1 - if is_percent_array - value = percent_array_unescape(token.value) - elsif is_first_token_on_line && not_nested && (current_heredoc = heredoc_stack.last).common_whitespace > 0 - value = trim_heredoc_whitespace(token.value, current_heredoc) - end - ->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator) -======= # Prism usually emits a single token for strings with line continuations. # For squiggly heredocs they are not joined so we do that manually here. current_string = +"" @@ -508,7 +422,6 @@ module Prism value = trim_heredoc_whitespace(token.value, current_heredoc) end ->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator) current_string << unescape_string(value, quote_stack.last) if (backslash_count = token.value[/(\\{1,})\n/, 1]&.length).nil? || backslash_count.even? || !interpolation?(quote_stack.last) tokens << [:tSTRING_CONTENT, [current_string, range(start_offset, start_offset + current_length)]] @@ -553,11 +466,6 @@ module Prism current_line = +"" adjustment = 0 end -======= - end_offset = start_offset + adjusted_line.bytesize + adjustment - tokens << [:tSTRING_CONTENT, [adjusted_line, Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])]] - start_offset = end_offset ->>>>>>> a651126458 (Fix an incompatibility with the parser translator) end end next @@ -786,44 +694,7 @@ module Prism while (skipped = scanner.skip_until(/\\/)) # Append what was just skipped over, excluding the found backslash. 
- result.append_as_bytes(string.byteslice(scanner.pos - skipped, skipped - 1)) -<<<<<<< HEAD -<<<<<<< HEAD - escape_read(result, scanner, false, false) -======= - - if scanner.peek(1) == "\n" - # Line continuation - scanner.pos += 1 - elsif (replacement = ESCAPES[scanner.peek(1)]) - # Simple single-character escape sequences like \n - result.append_as_bytes(replacement) - scanner.pos += 1 - elsif (octal = scanner.check(/[0-7]{1,3}/)) - # \nnn - result.append_as_bytes(octal.to_i(8).chr) - scanner.pos += octal.bytesize - elsif (hex = scanner.check(/x([0-9a-fA-F]{1,2})/)) - # \xnn - result.append_as_bytes(hex[1..].to_i(16).chr) - scanner.pos += hex.bytesize - elsif (unicode = scanner.check(/u([0-9a-fA-F]{4})/)) - # \unnnn - result.append_as_bytes(unicode[1..].hex.chr(Encoding::UTF_8)) - scanner.pos += unicode.bytesize - elsif scanner.peek(3) == "u{}" - # https://github.com/whitequark/parser/issues/856 - scanner.pos += 3 - elsif (unicode_parts = scanner.check(/u{.*}/)) - # \u{nnnn ...} - unicode_parts[2..-2].split.each do |unicode| - result.append_as_bytes(unicode.hex.chr(Encoding::UTF_8)) - end - scanner.pos += unicode_parts.bytesize - end ->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator) -======= escape_read(result, scanner, false, false) ->>>>>>> 09c59a3aa5 (Handle control and meta escapes in parser translation) end # Add remaining chars @@ -835,13 +706,6 @@ module Prism end end -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator) -======= ->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator) # Certain strings are merged into a single string token.
def simplify_string?(value, quote) case quote @@ -859,24 +723,11 @@ module Prism end end -<<<<<<< HEAD -<<<<<<< HEAD -<<<<<<< HEAD -======= ->>>>>>> 09c59a3aa5 (Handle control and meta escapes in parser translation) # Escape a byte value, given the control and meta flags. def escape_build(value, control, meta) value &= 0x9f if control value |= 0x80 if meta -<<<<<<< HEAD -<<<<<<< HEAD - value -======= - value.chr ->>>>>>> 09c59a3aa5 (Handle control and meta escapes in parser translation) -======= value ->>>>>>> 161c606b1f (Fix parser translator crash for certain octal escapes) end # Read an escape out of the string scanner, given the control and meta @@ -920,15 +771,6 @@ module Prism end end -<<<<<<< HEAD -======= ->>>>>>> bd3dd2b62a (Fix parser translator tokens for %-arrays with whitespace escapes) -======= ->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator) -======= ->>>>>>> 09c59a3aa5 (Handle control and meta escapes in parser translation) -======= ->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator) # In a percent array, certain whitespace can be preceeded with a backslash, # causing the following characters to be part of the previous element. def percent_array_unescape(string) @@ -953,17 +795,6 @@ module Prism # Determine if characters preceeded by a backslash should be escaped or not def interpolation?(quote) !quote.end_with?("'") && !quote.start_with?("%q", "%w", "%i", "%s") -<<<<<<< HEAD -<<<<<<< HEAD - end - - # Regexp allow interpolation but are handled differently during unescaping - def regexp?(quote) - quote == "/" || quote.start_with?("%r") -======= ->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator) -======= ->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator) end # Regexp allow interpolation but are handled differently during unescaping |