Fix up merge conflicts for prism sync

author: Kevin Newton <[email protected]> 2025-03-18 13:02:56 -0400
committer: Kevin Newton <[email protected]> 2025-03-18 13:36:53 -0400
commit: b003d4019421e004460a8c947db2695d71ad0b8c (patch)
tree: 387fa0cc390aaa90f008c4f95069fd11b28429e3 /lib/prism/translation/parser
parent: 33aaa069a4e7b405e6d7ec5fcbf04a487e36e345 (diff)
2 files changed, 14 insertions, 297 deletions
diff --git a/lib/prism/translation/parser/compiler.rb b/lib/prism/translation/parser/compiler.rb
index 338c916eec..aa1cb5d20b 100644
--- a/lib/prism/translation/parser/compiler.rb
+++ b/lib/prism/translation/parser/compiler.rb
@@ -1100,7 +1100,7 @@ module Prism
         def visit_interpolated_regular_expression_node(node)
           builder.regexp_compose(
             token(node.opening_loc),
-            visit_all(node.parts),
+            string_nodes_from_interpolation(node, node.opening),
             [node.closing[0], srange_offsets(node.closing_loc.start_offset, node.closing_loc.start_offset + 1)],
             builder.regexp_options([node.closing[1..], srange_offsets(node.closing_loc.start_offset + 1, node.closing_loc.end_offset)])
           )
@@ -1117,45 +1117,6 @@ module Prism
             return visit_heredoc(node) { |children, closing| builder.string_compose(token(node.opening_loc), children, closing) }
           end
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-          parts = if node.parts.one? { |part| part.type == :string_node }
-            node.parts.flat_map do |node|
-              if node.type == :string_node && node.unescaped.lines.count >= 2
-                start_offset = node.content_loc.start_offset
-
-                node.unescaped.lines.map do |line|
-                  end_offset = start_offset + line.bytesize
-                  offsets = srange_offsets(start_offset, end_offset)
-                  start_offset = end_offset
-
-                  builder.string_internal([line, offsets])
-                end
-              else
-                visit(node)
-              end
-=======
-          parts = node.parts.flat_map do |part|
-            # When the content of a string node is split across multiple lines, the
-            # parser gem creates individual string nodes for each line the content is part of.
-            if part.type == :string_node && part.content.include?("\n") && part.opening_loc.nil?
-              string_nodes_from_line_continuations(part.unescaped, part.content, part.content_loc.start_offset, node.opening)
-            else
-              visit(part)
->>>>>>> 2637007929 (Better handle all kinds of multiline strings in the parser translator)
-            end
-          else
-            visit_all(node.parts)
-          end
-
->>>>>>> a651126458 (Fix an incompatibility with the parser translator)
-=======
->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
-=======
->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
           builder.string_compose(
             token(node.opening_loc),
             string_nodes_from_interpolation(node, node.opening),
@@ -1739,23 +1700,7 @@ module Prism
               if node.content.include?("\n")
                 string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening)
               else
-<<<<<<< HEAD
-<<<<<<< HEAD
-                [builder.string_internal([node.unescaped, srange(node.content_loc)])]
-=======
-                start_offset = node.content_loc.start_offset
-
-                [content_lines, unescaped_lines].transpose.map do |content_line, unescaped_line|
-                  end_offset = start_offset + content_line.bytesize
-                  offsets = srange_offsets(start_offset, end_offset)
-                  start_offset = end_offset
-
-                  builder.string_internal([unescaped_line, offsets])
-                end
->>>>>>> a651126458 (Fix an incompatibility with the parser translator)
-=======
                 [builder.string_internal([node.unescaped, srange(node.content_loc)])]
->>>>>>> 2637007929 (Better handle all kinds of multiline strings in the parser translator)
               end
 
             builder.string_compose(
@@ -1799,10 +1744,6 @@ module Prism
               builder.symbol([node.unescaped, srange(node.location)])
             end
           else
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 2637007929 (Better handle all kinds of multiline strings in the parser translator)
             parts =
               if node.value == ""
                 []
@@ -1810,22 +1751,6 @@ module Prism
                 string_nodes_from_line_continuations(node.unescaped, node.value, node.value_loc.start_offset, node.opening)
               else
                 [builder.string_internal([node.unescaped, srange(node.value_loc)])]
-<<<<<<< HEAD
-=======
-            parts = if node.value.lines.one?
-              [builder.string_internal([node.unescaped, srange(node.value_loc)])]
-            else
-              start_offset = node.value_loc.start_offset
-
-              node.value.lines.map do |line|
-                end_offset = start_offset + line.bytesize
-                offsets = srange_offsets(start_offset, end_offset)
-                start_offset = end_offset
-
-                builder.string_internal([line, offsets])
->>>>>>> a651126458 (Fix an incompatibility with the parser translator)
-=======
->>>>>>> 2637007929 (Better handle all kinds of multiline strings in the parser translator)
               end
 
             builder.symbol_compose(
@@ -1964,23 +1889,7 @@ module Prism
             elsif node.content.include?("\n")
               string_nodes_from_line_continuations(node.unescaped, node.content, node.content_loc.start_offset, node.opening)
             else
-<<<<<<< HEAD
-<<<<<<< HEAD
-              [builder.string_internal([node.unescaped, srange(node.content_loc)])]
-=======
-              start_offset = node.content_loc.start_offset
-
-              node.unescaped.lines.map do |line|
-                end_offset = start_offset + line.bytesize
-                offsets = srange_offsets(start_offset, end_offset)
-                start_offset = end_offset
-
-                builder.string_internal([line, offsets])
-              end
->>>>>>> a651126458 (Fix an incompatibility with the parser translator)
-=======
               [builder.string_internal([node.unescaped, srange(node.content_loc)])]
->>>>>>> 2637007929 (Better handle all kinds of multiline strings in the parser translator)
             end
 
           builder.xstring_compose(
@@ -2219,31 +2128,14 @@ module Prism
         def string_nodes_from_line_continuations(unescaped, escaped, start_offset, opening)
           unescaped = unescaped.lines
           escaped = escaped.lines
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-          percent_array = opening&.start_with?("%w", "%W", "%i", "%I")
-=======
->>>>>>> 2637007929 (Better handle all kinds of multiline strings in the parser translator)
-=======
           percent_array = opening&.start_with?("%w", "%W", "%i", "%I")
->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
-=======
-          percent_array = opening&.start_with?("%w", "%W", "%i", "%I")
->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
+          regex = opening == "/" || opening&.start_with?("%r")
 
           # Non-interpolating strings
           if opening&.end_with?("'") || opening&.start_with?("%q", "%s", "%w", "%i")
             current_length = 0
             current_line = +""
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
-=======
->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
             escaped.filter_map.with_index do |escaped_line, index|
               unescaped_line = unescaped.fetch(index, "")
               current_length += escaped_line.bytesize
@@ -2258,19 +2150,6 @@ module Prism
               current_line = +""
               current_length = 0
               s
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
-          if opening&.end_with?("'")
-            escaped.each do |line|
-              escaped_lengths << line.bytesize
-              normalized_lengths << chomped_bytesize(line)
-              do_next_tokens << true
->>>>>>> 2637007929 (Better handle all kinds of multiline strings in the parser translator)
-=======
->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
-=======
->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
             end
           else
             escaped_lengths = []
@@ -2285,11 +2164,18 @@ module Prism
               .chunk_while { |before, after| before[/(\\*)\r?\n$/, 1]&.length&.odd? || false }
               .each do |lines|
                 escaped_lengths << lines.sum(&:bytesize)
-                unescaped_lines_count = lines.sum do |line|
-                  count = line.scan(/(\\*)n/).count { |(backslashes)| backslashes&.length&.odd? }
-                  count -= 1 if !line.end_with?("\n") && count > 0
-                  count
-                end
+
+                unescaped_lines_count =
+                  if regex
+                    0 # Will always be preserved as is
+                  else
+                    lines.sum do |line|
+                      count = line.scan(/(\\*)n/).count { |(backslashes)| backslashes&.length&.odd? }
+                      count -= 1 if !line.end_with?("\n") && count > 0
+                      count
+                    end
+                  end
+
                 extra = 1
                 extra = lines.count if percent_array # Account for line continuations in percent arrays
 
diff --git a/lib/prism/translation/parser/lexer.rb b/lib/prism/translation/parser/lexer.rb
index 39eb9943d7..7db519499f 100644
--- a/lib/prism/translation/parser/lexer.rb
+++ b/lib/prism/translation/parser/lexer.rb
@@ -10,15 +10,7 @@ module Prism
       # format for the parser gem.
       class Lexer
         # These tokens are always skipped
-<<<<<<< HEAD
-<<<<<<< HEAD
         TYPES_ALWAYS_SKIP = Set.new(%i[IGNORED_NEWLINE __END__ EOF])
-=======
-        TYPES_ALWAYS_SKIP = %i[IGNORED_NEWLINE __END__ EOF].to_set
->>>>>>> ca9500a3fc (Optimize array inclusion checks in the parser translator)
-=======
-        TYPES_ALWAYS_SKIP = Set.new(%i[IGNORED_NEWLINE __END__ EOF])
->>>>>>> 422d5c4c64 (Use Set.new over to_set)
         private_constant :TYPES_ALWAYS_SKIP
 
         # The direct translating of types between the two lexers.
@@ -203,42 +195,18 @@ module Prism
         #
         # NOTE: In edge cases like `-> (foo = -> (bar) {}) do end`, please note that `kDO` is still returned
         # instead of `kDO_LAMBDA`, which is expected: https://github.com/ruby/prism/pull/3046
-<<<<<<< HEAD
-<<<<<<< HEAD
-        LAMBDA_TOKEN_TYPES = Set.new([:kDO_LAMBDA, :tLAMBDA, :tLAMBEG])
-=======
-        LAMBDA_TOKEN_TYPES = [:kDO_LAMBDA, :tLAMBDA, :tLAMBEG].to_set
->>>>>>> ca9500a3fc (Optimize array inclusion checks in the parser translator)
-=======
         LAMBDA_TOKEN_TYPES = Set.new([:kDO_LAMBDA, :tLAMBDA, :tLAMBEG])
->>>>>>> 422d5c4c64 (Use Set.new over to_set)
 
         # The `PARENTHESIS_LEFT` token in Prism is classified as either `tLPAREN` or `tLPAREN2` in the Parser gem.
         # The following token types are listed as those classified as `tLPAREN`.
         LPAREN_CONVERSION_TOKEN_TYPES = Set.new([
           :kBREAK, :kCASE, :tDIVIDE, :kFOR, :kIF, :kNEXT, :kRETURN, :kUNTIL, :kWHILE, :tAMPER, :tANDOP, :tBANG, :tCOMMA, :tDOT2, :tDOT3,
           :tEQL, :tLPAREN, :tLPAREN2, :tLPAREN_ARG, :tLSHFT, :tNL, :tOP_ASGN, :tOROP, :tPIPE, :tSEMI, :tSTRING_DBEG, :tUMINUS, :tUPLUS
-<<<<<<< HEAD
-<<<<<<< HEAD
-        ])
-
-        # Types of tokens that are allowed to continue a method call with comments in-between.
-        # For these, the parser gem doesn't emit a newline token after the last comment.
-        COMMENT_CONTINUATION_TYPES = Set.new([:COMMENT, :AMPERSAND_DOT, :DOT])
-=======
-        ].to_set
-
-        # Types of tokens that are allowed to continue a method call with comments in-between.
-        # For these, the parser gem doesn't emit a newline token after the last comment.
-        COMMENT_CONTINUATION_TYPES = [:COMMENT, :AMPERSAND_DOT, :DOT].to_set
->>>>>>> ca9500a3fc (Optimize array inclusion checks in the parser translator)
-=======
         ])
 
         # Types of tokens that are allowed to continue a method call with comments in-between.
         # For these, the parser gem doesn't emit a newline token after the last comment.
         COMMENT_CONTINUATION_TYPES = Set.new([:COMMENT, :AMPERSAND_DOT, :DOT])
->>>>>>> 422d5c4c64 (Use Set.new over to_set)
         private_constant :COMMENT_CONTINUATION_TYPES
 
         # Heredocs are complex and require us to keep track of a bit of info to refer to later
@@ -435,62 +403,8 @@ module Prism
               end
             when :tSTRING_CONTENT
               is_percent_array = percent_array?(quote_stack.last)
-<<<<<<< HEAD
-=======
 
               if (lines = token.value.lines).one?
-<<<<<<< HEAD
-<<<<<<< HEAD
-                # Heredoc interpolation can have multiple STRING_CONTENT nodes on the same line.
-                is_first_token_on_line = lexed[index - 1] && token.location.start_line != lexed[index - 2][0].location&.start_line
-                # The parser gem only removes indentation when the heredoc is not nested
-                not_nested = heredoc_stack.size == 1
-                if is_percent_array
-                  value = percent_array_unescape(value)
-                elsif is_first_token_on_line && not_nested && (current_heredoc = heredoc_stack.last).common_whitespace > 0
-                  value = trim_heredoc_whitespace(value, current_heredoc)
-                end
->>>>>>> bd3dd2b62a (Fix parser translator tokens for %-arrays with whitespace escapes)
-
-              if (lines = token.value.lines).one?
-                # Prism usually emits a single token for strings with line continuations.
-                # For squiggly heredocs they are not joined so we do that manually here.
-                current_string = +""
-                current_length = 0
-                start_offset = token.location.start_offset
-                while token.type == :STRING_CONTENT
-                  current_length += token.value.bytesize
-                  # Heredoc interpolation can have multiple STRING_CONTENT nodes on the same line.
-                  is_first_token_on_line = lexed[index - 1] && token.location.start_line != lexed[index - 2][0].location&.start_line
-                  # The parser gem only removes indentation when the heredoc is not nested
-                  not_nested = heredoc_stack.size == 1
-                  if is_percent_array
-                    value = percent_array_unescape(token.value)
-                  elsif is_first_token_on_line && not_nested && (current_heredoc = heredoc_stack.last).common_whitespace > 0
-                    value = trim_heredoc_whitespace(token.value, current_heredoc)
-                  end
-
-<<<<<<< HEAD
-=======
-                # Prism usually emits a single token for strings with line continuations.
-                # For squiggly heredocs they are not joined so we do that manually here.
-                current_string = +""
-                current_length = 0
-                start_offset = token.location.start_offset
-                while token.type == :STRING_CONTENT
-                  current_length += token.value.bytesize
-                  # Heredoc interpolation can have multiple STRING_CONTENT nodes on the same line.
-                  is_first_token_on_line = lexed[index - 1] && token.location.start_line != lexed[index - 2][0].location&.start_line
-                  # The parser gem only removes indentation when the heredoc is not nested
-                  not_nested = heredoc_stack.size == 1
-                  if is_percent_array
-                    value = percent_array_unescape(token.value)
-                  elsif is_first_token_on_line && not_nested && (current_heredoc = heredoc_stack.last).common_whitespace > 0
-                    value = trim_heredoc_whitespace(token.value, current_heredoc)
-                  end
-
->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
-=======
                 # Prism usually emits a single token for strings with line continuations.
                 # For squiggly heredocs they are not joined so we do that manually here.
                 current_string = +""
@@ -508,7 +422,6 @@ module Prism
                     value = trim_heredoc_whitespace(token.value, current_heredoc)
                   end
 
->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
                   current_string << unescape_string(value, quote_stack.last)
                   if (backslash_count = token.value[/(\\{1,})\n/, 1]&.length).nil? || backslash_count.even? || !interpolation?(quote_stack.last)
                     tokens << [:tSTRING_CONTENT, [current_string, range(start_offset, start_offset + current_length)]]
@@ -553,11 +466,6 @@ module Prism
                     current_line = +""
                     adjustment = 0
                   end
-=======
-                  end_offset = start_offset + adjusted_line.bytesize + adjustment
-                  tokens << [:tSTRING_CONTENT, [adjusted_line, Range.new(source_buffer, offset_cache[start_offset], offset_cache[end_offset])]]
-                  start_offset = end_offset
->>>>>>> a651126458 (Fix an incompatibility with the parser translator)
                 end
               end
               next
@@ -786,44 +694,7 @@ module Prism
             while (skipped = scanner.skip_until(/\\/))
               # Append what was just skipped over, excluding the found backslash.
               result.append_as_bytes(string.byteslice(scanner.pos - skipped, skipped - 1))
-<<<<<<< HEAD
-<<<<<<< HEAD
-              escape_read(result, scanner, false, false)
-=======
-
-              if scanner.peek(1) == "\n"
-                # Line continuation
-                scanner.pos += 1
-              elsif (replacement = ESCAPES[scanner.peek(1)])
-                # Simple single-character escape sequences like \n
-                result.append_as_bytes(replacement)
-                scanner.pos += 1
-              elsif (octal = scanner.check(/[0-7]{1,3}/))
-                # \nnn
-                result.append_as_bytes(octal.to_i(8).chr)
-                scanner.pos += octal.bytesize
-              elsif (hex = scanner.check(/x([0-9a-fA-F]{1,2})/))
-                # \xnn
-                result.append_as_bytes(hex[1..].to_i(16).chr)
-                scanner.pos += hex.bytesize
-              elsif (unicode = scanner.check(/u([0-9a-fA-F]{4})/))
-                # \unnnn
-                result.append_as_bytes(unicode[1..].hex.chr(Encoding::UTF_8))
-                scanner.pos += unicode.bytesize
-              elsif scanner.peek(3) == "u{}"
-                # https://github.com/whitequark/parser/issues/856
-                scanner.pos += 3
-              elsif (unicode_parts = scanner.check(/u{.*}/))
-                # \u{nnnn ...}
-                unicode_parts[2..-2].split.each do |unicode|
-                  result.append_as_bytes(unicode.hex.chr(Encoding::UTF_8))
-                end
-                scanner.pos += unicode_parts.bytesize
-              end
->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
-=======
               escape_read(result, scanner, false, false)
->>>>>>> 09c59a3aa5 (Handle control and meta escapes in parser translation)
             end
 
             # Add remaining chars
@@ -835,13 +706,6 @@ module Prism
           end
         end
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
-=======
->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
         # Certain strings are merged into a single string token.
         def simplify_string?(value, quote)
           case quote
@@ -859,24 +723,11 @@ module Prism
           end
         end
 
-<<<<<<< HEAD
-<<<<<<< HEAD
-<<<<<<< HEAD
-=======
->>>>>>> 09c59a3aa5 (Handle control and meta escapes in parser translation)
         # Escape a byte value, given the control and meta flags.
         def escape_build(value, control, meta)
           value &= 0x9f if control
           value |= 0x80 if meta
-<<<<<<< HEAD
-<<<<<<< HEAD
-          value
-=======
-          value.chr
->>>>>>> 09c59a3aa5 (Handle control and meta escapes in parser translation)
-=======
           value
->>>>>>> 161c606b1f (Fix parser translator crash for certain octal escapes)
         end
 
         # Read an escape out of the string scanner, given the control and meta
@@ -920,15 +771,6 @@ module Prism
           end
         end
 
-<<<<<<< HEAD
-=======
->>>>>>> bd3dd2b62a (Fix parser translator tokens for %-arrays with whitespace escapes)
-=======
->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
-=======
->>>>>>> 09c59a3aa5 (Handle control and meta escapes in parser translation)
-=======
->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
         # In a percent array, certain whitespace can be preceeded with a backslash,
         # causing the following characters to be part of the previous element.
         def percent_array_unescape(string)
@@ -953,17 +795,6 @@ module Prism
         # Determine if characters preceeded by a backslash should be escaped or not
         def interpolation?(quote)
           !quote.end_with?("'") && !quote.start_with?("%q", "%w", "%i", "%s")
-<<<<<<< HEAD
-<<<<<<< HEAD
-        end
-
-        # Regexp allow interpolation but are handled differently during unescaping
-        def regexp?(quote)
-          quote == "/" || quote.start_with?("%r")
-=======
->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
-=======
->>>>>>> 4edfe9d981 (Further refine string handling in the parser translator)
         end
 
         # Regexp allow interpolation but are handled differently during unescaping
author	Kevin Newton <[email protected]>	2025-03-18 13:02:56 -0400
committer	Kevin Newton <[email protected]>	2025-03-18 13:36:53 -0400
commit	b003d4019421e004460a8c947db2695d71ad0b8c (patch)
tree	387fa0cc390aaa90f008c4f95069fd11b28429e3 /lib/prism/translation/parser
parent	33aaa069a4e7b405e6d7ec5fcbf04a487e36e345 (diff)