[ruby/prism] Fix lex_compat for `<<HEREDOC # comment` at EOF

Fixes https://github.com/ruby/prism/pull/1874 https://github.com/ruby/prism/commit/304dd78dd2
author: Martin Emde <[email protected]> 2023-11-29 20:02:43 -0800
committer: git <[email protected]> 2023-11-30 14:10:04 +0000
commit: aac8be803409a18f6c32b438d154432eeb6f49e8 (patch)
tree: 9543753bc0eaf5edd85bae3f004c484f33e73f98
parent: 1802d14ca8924bd67e0915c5ad9f1fad5dba0602 (diff)
3 files changed, 36 insertions, 8 deletions
diff --git a/lib/prism/lex_compat.rb b/lib/prism/lex_compat.rb
index 66be275bcd..0336f48d6d 100644
--- a/lib/prism/lex_compat.rb
+++ b/lib/prism/lex_compat.rb
@@ -729,16 +729,31 @@ module Prism
             # comment and there is still whitespace after the comment, then
             # Ripper will append a on_nl token (even though there isn't
             # necessarily a newline). We mirror that here.
-            start_offset = previous_token.location.end_offset
-            end_offset = token.location.start_offset
-
-            if previous_token.type == :COMMENT && start_offset < end_offset
-              if bom
-                start_offset += 3
-                end_offset += 3
+            if previous_token.type == :COMMENT
+              # If the token before the comment was a heredoc end, then
+              # the comment's end_offset is before the heredoc end token.
+              # This is not the correct offset to use for figuring out if
+              # there is trailing whitespace after the comment.
+              # Use the end_offset of the heredoc end instead.
+              before_comment = result_value[index - 2]
+              before_comment &&= before_comment[0]
+
+              if before_comment&.type == :HEREDOC_END
+                start_offset = before_comment.location.end_offset
+              else
+                start_offset = previous_token.location.end_offset
               end
 
-              tokens << Token.new([[lineno, 0], :on_nl, source.byteslice(start_offset...end_offset), lex_state])
+              end_offset = token.location.start_offset
+
+              if start_offset < end_offset
+                if bom
+                  start_offset += 3
+                  end_offset += 3
+                end
+
+                tokens << Token.new([[lineno, 0], :on_nl, source.byteslice(start_offset...end_offset), lex_state])
+              end
             end
 
             Token.new([[lineno, column], event, value, lex_state])
diff --git a/test/prism/fixtures/heredoc_with_comment.txt b/test/prism/fixtures/heredoc_with_comment.txt
new file mode 100644
index 0000000000..cf48c12051
--- /dev/null
+++ b/test/prism/fixtures/heredoc_with_comment.txt
@@ -0,0 +1,2 @@
+<<-TARGET # comment
+TARGET
\ No newline at end of file
diff --git a/test/prism/snapshots/heredoc_with_comment_at_start.txt b/test/prism/snapshots/heredoc_with_comment_at_start.txt
new file mode 100644
index 0000000000..d9dfa8541e
--- /dev/null
+++ b/test/prism/snapshots/heredoc_with_comment_at_start.txt
@@ -0,0 +1,11 @@
+@ ProgramNode (location: (1,0)-(1,9))
+├── locals: []
+└── statements:
+    @ StatementsNode (location: (1,0)-(1,9))
+    └── body: (length: 1)
+        └── @ StringNode (location: (1,0)-(1,9))
+            ├── flags: ∅
+            ├── opening_loc: (1,0)-(1,9) = "<<-TARGET"
+            ├── content_loc: (2,0)-(3,0) = "  data\r\n"
+            ├── closing_loc: (3,0)-(4,0) = "TARGET\r\n"
+            └── unescaped: "  data\r\n"
author	Martin Emde <[email protected]>	2023-11-29 20:02:43 -0800
committer	git <[email protected]>	2023-11-30 14:10:04 +0000
commit	aac8be803409a18f6c32b438d154432eeb6f49e8 (patch)
tree	9543753bc0eaf5edd85bae3f004c484f33e73f98
parent	1802d14ca8924bd67e0915c5ad9f1fad5dba0602 (diff)