[ruby/prism] Add some code samples

https://github.com/ruby/prism/commit/f5c883af56
author: Kevin Newton <[email protected]> 2024-06-06 10:04:23 -0400
committer: git <[email protected]> 2024-06-06 14:15:55 +0000
commit: 78d7b470ec100495a8879ebf319d627f78bf60c2 (patch)
tree: 18112bb9f332f0c7bd764e4c49e129ca05830cfe
parent: b0059980d04c2395589f3da4a9babaeb45dd4429 (diff)
5 files changed, 309 insertions, 89 deletions
diff --git a/sample/find_calls.rb b/sample/find_calls.rb
new file mode 100644
index 0000000000..30af56c719
--- /dev/null
+++ b/sample/find_calls.rb
@@ -0,0 +1,105 @@
+# This script finds calls to a specific method with a certain keyword parameter
+# within a given source file.
+
+require "prism"
+require "pp"
+
+# For deprecation or refactoring purposes, it's often useful to find all of the
+# places that call a specific method with a specific keyword parameter. This is
+# easily accomplished with a visitor such as this one.
+class QuxParameterVisitor < Prism::Visitor
+  def initialize(calls)
+    @calls = calls
+  end
+
+  def visit_call_node(node)
+    @calls << node if qux?(node)
+    super
+  end
+
+  private
+
+  def qux?(node)
+    # All nodes implement pattern matching, so you can use the `in` operator to
+    # pull out all of their individual fields. As you can see by this extensive
+    # pattern match, this is quite a powerful feature.
+    node in {
+      # This checks that the receiver is the constant Qux or the constant path
+      # ::Qux. We are assuming relative constants are fine in this case.
+      receiver: (
+        Prism::ConstantReadNode[name: :Qux] |
+        Prism::ConstantPathNode[parent: nil, name: :Qux]
+      ),
+      # This checks that the name of the method is qux. We purposefully are not
+      # checking the call operator (., ::, or &.) because we want all of them.
+      # In other ASTs, this would be multiple node types, but prism combines
+      # them all into one for convenience.
+      name: :qux,
+      arguments: Prism::ArgumentsNode[
+        # Here we're going to use the "find" pattern to find the keyword hash
+        # node that has the correct key.
+        arguments: [
+          *,
+          Prism::KeywordHashNode[
+            # Here we'll use another "find" pattern to find the key that we are
+            # specifically looking for.
+            elements: [
+              *,
+              # Finally, we can assert against the key itself. Note that we are
+              # not looking at the value of the hash pair, because we are only
+              # specifically looking for a key.
+              Prism::AssocNode[key: Prism::SymbolNode[unescaped: "qux"]],
+              *
+            ]
+          ],
+          *
+        ]
+      ]
+    }
+  end
+end
+
+calls = []
+Prism.parse_stream(DATA).value.accept(QuxParameterVisitor.new(calls))
+
+calls.each do |call|
+  print "CallNode "
+  puts PP.pp(call.location, +"")
+  print "  "
+  puts call.slice
+end
+
+# =>
+# CallNode (5,6)-(5,29)
+#   Qux.qux(222, qux: true)
+# CallNode (9,6)-(9,30)
+#   Qux&.qux(333, qux: true)
+# CallNode (20,6)-(20,51)
+#   Qux::qux(888, qux: ::Qux.qux(999, qux: true))
+# CallNode (20,25)-(20,50)
+#   ::Qux.qux(999, qux: true)
+
+__END__
+module Foo
+  class Bar
+    def baz1
+      Qux.qux(111)
+      Qux.qux(222, qux: true)
+    end
+
+    def baz2
+      Qux&.qux(333, qux: true)
+      Qux&.qux(444)
+    end
+
+    def baz3
+      qux(555, qux: false)
+      666.qux(666)
+    end
+
+    def baz4
+      Qux::qux(777)
+      Qux::qux(888, qux: ::Qux.qux(999, qux: true))
+    end
+  end
+end
diff --git a/sample/find_comments.rb b/sample/find_comments.rb
index 2468444210..6a26cd32b7 100644
--- a/sample/find_comments.rb
+++ b/sample/find_comments.rb
@@ -1,22 +1,100 @@
-# This script finds all of the comments within a given source file.
+# This script finds all of the comments attached to a given method in a source file.
 
 require "prism"
 
-Prism.parse_comments(DATA.read).each do |comment|
+class FindMethodComments < Prism::Visitor
+  def initialize(target, comments, nesting = [])
+    @target = target
+    @comments = comments
+    @nesting = nesting
+  end
+
+  # These visit methods are specific to each class. Defining a visitor allows
+  # you to group functionality that applies to all node types into a single
+  # class. You can find which method corresponds to which node type by looking
+  # at the class name, calling #type on the node, or by looking at the #accept
+  # method definition on the node.
+  def visit_module_node(node)
+    visitor = FindMethodComments.new(@target, @comments, [*@nesting, node.name])
+    node.compact_child_nodes.each { |child| child.accept(visitor) }
+  end
+
+  def visit_class_node(node)
+    # We could keep track of an internal state where we push the class name here
+    # and then pop it after the visit is complete. However, it is often simpler
+    # and cleaner to generate a new visitor instance when the state changes,
+    # because then the state is immutable and it's easier to reason about. This
+    # also provides for more debugging opportunity in the initializer.
+    visitor = FindMethodComments.new(@target, @comments, [*@nesting, node.name])
+    node.compact_child_nodes.each { |child| child.accept(visitor) }
+  end
+
+  def visit_def_node(node)
+    if [*@nesting, node.name] == @target
+      # Comments are always attached to locations (either inner locations on a
+      # node like the location of a keyword or the location on the node itself).
+      # Comments are considered either "leading" or "trailing", which means that
+      # they occur before or after the location, respectively. In the case of
+      # documentation, we only want to consider leading comments. You can also
+      # fetch all of the comments on a location with #comments.
+      @comments.concat(node.location.leading_comments)
+    else
+      super
+    end
+  end
+end
+
+# Most of the time, the concept of "finding" something in the AST can be
+# accomplished either with a queue or with a visitor. In this case we will use a
+# visitor, but a queue would work just as well.
+def find_comments(result, path)
+  target = path.split(/::|#/).map(&:to_sym)
+  comments = []
+
+  result.value.accept(FindMethodComments.new(target, comments))
+  comments
+end
+
+result = Prism.parse_stream(DATA)
+result.attach_comments!
+
+find_comments(result, "Foo#foo").each do |comment|
+  puts comment.inspect
+  puts comment.slice
+end
+
+# =>
+# #<Prism::InlineComment @location=#<Prism::Location @start_offset=205 @length=27 start_line=13>>
+# # This is the documentation
+# #<Prism::InlineComment @location=#<Prism::Location @start_offset=235 @length=21 start_line=14>>
+# # for the foo method.
+
+find_comments(result, "Foo::Bar#bar").each do |comment|
   puts comment.inspect
   puts comment.slice
 end
 
 # =>
-# #<Prism::InlineComment @location=#<Prism::Location @start_offset=0 @length=42 start_line=1>>
-# # This is documentation for the Foo class.
-# #<Prism::InlineComment @location=#<Prism::Location @start_offset=55 @length=43 start_line=3>>
-# # This is documentation for the bar method.
+# #<Prism::InlineComment @location=#<Prism::Location @start_offset=126 @length=23 start_line=7>>
+# # This is documentation
+# #<Prism::InlineComment @location=#<Prism::Location @start_offset=154 @length=21 start_line=8>>
+# # for the bar method.
 
 __END__
-# This is documentation for the Foo class.
-class Foo
-  # This is documentation for the bar method.
-  def bar
+# This is the documentation
+# for the Foo module.
+module Foo
+  # This is documentation
+  # for the Bar class.
+  class Bar
+    # This is documentation
+    # for the bar method.
+    def bar
+    end
+  end
+
+  # This is the documentation
+  # for the foo method.
+  def foo
   end
 end
diff --git a/sample/find_nodes.rb b/sample/find_nodes.rb
deleted file mode 100644
index 3c96756780..0000000000
--- a/sample/find_nodes.rb
+++ /dev/null
@@ -1,41 +0,0 @@
-# This script finds all of the nodes of a specific type within a given source
-# file. It uses the visitor class to traverse the AST.
-
-require "prism"
-
-class RegexpVisitor < Prism::Visitor
-  def initialize(regexps)
-    @regexps = regexps
-  end
-
-  def visit_regular_expression_node(node)
-    @regexps << node
-    super
-  end
-end
-
-result = Prism.parse_stream(DATA)
-regexps = []
-
-result.value.accept(RegexpVisitor.new(regexps))
-puts regexps.map(&:inspect)
-
-# =>
-# @ RegularExpressionNode (location: (2,9)-(2,14))
-# ├── flags: forced_us_ascii_encoding
-# ├── opening_loc: (2,9)-(2,10) = "/"
-# ├── content_loc: (2,10)-(2,13) = "foo"
-# ├── closing_loc: (2,13)-(2,14) = "/"
-# └── unescaped: "foo"
-# @ RegularExpressionNode (location: (3,9)-(3,14))
-# ├── flags: forced_us_ascii_encoding
-# ├── opening_loc: (3,9)-(3,10) = "/"
-# ├── content_loc: (3,10)-(3,13) = "bar"
-# ├── closing_loc: (3,13)-(3,14) = "/"
-# └── unescaped: "bar"
-
-__END__
-class Foo
-  REG1 = /foo/
-  REG2 = /bar/
-end
diff --git a/sample/locate_nodes.rb b/sample/locate_nodes.rb
index 83f67cab0e..7a51db4367 100644
--- a/sample/locate_nodes.rb
+++ b/sample/locate_nodes.rb
@@ -1,63 +1,78 @@
 # This script locates a set of nodes determined by a line and column (in bytes).
 
 require "prism"
+require "pp"
 
+# This method determines if the given location covers the given line and column.
+# It's important to note that columns (and offsets) in prism are always in
+# bytes. This is because prism supports all 90 source encodings that Ruby
+# supports. You can always retrieve the column (or offset) of a location in
+# other units with other provided APIs, like #start_character_column or
+# #start_code_units_column.
+def covers?(location, line:, column:)
+  start_line = location.start_line
+  end_line = location.end_line
+
+  if start_line == end_line
+    # If the location only spans one line, then we only check if the line
+    # matches and that the column is covered by the column range.
+    line == start_line && (location.start_column...location.end_column).cover?(column)
+  else
+    # Otherwise, we check that it is on the start line and the column is greater
+    # than or equal to the start column, or that it is on the end line and the
+    # column is less than the end column, or that it is between the start and
+    # end lines.
+    (line == start_line && column >= location.start_column) ||
+      (line == end_line && column < location.end_column) ||
+      (line > start_line && line < end_line)
+  end
+end
+
+# This method descends down into the AST whose root is `node` and returns the
+# array of all of the nodes that cover the given line and column.
 def locate(node, line:, column:)
   queue = [node]
   result = []
 
+  # We could use a recursive method here instead if we wanted, but it's
+  # important to note that that will not work for ASTs that are nested deeply
+  # enough to cause a stack overflow.
   while (node = queue.shift)
-    # Each node that we visit should be added to the result, so that we end up
-    # with an array of the nodes that we traversed.
     result << node
 
-    # Iterate over each child node.
-    node.compact_child_nodes.each do |child_node|
-      child_location = child_node.location
-
-      start_line = child_location.start_line
-      end_line = child_location.end_line
-
-      # Here we determine if the given coordinates are contained within the
-      # child node's location.
-      if start_line == end_line
-        if line == start_line && column >= child_location.start_column && column < child_location.end_column
-          queue << child_node
-          break
-        end
-      elsif (line == start_line && column >= child_location.start_column) || (line == end_line && column < child_location.end_column)
-        queue << child_node
-        break
-      elsif line > start_line && line < end_line
-        queue << child_node
-        break
-      end
+    # Nodes have `child_nodes` and `compact_child_nodes`. `child_nodes` has
+    # consistent indices but include `nil` for optional fields that are not
+    # present, whereas `compact_child_nodes` has inconsistent indices but does
+    # not include `nil` for optional fields that are not present.
+    node.compact_child_nodes.find do |child|
+      queue << child if covers?(child.location, line: line, column: column)
     end
   end
 
-  # Finally, we return the result.
   result
 end
 
 result = Prism.parse_stream(DATA)
 locate(result.value, line: 4, column: 14).each_with_index do |node, index|
-  location = node.location
-  puts "#{" " * index}#{node.type}@#{location.start_line}:#{location.start_column}-#{location.end_line}:#{location.end_column}"
+  print " " * index
+  print node.class.name.split("::", 2).last
+  print " "
+  puts PP.pp(node.location, +"")
 end
 
 # =>
-# program_node@1:0-7:3
-#  statements_node@1:0-7:3
-#   module_node@1:0-7:3
-#    statements_node@2:2-6:5
-#     class_node@2:2-6:5
-#      statements_node@3:4-5:7
-#       def_node@3:4-5:7
-#        statements_node@4:6-4:21
-#         call_node@4:6-4:21
-#          call_node@4:6-4:15
-#           arguments_node@4:12-4:15
-#            integer_node@4:12-4:15
+# ProgramNode (1,0)-(7,3)
+#  StatementsNode (1,0)-(7,3)
+#   ModuleNode (1,0)-(7,3)
+#    StatementsNode (2,2)-(6,5)
+#     ClassNode (2,2)-(6,5)
+#      StatementsNode (3,4)-(5,7)
+#       DefNode (3,4)-(5,7)
+#        StatementsNode (4,6)-(4,21)
+#         CallNode (4,6)-(4,21)
+#          CallNode (4,6)-(4,15)
+#           ArgumentsNode (4,12)-(4,15)
+#            IntegerNode (4,12)-(4,15)
 
 __END__
 module Foo
diff --git a/sample/visit_nodes.rb b/sample/visit_nodes.rb
new file mode 100644
index 0000000000..5ba703b0a3
--- /dev/null
+++ b/sample/visit_nodes.rb
@@ -0,0 +1,63 @@
+# This script visits all of the nodes of a specific type within a given source
+# file. It uses the visitor class to traverse the AST.
+
+require "prism"
+require "pp"
+
+class CaseInsensitiveRegularExpressionVisitor < Prism::Visitor
+  def initialize(regexps)
+    @regexps = regexps
+  end
+
+  # As the visitor is walking the tree, this method will only be called when it
+  # encounters a regular expression node. We can then call any regular
+  # expression-specific APIs. In this case, we are only interested in the
+  # regular expressions that are case-insensitive, which we can retrieve with
+  # the #ignore_case? method.
+  def visit_regular_expression_node(node)
+    @regexps << node if node.ignore_case?
+    super
+  end
+
+  def visit_interpolated_regular_expression_node(node)
+    @regexps << node if node.ignore_case?
+
+    # The default behavior of the visitor is to continue visiting the children
+    # of the node. Because Ruby is so dynamic, it's actually possible for
+    # another regular expression to be interpolated in statements contained
+    # within the #{} contained in this interpolated regular expression node. By
+    # calling `super`, we ensure the visitor will continue. Failing to call
+    # `super` will cause the visitor to stop the traversal beneath this node, which
+    # can also be useful in some cases.
+    super
+  end
+end
+
+result = Prism.parse_stream(DATA)
+regexps = []
+
+result.value.accept(CaseInsensitiveRegularExpressionVisitor.new(regexps))
+regexps.each do |node|
+  print node.class.name.split("::", 2).last
+  print " "
+  puts PP.pp(node.location, +"")
+
+  if node.is_a?(Prism::RegularExpressionNode)
+    print "  "
+    p node.unescaped
+  end
+end
+
+# =>
+# InterpolatedRegularExpressionNode (3,9)-(3,47)
+# RegularExpressionNode (3,16)-(3,22)
+#   "bar"
+# RegularExpressionNode (4,9)-(4,15)
+#   "bar"
+
+__END__
+class Foo
+  REG1 = /foo/
+  REG2 = /foo #{/bar/i =~ "" ? "bar" : "baz"}/i
+  REG3 = /bar/i
+end
author	Kevin Newton <[email protected]>	2024-06-06 10:04:23 -0400
committer	git <[email protected]>	2024-06-06 14:15:55 +0000
commit	78d7b470ec100495a8879ebf319d627f78bf60c2 (patch)
tree	18112bb9f332f0c7bd764e4c49e129ca05830cfe
parent	b0059980d04c2395589f3da4a9babaeb45dd4429 (diff)