diff options
author | Kevin Newton <[email protected]> | 2024-10-16 14:22:26 -0400 |
---|---|---|
committer | git <[email protected]> | 2024-10-16 18:22:35 +0000 |
commit | 9251971335ded7d1eb196f79648fdf12f1208954 (patch) | |
tree | e76d1903e388c8af4826a00f829d0c238a075098 | |
parent | 3affd43c2de8cf556b2f143e577c0a3ccf2acc31 (diff) |
[ruby/prism] Add a sample for multiplexing constants
https://github.com/ruby/prism/commit/e265dc5862
-rw-r--r-- | sample/prism/multiplex_constants.rb | 138 |
1 files changed, 138 insertions, 0 deletions
# diff --git a/sample/prism/multiplex_constants.rb b/sample/prism/multiplex_constants.rb
# (new file mode 100644, index 0000000000..5234292dfa)

# This script indexes the classes and modules within a set of files using the
# saved source functionality.

require "prism"
require "etc"
require "tempfile"

module Indexer
  # A class that implements the #enter functionality so that it can be passed to
  # the various save* APIs. This effectively bundles up all of the node_id and
  # field_name pairs so that they can be written back to the parent process.
  class Repository
    attr_reader :scope, :entries

    def initialize
      @scope = []
      @entries = []
    end

    # Run the given block with next_scope (an array of constant name parts)
    # appended to the current scope. The previous scope is restored afterwards,
    # even if the block raises, so that one failing node cannot leave the
    # repository with a stale nesting. Returns the value of the block.
    def with(next_scope)
      previous_scope = scope
      @scope = scope + next_scope
      yield
    ensure
      @scope = previous_scope
    end

    # True when nothing has been recorded yet.
    def empty?
      entries.empty?
    end

    # Record a [qualified name, node_id, field_name] triple, where the
    # qualified name is the current scope joined with "::".
    def enter(node_id, field_name)
      entries << [scope.join("::"), node_id, field_name]
    end
  end

  # Visit the classes and modules in the AST and save their locations into the
  # repository.
  class Visitor < Prism::Visitor
    attr_reader :repository

    def initialize(repository)
      @repository = repository
    end

    def visit_class_node(node)
      index_namespace(node)
    end

    def visit_module_node(node)
      index_namespace(node)
    end

    private

    # Shared handling for class and module nodes: save the location of the
    # constant path under the enclosing scope plus the node's own name parts,
    # then descend into the body with that scope still active so that nested
    # definitions are fully qualified.
    def index_namespace(node)
      repository.with(node.constant_path.full_name_parts) do
        node.constant_path.save_location(repository)
        visit(node.body)
      end
    end
  end

  # Index the classes and modules within a file. If there are any entries,
  # return them as a serialized string to the parent process. The format is the
  # filepath followed by the flattened entries, all separated by "|". Returns
  # nil when the file contains no classes or modules.
  def self.index(filepath)
    repository = Repository.new
    Prism.parse_file(filepath).value.accept(Visitor.new(repository))
    return if repository.empty?

    "#{filepath}|#{repository.entries.join("|")}"
  end
end

# Index every file matching glob using count worker processes. Returns a hash
# that maps each fully qualified class/module name to the list of values
# produced by the relocation repository for every place it was found.
def index_glob(glob, count = Etc.nprocessors - 1)
  # Etc.nprocessors - 1 is 0 on a single-CPU machine. Without at least one
  # worker nothing would be indexed and the round-robin modulo below would
  # divide by zero, so always run at least one worker.
  count = 1 if count < 1

  process_ids = []
  filepath_writers = []
  index_reader, index_writer = IO.pipe

  # For each number in count, fork off a worker that has access to two pipes.
  # The first pipe is the index_writer, to which it writes all of the results of
  # indexing the various files. The second pipe is the filepath_reader, from
  # which it reads the filepaths that it needs to index.
  count.times do
    filepath_reader, filepath_writer = IO.pipe

    process_ids << fork do
      filepath_writer.close
      index_reader.close

      # The child also inherits the write ends of the pipes that belong to the
      # workers forked before it. Close them here, otherwise those workers
      # would not see EOF until this process exits.
      filepath_writers.each(&:close)

      while (filepath = filepath_reader.gets(chomp: true))
        results = Indexer.index(filepath)
        index_writer.puts(results) if results
      end
    end

    filepath_reader.close
    filepath_writers << filepath_writer
  end

  index_writer.close

  # In a separate thread, write all of the filepaths to the various worker
  # processes. This is done in a separate thread since puts will eventually
  # block when each of the pipe buffers fills up. We write in a round-robin
  # fashion to the various workers. This could be improved using a work-stealing
  # algorithm, but is fine if you don't end up having a ton of variety in the
  # size of your files.
  writer_thread =
    Thread.new do
      Dir[glob].each_with_index do |filepath, position|
        filepath_writers[position % count].puts(filepath)
      end
    end

  index = Hash.new { |hash, key| hash[key] = [] }

  # In a separate thread, read all of the results from the various worker
  # processes and store them in the index. This is done in a separate thread so
  # that reads and writes can be interleaved. This is important so that the
  # index pipe doesn't fill up and block the writer.
  reader_thread =
    Thread.new do
      while (line = index_reader.gets(chomp: true))
        filepath, *entries = line.split("|")
        repository =
          Prism::Relocation
            .filepath(filepath)
            .filepath
            .lines
            .code_unit_columns(Encoding::UTF_16LE)
            .leading_comments

        # Each entry was serialized as three consecutive fields.
        entries.each_slice(3) do |(name, node_id, field_name)|
          index[name] << repository.enter(Integer(node_id), field_name.to_sym)
        end
      end
    end

  # Closing the filepath writers signals EOF to the workers, which lets them
  # exit and in turn closes their copies of the index writer so the reader
  # thread can finish.
  writer_thread.join
  filepath_writers.each(&:close)

  reader_thread.join
  index_reader.close

  process_ids.each { |process_id| Process.wait(process_id) }
  index
end

index_glob(File.expand_path("../../lib/**/*.rb", __dir__))