diff options
Diffstat (limited to 'lib/prism/translation/parser.rb')
-rw-r--r-- | lib/prism/translation/parser.rb | 136 |
1 files changed, 136 insertions, 0 deletions
# frozen_string_literal: true

require "parser"

module Prism
  module Translation
    # This class is the entry-point for converting a prism syntax tree into the
    # whitequark/parser gem's syntax tree. It inherits from the base parser for
    # the parser gem, and overrides the parse* methods to parse with prism and
    # then translate.
    #
    # Each public parse* method stores the buffer being parsed in
    # @source_buffer for the duration of the call and clears it again in an
    # ensure clause, so the buffer is released even when parsing or
    # translation raises.
    class Parser < ::Parser::Base
      Racc_debug_parser = false # :nodoc:

      def version # :nodoc:
        33
      end

      # The default encoding for Ruby files is UTF-8.
      def default_encoding
        Encoding::UTF_8
      end

      def yyerror # :nodoc:
      end

      # Parses a source buffer and returns the AST.
      #
      # The buffer's source is parsed by prism (using the buffer's name as the
      # filepath) and the resulting tree is translated by build_ast.
      def parse(source_buffer)
        @source_buffer = source_buffer
        source = source_buffer.source

        build_ast(
          Prism.parse(source, filepath: source_buffer.name).value,
          build_offset_cache(source)
        )
      ensure
        @source_buffer = nil
      end

      # Parses a source buffer and returns the AST and the source code comments.
      #
      # The return value is a two-element array: [ast, comments].
      def parse_with_comments(source_buffer)
        @source_buffer = source_buffer
        source = source_buffer.source

        offset_cache = build_offset_cache(source)
        result = Prism.parse(source, filepath: source_buffer.name)

        [
          build_ast(result.value, offset_cache),
          build_comments(result.comments, offset_cache)
        ]
      ensure
        @source_buffer = nil
      end

      # Parses a source buffer and returns the AST, the source code comments,
      # and the tokens emitted by the lexer.
      #
      # The return value is a three-element array: [ast, comments, tokens].
      # The second parameter is accepted but never read by this
      # implementation.
      def tokenize(source_buffer, _recover = false)
        @source_buffer = source_buffer
        source = source_buffer.source

        offset_cache = build_offset_cache(source)
        result = Prism.parse_lex(source, filepath: source_buffer.name)
        program, tokens = result.value

        [
          build_ast(program, offset_cache),
          build_comments(result.comments, offset_cache),
          build_tokens(tokens, offset_cache)
        ]
      ensure
        @source_buffer = nil
      end

      # Since prism resolves num params for us, we don't need to support this
      # kind of logic here.
      #
      # Returns true when the node's first child is exactly a numbered
      # parameter name (_1 through _9), false otherwise.
      def try_declare_numparam(node)
        node.children[0].match?(/\A_[1-9]\z/)
      end

      private

      # Prism deals with offsets in bytes, while the parser gem deals with
      # offsets in characters. We need to handle this conversion in order to
      # build the parser gem AST.
      #
      # If the bytesize of the source is the same as the length, then every
      # character is a single byte and we can just use the offset directly, so
      # an identity lambda is returned.
      #
      # Otherwise we walk the source once and build an array that is indexed
      # by byte offset and holds the matching character offset. Every byte of
      # a multi-byte character maps to that character's offset, and one final
      # entry maps the end-of-source byte offset to the source length. This is
      # a single linear pass, as opposed to re-slicing the source prefix for
      # every offset that gets looked up.
      #
      # Both return values respond to [] with a byte offset, which is the only
      # way the cache is consumed in this file.
      def build_offset_cache(source)
        if source.bytesize == source.length
          -> (offset) { offset }
        else
          offset_cache = []
          char_offset = 0

          source.each_char do |char|
            char.bytesize.times { offset_cache << char_offset }
            char_offset += 1
          end

          # Entry for the offset one past the last byte (i.e. end of source).
          offset_cache << char_offset
        end
      end

      # Build the parser gem AST from the prism AST.
      #
      # The prism program node is visited by a Compiler that is given this
      # parser and the offset cache.
      def build_ast(program, offset_cache)
        program.accept(Compiler.new(self, offset_cache))
      end

      # Build the parser gem comments from the prism comments.
      #
      # Each prism comment becomes a ::Parser::Source::Comment whose range is
      # the comment's start and end offsets translated through the offset
      # cache.
      def build_comments(comments, offset_cache)
        comments.map do |comment|
          location = comment.location

          ::Parser::Source::Comment.new(
            ::Parser::Source::Range.new(
              source_buffer,
              offset_cache[location.start_offset],
              offset_cache[location.end_offset]
            )
          )
        end
      end

      # Build the parser gem tokens from the prism tokens.
      #
      # Only the first element of each entry in +tokens+ is handed to the
      # Lexer.
      def build_tokens(tokens, offset_cache)
        Lexer.new(source_buffer, tokens.map(&:first), offset_cache).to_a
      end

      require_relative "parser/compiler"
      require_relative "parser/lexer"

      private_constant :Compiler
      private_constant :Lexer
    end
  end
end