From 3b662a92bc0f37952a283a24fa05a40916600a8f Mon Sep 17 00:00:00 2001 From: Vinicius Stock Date: Thu, 10 Nov 2022 16:30:23 -0500 Subject: [PATCH 001/104] Allow formatting code with a different base level of indentation Being able to override the base level of indentation allows us to format parts of a document that may be nested. --- lib/syntax_tree.rb | 7 ++++++- lib/syntax_tree/formatter.rb | 4 ++-- test/formatting_test.rb | 32 ++++++++++++++++++++++++++++++++ 3 files changed, 40 insertions(+), 3 deletions(-) diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index 418468a9..bdb4a931 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -44,6 +44,10 @@ module SyntaxTree # It shouldn't really be changed except in very niche circumstances. DEFAULT_RUBY_VERSION = Formatter::SemanticVersion.new(RUBY_VERSION).freeze + # The default indentation level for formatting. We allow changing this so + # that Syntax Tree can format arbitrary parts of a document. + DEFAULT_INDENTATION = 0 + # This is a hook provided so that plugins can register themselves as the # handler for a particular file type. 
def self.register_handler(extension, handler) @@ -61,12 +65,13 @@ def self.parse(source) def self.format( source, maxwidth = DEFAULT_PRINT_WIDTH, + base_indentation = DEFAULT_INDENTATION, options: Formatter::Options.new ) formatter = Formatter.new(source, [], maxwidth, options: options) parse(source).format(formatter) - formatter.flush + formatter.flush(base_indentation) formatter.output.join end diff --git a/lib/syntax_tree/formatter.rb b/lib/syntax_tree/formatter.rb index d5d251c6..fddc06fe 100644 --- a/lib/syntax_tree/formatter.rb +++ b/lib/syntax_tree/formatter.rb @@ -84,10 +84,10 @@ def initialize(source, *args, options: Options.new) @target_ruby_version = options.target_ruby_version end - def self.format(source, node) + def self.format(source, node, base_indentation = 0) q = new(source, []) q.format(node) - q.flush + q.flush(base_indentation) q.output.join end diff --git a/test/formatting_test.rb b/test/formatting_test.rb index eff7ef71..37ca29e1 100644 --- a/test/formatting_test.rb +++ b/test/formatting_test.rb @@ -27,5 +27,37 @@ def test_stree_ignore assert_equal(source, SyntaxTree.format(source)) end + + def test_formatting_with_different_indentation_level + source = <<~SOURCE + def foo + puts "a" + end + SOURCE + + # Default indentation + assert_equal(source, SyntaxTree.format(source)) + + # Level 2 + assert_equal(<<-EXPECTED.chomp, SyntaxTree.format(source, 80, 2).rstrip) + def foo + puts "a" + end + EXPECTED + + # Level 4 + assert_equal(<<-EXPECTED.chomp, SyntaxTree.format(source, 80, 4).rstrip) + def foo + puts "a" + end + EXPECTED + + # Level 6 + assert_equal(<<-EXPECTED.chomp, SyntaxTree.format(source, 80, 6).rstrip) + def foo + puts "a" + end + EXPECTED + end end end From d76043bcb8952a05f1167dc822efd2a74810a099 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 11 Nov 2022 15:39:29 -0500 Subject: [PATCH 002/104] Anonymous kwargs --- CHANGELOG.md | 4 ++++ lib/syntax_tree/node.rb | 4 ++-- lib/syntax_tree/parser.rb | 2 +- 
test/fixtures/assoc_splat.rb | 4 ++++ 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e320cd82..20808e3b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a ## [Unreleased] +### Changed + +- Support forwarding anonymous keyword arguments with `**`. + ## [5.0.1] - 2022-11-10 ### Changed diff --git a/lib/syntax_tree/node.rb b/lib/syntax_tree/node.rb index f32789a3..53fb3905 100644 --- a/lib/syntax_tree/node.rb +++ b/lib/syntax_tree/node.rb @@ -1604,7 +1604,7 @@ def format_contents(q) # { **pairs } # class AssocSplat < Node - # [untyped] the expression that is being splatted + # [nil | untyped] the expression that is being splatted attr_reader :value # [Array[ Comment | EmbDoc ]] the comments attached to this node @@ -1643,7 +1643,7 @@ def deconstruct_keys(_keys) def format(q) q.text("**") - q.format(value) + q.format(value) if value end def ===(other) diff --git a/lib/syntax_tree/parser.rb b/lib/syntax_tree/parser.rb index 23a3196c..5b093a87 100644 --- a/lib/syntax_tree/parser.rb +++ b/lib/syntax_tree/parser.rb @@ -744,7 +744,7 @@ def on_assoc_splat(value) AssocSplat.new( value: value, - location: operator.location.to(value.location) + location: operator.location.to((value || operator).location) ) end diff --git a/test/fixtures/assoc_splat.rb b/test/fixtures/assoc_splat.rb index 2182c2ed..8b595ce9 100644 --- a/test/fixtures/assoc_splat.rb +++ b/test/fixtures/assoc_splat.rb @@ -12,3 +12,7 @@ } - { **foo } +% # >= 3.2.0 +def foo(**) + bar(**) +end From b0b47198baf43d56201cb15341446a9f0674f0cb Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 14 Nov 2022 17:08:50 +0000 Subject: [PATCH 003/104] Bump rubocop from 1.38.0 to 1.39.0 Bumps [rubocop](https://github.com/rubocop/rubocop) from 1.38.0 to 1.39.0. 
- [Release notes](https://github.com/rubocop/rubocop/releases) - [Changelog](https://github.com/rubocop/rubocop/blob/master/CHANGELOG.md) - [Commits](https://github.com/rubocop/rubocop/compare/v1.38.0...v1.39.0) --- updated-dependencies: - dependency-name: rubocop dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- Gemfile.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Gemfile.lock b/Gemfile.lock index ffbdc5d1..0e81e5ff 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -19,7 +19,7 @@ GEM rake (13.0.6) regexp_parser (2.6.0) rexml (3.2.5) - rubocop (1.38.0) + rubocop (1.39.0) json (~> 2.3) parallel (~> 1.10) parser (>= 3.1.2.1) From fbc64f5d61ad2e7032312fc4a9f75596565ddfdb Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 10 Nov 2022 09:23:00 -0500 Subject: [PATCH 004/104] Compile to YARV --- .rubocop.yml | 12 + lib/syntax_tree.rb | 1 + lib/syntax_tree/visitor/compiler.rb | 1830 +++++++++++++++++++++++++++ test/compiler_test.rb | 342 +++++ 4 files changed, 2185 insertions(+) create mode 100644 lib/syntax_tree/visitor/compiler.rb create mode 100644 test/compiler_test.rb diff --git a/.rubocop.yml b/.rubocop.yml index 6c9be677..22f1bbef 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -16,6 +16,9 @@ Layout/LineLength: Lint/AmbiguousBlockAssociation: Enabled: false +Lint/BooleanSymbol: + Enabled: false + Lint/DuplicateBranch: Enabled: false @@ -46,9 +49,15 @@ Naming/MethodParameterName: Naming/RescuedExceptionsVariableName: PreferredName: error +Naming/VariableNumber: + Enabled: false + Style/CaseEquality: Enabled: false +Style/CaseLikeIf: + Enabled: false + Style/ExplicitBlockArgument: Enabled: false @@ -88,6 +97,9 @@ Style/ParallelAssignment: Style/PerlBackrefs: Enabled: false +Style/SafeNavigation: + Enabled: false + Style/SpecialGlobalVars: Enabled: false diff --git a/lib/syntax_tree.rb 
b/lib/syntax_tree.rb index 418468a9..aea21d8e 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -13,6 +13,7 @@ require_relative "syntax_tree/basic_visitor" require_relative "syntax_tree/visitor" +require_relative "syntax_tree/visitor/compiler" require_relative "syntax_tree/visitor/field_visitor" require_relative "syntax_tree/visitor/json_visitor" require_relative "syntax_tree/visitor/match_visitor" diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb new file mode 100644 index 00000000..fac19831 --- /dev/null +++ b/lib/syntax_tree/visitor/compiler.rb @@ -0,0 +1,1830 @@ +# frozen_string_literal: true + +module SyntaxTree + class Visitor + # This class is an experiment in transforming Syntax Tree nodes into their + # corresponding YARV instruction sequences. It attempts to mirror the + # behavior of RubyVM::InstructionSequence.compile. + # + # You use this as with any other visitor. First you parse code into a tree, + # then you visit it with this compiler. Visiting the root node of the tree + # will return a SyntaxTree::Visitor::Compiler::InstructionSequence object. + # With that object you can call #to_a on it, which will return a serialized + # form of the instruction sequence as an array. This array _should_ mirror + # the array given by RubyVM::InstructionSequence#to_a. 
+ # + # As an example, here is how you would compile a single expression: + # + # program = SyntaxTree.parse("1 + 2") + # program.accept(SyntaxTree::Visitor::Compiler.new).to_a + # + # [ + # "YARVInstructionSequence/SimpleDataFormat", + # 3, + # 1, + # 1, + # {:arg_size=>0, :local_size=>0, :stack_max=>2}, + # "", + # "", + # "", + # 1, + # :top, + # [], + # {}, + # [], + # [ + # [:putobject_INT2FIX_1_], + # [:putobject, 2], + # [:opt_plus, {:mid=>:+, :flag=>16, :orig_argc=>1}], + # [:leave] + # ] + # ] + # + # Note that this is the same output as calling: + # + # RubyVM::InstructionSequence.compile("1 + 2").to_a + # + class Compiler < BasicVisitor + # This visitor is responsible for converting Syntax Tree nodes into their + # corresponding Ruby structures. This is used to convert the operands of + # some instructions like putobject that push a Ruby object directly onto + # the stack. It is only used when the entire structure can be represented + # at compile-time, as opposed to constructed at run-time. + class RubyVisitor < BasicVisitor + # This error is raised whenever a node cannot be converted into a Ruby + # object at compile-time. + class CompilationError < StandardError + end + + def visit_array(node) + visit_all(node.contents.parts) + end + + def visit_bare_assoc_hash(node) + node.assocs.to_h do |assoc| + # We can only convert regular key-value pairs. A double splat ** + # operator means it has to be converted at run-time. 
+ raise CompilationError unless assoc.is_a?(Assoc) + [visit(assoc.key), visit(assoc.value)] + end + end + + def visit_float(node) + node.value.to_f + end + + alias visit_hash visit_bare_assoc_hash + + def visit_imaginary(node) + node.value.to_c + end + + def visit_int(node) + node.value.to_i + end + + def visit_label(node) + node.value.chomp(":").to_sym + end + + def visit_qsymbols(node) + node.elements.map { |element| visit(element).to_sym } + end + + def visit_range(node) + left, right = [visit(node.left), visit(node.right)] + node.operator.value === ".." ? left..right : left...right + end + + def visit_rational(node) + node.value.to_r + end + + def visit_regexp_literal(node) + if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + Regexp.new(node.parts.first.value, visit_regexp_literal_flags(node)) + else + # Any interpolation of expressions or variables will result in the + # regular expression being constructed at run-time. + raise CompilationError + end + end + + # This isn't actually a visit method, though maybe it should be. It is + # responsible for converting the set of string options on a regular + # expression into its equivalent integer. + def visit_regexp_literal_flags(node) + node + .options + .chars + .inject(0) do |accum, option| + accum | + case option + when "i" + Regexp::IGNORECASE + when "x" + Regexp::EXTENDED + when "m" + Regexp::MULTILINE + else + raise "Unknown regexp option: #{option}" + end + end + end + + def visit_symbol_literal(node) + node.value.value.to_sym + end + + def visit_symbols(node) + node.elements.map { |element| visit(element).to_sym } + end + + def visit_tstring_content(node) + node.value + end + + def visit_word(node) + if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + node.parts.first.value + else + # Any interpolation of expressions or variables will result in the + # string being constructed at run-time. 
+ raise CompilationError + end + end + + def visit_unsupported(_node) + raise CompilationError + end + + # Please forgive the metaprogramming here. This is used to create visit + # methods for every node that we did not explicitly handle. By default + # each of these methods will raise a CompilationError. + handled = instance_methods(false) + (Visitor.instance_methods(false) - handled).each do |method| + alias_method method, :visit_unsupported + end + end + + # This object is used to track the size of the stack at any given time. It + # is effectively a mini symbolic interpreter. It's necessary because when + # instruction sequences get serialized they include a :stack_max field on + # them. This field is used to determine how much stack space to allocate + # for the instruction sequence. + class Stack + attr_reader :current_size, :maximum_size + + def initialize + @current_size = 0 + @maximum_size = 0 + end + + def change_by(value) + @current_size += value + @maximum_size = @current_size if @current_size > @maximum_size + end + end + + # This class is meant to mirror RubyVM::InstructionSequence. It contains a + # list of instructions along with the metadata pertaining to them. It also + # functions as a builder for the instruction sequence. + class InstructionSequence + # This is a small data class that captures the level of a local variable + # table (the number of scopes to traverse) and the index of the local + # variable in that table. + class LocalVariable + attr_reader :level, :index + + def initialize(level, index) + @level = level + @index = index + end + end + + # The name of the instruction sequence. + attr_reader :name + + # The parent instruction sequence, if there is one. + attr_reader :parent_iseq + + # The location of the root node of this instruction sequence. + attr_reader :location + + # The list of instructions for this instruction sequence. 
+ attr_reader :insns + + # The array of symbols corresponding to the local variables of this + # instruction sequence. + attr_reader :local_variables + + # The hash of names of instance and class variables pointing to the + # index of their associated inline storage. + attr_reader :inline_storages + + # The index of the next inline storage that will be created. + attr_reader :storage_index + + # An object that will track the current size of the stack and the + # maximum size of the stack for this instruction sequence. + attr_reader :stack + + def initialize(name, parent_iseq, location) + @name = name + @parent_iseq = parent_iseq + @location = location + + @local_variables = [] + @inline_storages = {} + + @insns = [] + @storage_index = 0 + @stack = Stack.new + end + + def local_variable(name, level = 0) + if (index = local_variables.index(name)) + LocalVariable.new(level, index) + elsif parent_iseq + parent_iseq.local_variable(name, level + 1) + else + raise "Unknown local variable: #{name}" + end + end + + def push(insn) + insns << insn + insn + end + + def inline_storage + storage = storage_index + @storage_index += 1 + storage + end + + def inline_storage_for(name) + unless inline_storages.key?(name) + inline_storages[name] = inline_storage + end + + inline_storages[name] + end + + def length + insns.sum(&:length) + end + + def each_child + insns.each do |insn| + insn[1..].each do |operand| + yield operand if operand.is_a?(InstructionSequence) + end + end + end + + def to_a + versions = RUBY_VERSION.split(".").map(&:to_i) + + [ + "YARVInstructionSequence/SimpleDataFormat", + versions[0], + versions[1], + 1, + { + arg_size: 0, + local_size: local_variables.length, + stack_max: stack.maximum_size + }, + name, + "", + "", + 1, + :top, + local_variables, + {}, + [], + insns.map do |insn| + case insn[0] + when :getlocal_WC_0, :setlocal_WC_0 + # Here we need to map the local variable index to the offset + # from the top of the stack where it will be stored. 
+ [insn[0], local_variables.length - (insn[1] - 3) - 1] + when :getlocal_WC_1, :setlocal_WC_1 + # Here we're going to do the same thing as with _WC_0 except + # we're looking at the parent scope. + [ + insn[0], + parent_iseq.local_variables.length - (insn[1] - 3) - 1 + ] + when :getlocal, :setlocal + # Here we're going to do the same thing as the other local + # instructions except that we'll traverse up the instruction + # sequences first. + iseq = self + insn[2].times { iseq = iseq.parent_iseq } + [insn[0], iseq.local_variables.length - (insn[1] - 3) - 1] + when :send + # For any instructions that push instruction sequences onto the + # stack, we need to call #to_a on them as well. + [insn[0], insn[1], (insn[2].to_a if insn[2])] + else + insn + end + end + ] + end + end + + # This class serves as a layer of indirection between the instruction + # sequence and the compiler. It allows us to provide different behavior + # for certain instructions depending on the Ruby version. For example, + # class variable reads and writes gained an inline cache in Ruby 3.0. So + # we place the logic for checking the Ruby version in this class. + class Builder + attr_reader :iseq, :stack + + def initialize(iseq) + @iseq = iseq + @stack = iseq.stack + end + + # This creates a new label at the current length of the instruction + # sequence. It is used as the operand for jump instructions. 
+ def label + :"label_#{iseq.length}" + end + + def adjuststack(number) + stack.change_by(-number) + iseq.push([:adjuststack, number]) + end + + def anytostring + stack.change_by(-2 + 1) + iseq.push([:anytostring]) + end + + def branchif(index) + stack.change_by(-1) + iseq.push([:branchif, index]) + end + + def branchunless(index) + stack.change_by(-1) + iseq.push([:branchunless, index]) + end + + def concatstrings(number) + stack.change_by(-number + 1) + iseq.push([:concatstrings, number]) + end + + def defined(type, name, message) + stack.change_by(-1 + 1) + iseq.push([:defined, type, name, message]) + end + + def dup + stack.change_by(-1 + 2) + iseq.push([:dup]) + end + + def duparray(object) + stack.change_by(+1) + iseq.push([:duparray, object]) + end + + def duphash(object) + stack.change_by(+1) + iseq.push([:duphash, object]) + end + + def dupn(number) + stack.change_by(+number) + iseq.push([:dupn, number]) + end + + def getclassvariable(name) + stack.change_by(+1) + + if RUBY_VERSION >= "3.0" + iseq.push([:getclassvariable, name, iseq.inline_storage_for(name)]) + else + iseq.push([:getclassvariable, name]) + end + end + + def getconstant(name) + stack.change_by(-2 + 1) + iseq.push([:getconstant, name]) + end + + def getglobal(name) + stack.change_by(+1) + iseq.push([:getglobal, name]) + end + + def getinstancevariable(name) + stack.change_by(+1) + + if RUBY_VERSION >= "3.2" + iseq.push([:getinstancevariable, name, iseq.inline_storage]) + else + iseq.push( + [:getinstancevariable, name, iseq.inline_storage_for(name)] + ) + end + end + + def getlocal(index, level) + stack.change_by(+1) + + # Specialize the getlocal instruction based on the level of the + # local variable. If it's 0 or 1, then there's a specialized + # instruction that will look at the current scope or the parent + # scope, respectively, and requires fewer operands. 
+ case level + when 0 + iseq.push([:getlocal_WC_0, index]) + when 1 + iseq.push([:getlocal_WC_1, index]) + else + iseq.push([:getlocal, index, level]) + end + end + + def getspecial(key, type) + stack.change_by(-0 + 1) + iseq.push([:getspecial, key, type]) + end + + def intern + stack.change_by(-1 + 1) + iseq.push([:intern]) + end + + def invokesuper(method_id, argc, flag, block_iseq) + stack.change_by(-(argc + 1) + 1) + iseq.push( + [:invokesuper, call_data(method_id, argc, flag), block_iseq] + ) + end + + def jump(index) + stack.change_by(0) + iseq.push([:jump, index]) + end + + def leave + stack.change_by(-1) + iseq.push([:leave]) + end + + def newarray(length) + stack.change_by(-length + 1) + iseq.push([:newarray, length]) + end + + def newhash(length) + stack.change_by(-length + 1) + iseq.push([:newhash, length]) + end + + def newrange(flag) + stack.change_by(-2 + 1) + iseq.push([:newrange, flag]) + end + + def objtostring(method_id, argc, flag) + stack.change_by(-1 + 1) + iseq.push([:objtostring, call_data(method_id, argc, flag)]) + end + + def opt_getconstant_path(names) + if RUBY_VERSION >= "3.2" + stack.change_by(+1) + iseq.push([:opt_getconstant_path, names]) + else + inline_storage = iseq.inline_storage + getinlinecache = opt_getinlinecache(-1, inline_storage) + + if names[0] == :"" + names.shift + pop + putobject(Object) + end + + names.each_with_index do |name, index| + putobject(index == 0) + getconstant(name) + end + + opt_setinlinecache(inline_storage) + getinlinecache[1] = label + end + end + + def opt_getinlinecache(offset, inline_storage) + stack.change_by(+1) + iseq.push([:opt_getinlinecache, offset, inline_storage]) + end + + def opt_setinlinecache(inline_storage) + stack.change_by(-1 + 1) + iseq.push([:opt_setinlinecache, inline_storage]) + end + + def pop + stack.change_by(-1) + iseq.push([:pop]) + end + + def putnil + stack.change_by(+1) + iseq.push([:putnil]) + end + + def putobject(object) + stack.change_by(+1) + + # Specialize the 
putobject instruction based on the value of the + # object. If it's 0 or 1, then there's a specialized instruction + # that will push the object onto the stack and requires fewer + # operands. + if object.eql?(0) + iseq.push([:putobject_INT2FIX_0_]) + elsif object.eql?(1) + iseq.push([:putobject_INT2FIX_1_]) + else + iseq.push([:putobject, object]) + end + end + + def putself + stack.change_by(+1) + iseq.push([:putself]) + end + + def putspecialobject(object) + stack.change_by(+1) + iseq.push([:putspecialobject, object]) + end + + def putstring(object) + stack.change_by(+1) + iseq.push([:putstring, object]) + end + + def send(method_id, argc, flag, block_iseq = nil) + stack.change_by(-(argc + 1) + 1) + cdata = call_data(method_id, argc, flag) + + # Specialize the send instruction. If it doesn't have a block + # attached, then we will replace it with an opt_send_without_block + # and do further specializations based on the called method and the + # number of arguments. + + # stree-ignore + if !block_iseq && (flag & VM_CALL_ARGS_BLOCKARG) == 0 + case [method_id, argc] + when [:length, 0] then iseq.push([:opt_length, cdata]) + when [:size, 0] then iseq.push([:opt_size, cdata]) + when [:empty?, 0] then iseq.push([:opt_empty_p, cdata]) + when [:nil?, 0] then iseq.push([:opt_nil_p, cdata]) + when [:succ, 0] then iseq.push([:opt_succ, cdata]) + when [:!, 0] then iseq.push([:opt_not, cdata]) + when [:+, 1] then iseq.push([:opt_plus, cdata]) + when [:-, 1] then iseq.push([:opt_minus, cdata]) + when [:*, 1] then iseq.push([:opt_mult, cdata]) + when [:/, 1] then iseq.push([:opt_div, cdata]) + when [:%, 1] then iseq.push([:opt_mod, cdata]) + when [:==, 1] then iseq.push([:opt_eq, cdata]) + when [:=~, 1] then iseq.push([:opt_regexpmatch2, cdata]) + when [:<, 1] then iseq.push([:opt_lt, cdata]) + when [:<=, 1] then iseq.push([:opt_le, cdata]) + when [:>, 1] then iseq.push([:opt_gt, cdata]) + when [:>=, 1] then iseq.push([:opt_ge, cdata]) + when [:<<, 1] then 
iseq.push([:opt_ltlt, cdata]) + when [:[], 1] then iseq.push([:opt_aref, cdata]) + when [:&, 1] then iseq.push([:opt_and, cdata]) + when [:|, 1] then iseq.push([:opt_or, cdata]) + when [:[]=, 2] then iseq.push([:opt_aset, cdata]) + when [:!=, 1] + eql_data = call_data(:==, 1, VM_CALL_ARGS_SIMPLE) + iseq.push([:opt_neq, eql_data, cdata]) + else + iseq.push([:opt_send_without_block, cdata]) + end + else + iseq.push([:send, cdata, block_iseq]) + end + end + + def setclassvariable(name) + stack.change_by(-1) + + if RUBY_VERSION >= "3.0" + iseq.push([:setclassvariable, name, iseq.inline_storage_for(name)]) + else + iseq.push([:setclassvariable, name]) + end + end + + def setconstant(name) + stack.change_by(-2) + iseq.push([:setconstant, name]) + end + + def setglobal(name) + stack.change_by(-1) + iseq.push([:setglobal, name]) + end + + def setinstancevariable(name) + stack.change_by(-1) + + if RUBY_VERSION >= "3.2" + iseq.push([:setinstancevariable, name, iseq.inline_storage]) + else + iseq.push( + [:setinstancevariable, name, iseq.inline_storage_for(name)] + ) + end + end + + def setlocal(index, level) + stack.change_by(-1) + + # Specialize the setlocal instruction based on the level of the + # local variable. If it's 0 or 1, then there's a specialized + # instruction that will write to the current scope or the parent + # scope, respectively, and requires fewer operands. 
+ case level + when 0 + iseq.push([:setlocal_WC_0, index]) + when 1 + iseq.push([:setlocal_WC_1, index]) + else + iseq.push([:setlocal, index, level]) + end + end + + def setn(number) + stack.change_by(-1 + 1) + iseq.push([:setn, number]) + end + + def splatarray(flag) + stack.change_by(-1 + 1) + iseq.push([:splatarray, flag]) + end + + def swap + stack.change_by(-2 + 2) + iseq.push([:swap]) + end + + def topn(number) + stack.change_by(+1) + iseq.push([:topn, number]) + end + + private + + # This creates a call data object that is used as the operand for the + # send, invokesuper, and objtostring instructions. + def call_data(method_id, argc, flag) + { mid: method_id, flag: flag, orig_argc: argc } + end + end + + # These constants correspond to the putspecialobject instruction. They are + # used to represent special objects that are pushed onto the stack. + VM_SPECIAL_OBJECT_VMCORE = 1 + VM_SPECIAL_OBJECT_CBASE = 2 + VM_SPECIAL_OBJECT_CONST_BASE = 3 + + # These constants correspond to the flag passed as part of the call data + # structure on the send instruction. They are used to represent various + # metadata about the callsite (e.g., were keyword arguments used?, was a + # block given?, etc.). + VM_CALL_ARGS_SPLAT = 1 << 0 + VM_CALL_ARGS_BLOCKARG = 1 << 1 + VM_CALL_FCALL = 1 << 2 + VM_CALL_VCALL = 1 << 3 + VM_CALL_ARGS_SIMPLE = 1 << 4 + VM_CALL_BLOCKISEQ = 1 << 5 + VM_CALL_KWARG = 1 << 6 + VM_CALL_KW_SPLAT = 1 << 7 + VM_CALL_TAILCALL = 1 << 8 + VM_CALL_SUPER = 1 << 9 + VM_CALL_ZSUPER = 1 << 10 + VM_CALL_OPT_SEND = 1 << 11 + VM_CALL_KW_SPLAT_MUT = 1 << 12 + + # These constants correspond to the value passed as part of the defined + # instruction. It's an enum defined in the CRuby codebase that tells that + # instruction what kind of defined check to perform. 
+ DEFINED_NIL = 1 + DEFINED_IVAR = 2 + DEFINED_LVAR = 3 + DEFINED_GVAR = 4 + DEFINED_CVAR = 5 + DEFINED_CONST = 6 + DEFINED_METHOD = 7 + DEFINED_YIELD = 8 + DEFINED_ZSUPER = 9 + DEFINED_SELF = 10 + DEFINED_TRUE = 11 + DEFINED_FALSE = 12 + DEFINED_ASGN = 13 + DEFINED_EXPR = 14 + DEFINED_REF = 15 + DEFINED_FUNC = 16 + DEFINED_CONST_FROM = 17 + + # The current instruction sequence that is being compiled. + attr_reader :current_iseq + + # This is the current builder that is being used to construct the current + # instruction sequence. + attr_reader :builder + + # A boolean that tracks whether or not we're currently compiling an + # inline storage for a constant lookup. + attr_reader :writing_storage + + # A boolean to track if we're currently compiling the last statement + # within a set of statements. This information is necessary to determine + # if we need to return the value of the last statement. + attr_reader :last_statement + + # Whether or not the frozen_string_literal pragma has been set. 
+ attr_reader :frozen_string_literal + + def initialize + @current_iseq = nil + @builder = nil + @writing_storage = false + @last_statement = false + @frozen_string_literal = false + end + + def visit_CHAR(node) + if frozen_string_literal + builder.putobject(node.value[1..]) + else + builder.putstring(node.value[1..]) + end + end + + def visit_alias(node) + builder.putspecialobject(VM_SPECIAL_OBJECT_VMCORE) + builder.putspecialobject(VM_SPECIAL_OBJECT_CBASE) + visit(node.left) + visit(node.right) + builder.send(:"core#set_method_alias", 3, VM_CALL_ARGS_SIMPLE) + end + + def visit_aref(node) + visit(node.collection) + visit(node.index) + builder.send(:[], 1, VM_CALL_ARGS_SIMPLE) + end + + def visit_arg_block(node) + visit(node.value) + end + + def visit_arg_paren(node) + visit(node.arguments) + end + + def visit_arg_star(node) + visit(node.value) + builder.splatarray(false) + end + + def visit_args(node) + visit_all(node.parts) + end + + def visit_array(node) + builder.duparray(node.accept(RubyVisitor.new)) + rescue RubyVisitor::CompilationError + visit_all(node.contents.parts) + builder.newarray(node.contents.parts.length) + end + + def visit_assign(node) + case node.target + when ARefField + builder.putnil + visit(node.target.collection) + visit(node.target.index) + visit(node.value) + builder.setn(3) + builder.send(:[]=, 2, VM_CALL_ARGS_SIMPLE) + builder.pop + when ConstPathField + names = constant_names(node.target) + name = names.pop + + if RUBY_VERSION >= "3.2" + builder.opt_getconstant_path(names) + visit(node.value) + builder.swap + builder.topn(1) + builder.swap + builder.setconstant(name) + else + visit(node.value) + builder.dup if last_statement? 
+ builder.opt_getconstant_path(names) + builder.setconstant(name) + end + when Field + builder.putnil + visit(node.target) + visit(node.value) + builder.setn(2) + builder.send(:"#{node.target.name.value}=", 1, VM_CALL_ARGS_SIMPLE) + builder.pop + when TopConstField + name = node.target.constant.value.to_sym + + if RUBY_VERSION >= "3.2" + builder.putobject(Object) + visit(node.value) + builder.swap + builder.topn(1) + builder.swap + builder.setconstant(name) + else + visit(node.value) + builder.dup if last_statement? + builder.putobject(Object) + builder.setconstant(name) + end + when VarField + visit(node.value) + builder.dup if last_statement? + + case node.target.value + when Const + builder.putspecialobject(VM_SPECIAL_OBJECT_CONST_BASE) + builder.setconstant(node.target.value.value.to_sym) + when CVar + builder.setclassvariable(node.target.value.value.to_sym) + when GVar + builder.setglobal(node.target.value.value.to_sym) + when Ident + local_variable = visit(node.target) + builder.setlocal(local_variable.index, local_variable.level) + when IVar + builder.setinstancevariable(node.target.value.value.to_sym) + end + end + end + + def visit_assoc(node) + visit(node.key) + visit(node.value) + end + + def visit_assoc_splat(node) + visit(node.value) + end + + def visit_backref(node) + builder.getspecial(1, 2 * node.value[1..].to_i) + end + + def visit_bare_assoc_hash(node) + builder.duphash(node.accept(RubyVisitor.new)) + rescue RubyVisitor::CompilationError + visit_all(node.assocs) + end + + def visit_binary(node) + case node.operator + when :"&&" + visit(node.left) + builder.dup + + branchunless = builder.branchunless(-1) + builder.pop + + visit(node.right) + branchunless[1] = builder.label + when :"||" + visit(node.left) + builder.dup + + branchif = builder.branchif(-1) + builder.pop + + visit(node.right) + branchif[1] = builder.label + else + visit(node.left) + visit(node.right) + builder.send(node.operator, 1, VM_CALL_ARGS_SIMPLE) + end + end + + def 
visit_call(node) + node.receiver ? visit(node.receiver) : builder.putself + + visit(node.arguments) + arg_parts = argument_parts(node.arguments) + + if arg_parts.last.is_a?(ArgBlock) + flag = node.receiver.nil? ? VM_CALL_FCALL : 0 + flag |= VM_CALL_ARGS_BLOCKARG + + if arg_parts.any? { |part| part.is_a?(ArgStar) } + flag |= VM_CALL_ARGS_SPLAT + end + + if arg_parts.any? { |part| part.is_a?(BareAssocHash) } + flag |= VM_CALL_KW_SPLAT + end + + builder.send(node.message.value.to_sym, arg_parts.length - 1, flag) + else + flag = 0 + arg_parts.each do |arg_part| + case arg_part + when ArgStar + flag |= VM_CALL_ARGS_SPLAT + when BareAssocHash + flag |= VM_CALL_KW_SPLAT + end + end + + flag |= VM_CALL_ARGS_SIMPLE if flag == 0 + flag |= VM_CALL_FCALL if node.receiver.nil? + builder.send(node.message.value.to_sym, arg_parts.length, flag) + end + end + + def visit_command(node) + call_node = + CallNode.new( + receiver: nil, + operator: nil, + message: node.message, + arguments: node.arguments, + location: node.location + ) + + call_node.comments.concat(node.comments) + visit_call(call_node) + end + + def visit_command_call(node) + call_node = + CallNode.new( + receiver: node.receiver, + operator: node.operator, + message: node.message, + arguments: node.arguments, + location: node.location + ) + + call_node.comments.concat(node.comments) + visit_call(call_node) + end + + def visit_const_path_field(node) + visit(node.parent) + end + + def visit_const_path_ref(node) + names = constant_names(node) + builder.opt_getconstant_path(names) + end + + def visit_defined(node) + case node.value + when Assign + # If we're assigning to a local variable, then we need to make sure + # that we put it into the local table. 
+ if node.value.target.is_a?(VarField) && + node.value.target.value.is_a?(Ident) + name = node.value.target.value.value.to_sym + unless current_iseq.local_variables.include?(name) + current_iseq.local_variables << name + end + end + + builder.putobject("assignment") + when VarRef + value = node.value.value + name = value.value.to_sym + + case value + when Const + builder.putnil + builder.defined(DEFINED_CONST, name, "constant") + when CVar + builder.putnil + builder.defined(DEFINED_CVAR, name, "class variable") + when GVar + builder.putnil + builder.defined(DEFINED_GVAR, name, "global-variable") + when Ident + builder.putobject("local-variable") + when IVar + builder.putnil + builder.defined(DEFINED_IVAR, name, "instance-variable") + when Kw + case name + when :false + builder.putobject("false") + when :nil + builder.putobject("nil") + when :self + builder.putobject("self") + when :true + builder.putobject("true") + end + end + when VCall + builder.putself + + name = node.value.value.value.to_sym + builder.defined(DEFINED_FUNC, name, "method") + when YieldNode + builder.putnil + builder.defined(DEFINED_YIELD, false, "yield") + when ZSuper + builder.putnil + builder.defined(DEFINED_ZSUPER, false, "super") + else + builder.putobject("expression") + end + end + + def visit_dyna_symbol(node) + if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + builder.putobject(node.parts.first.value.to_sym) + end + end + + def visit_else(node) + visit(node.statements) + builder.pop unless last_statement? + end + + def visit_field(node) + visit(node.parent) + end + + def visit_float(node) + builder.putobject(node.accept(RubyVisitor.new)) + end + + def visit_for(node) + visit(node.collection) + + # Be sure we set up the local table before we start compiling the body + # of the for loop. 
+ if node.index.is_a?(VarField) && node.index.value.is_a?(Ident) + name = node.index.value.value.to_sym + unless current_iseq.local_variables.include?(name) + current_iseq.local_variables << name + end + end + + block_iseq = + with_instruction_sequence( + "block in #{current_iseq.name}", + current_iseq, + node.statements + ) do + visit(node.statements) + builder.leave + end + + builder.send(:each, 0, 0, block_iseq) + end + + def visit_hash(node) + builder.duphash(node.accept(RubyVisitor.new)) + rescue RubyVisitor::CompilationError + visit_all(node.assocs) + builder.newhash(node.assocs.length * 2) + end + + def visit_heredoc(node) + if node.beginning.value.end_with?("`") + visit_xstring_literal(node) + elsif node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + visit(node.parts.first) + else + visit_string_parts(node) + builder.concatstrings(node.parts.length) + end + end + + def visit_if(node) + visit(node.predicate) + branchunless = builder.branchunless(-1) + visit(node.statements) + + if last_statement? + builder.leave + branchunless[1] = builder.label + + node.consequent ? 
visit(node.consequent) : builder.putnil + else + builder.pop + + if node.consequent + jump = builder.jump(-1) + branchunless[1] = builder.label + visit(node.consequent) + jump[1] = builder.label + else + branchunless[1] = builder.label + end + end + end + + def visit_imaginary(node) + builder.putobject(node.accept(RubyVisitor.new)) + end + + def visit_int(node) + builder.putobject(node.accept(RubyVisitor.new)) + end + + def visit_label(node) + builder.putobject(node.accept(RubyVisitor.new)) + end + + def visit_not(node) + visit(node.statement) + builder.send(:!, 0, VM_CALL_ARGS_SIMPLE) + end + + def visit_opassign(node) + flag = VM_CALL_ARGS_SIMPLE + if node.target.is_a?(ConstPathField) || node.target.is_a?(TopConstField) + flag |= VM_CALL_FCALL + end + + case (operator = node.operator.value.chomp("=").to_sym) + when :"&&" + branchunless = nil + + with_opassign(node) do + builder.dup + branchunless = builder.branchunless(-1) + builder.pop + visit(node.value) + end + + case node.target + when ARefField + builder.leave + branchunless[1] = builder.label + builder.setn(3) + builder.adjuststack(3) + when ConstPathField, TopConstField + branchunless[1] = builder.label + builder.swap + builder.pop + else + branchunless[1] = builder.label + end + when :"||" + if node.target.is_a?(ConstPathField) || + node.target.is_a?(TopConstField) + opassign_defined(node) + builder.swap + builder.pop + elsif node.target.is_a?(VarField) && + [Const, CVar, GVar].include?(node.target.value.class) + opassign_defined(node) + else + branchif = nil + + with_opassign(node) do + builder.dup + branchif = builder.branchif(-1) + builder.pop + visit(node.value) + end + + if node.target.is_a?(ARefField) + builder.leave + branchif[1] = builder.label + builder.setn(3) + builder.adjuststack(3) + else + branchif[1] = builder.label + end + end + else + with_opassign(node) do + visit(node.value) + builder.send(operator, 1, flag) + end + end + end + + def visit_paren(node) + visit(node.contents) + end + + 
def visit_program(node) + node.statements.body.each do |statement| + break unless statement.is_a?(Comment) + + if statement.value == "# frozen_string_literal: true" + @frozen_string_literal = true + end + end + + statements = + node.statements.body.select do |statement| + case statement + when Comment, EmbDoc, EndContent, VoidStmt + false + else + true + end + end + + with_instruction_sequence("", nil, node) do + if statements.empty? + builder.putnil + else + *statements, last_statement = statements + visit_all(statements) + with_last_statement { visit(last_statement) } + end + + builder.leave + end + end + + def visit_qsymbols(node) + builder.duparray(node.accept(RubyVisitor.new)) + end + + def visit_qwords(node) + visit_all(node.elements) + builder.newarray(node.elements.length) + end + + def visit_range(node) + builder.putobject(node.accept(RubyVisitor.new)) + rescue RubyVisitor::CompilationError + visit(node.left) + visit(node.right) + builder.newrange(node.operator.value == ".." ? 0 : 1) + end + + def visit_rational(node) + builder.putobject(node.accept(RubyVisitor.new)) + end + + def visit_regexp_literal(node) + builder.putobject(node.accept(RubyVisitor.new)) + rescue RubyVisitor::CompilationError + visit_string_parts(node) + + flags = RubyVisitor.new.visit_regexp_literal_flags(node) + builder.toregexp(flags, node.parts.length) + end + + def visit_statements(node) + statements = + node.body.select do |statement| + case statement + when Comment, EmbDoc, EndContent, VoidStmt + false + else + true + end + end + + statements.empty? ? 
builder.putnil : visit_all(statements) + end + + def visit_string_concat(node) + value = node.left.parts.first.value + node.right.parts.first.value + content = TStringContent.new(value: value, location: node.location) + + literal = + StringLiteral.new( + parts: [content], + quote: node.left.quote, + location: node.location + ) + visit_string_literal(literal) + end + + def visit_string_embexpr(node) + visit(node.statements) + end + + def visit_string_literal(node) + if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + visit(node.parts.first) + else + visit_string_parts(node) + builder.concatstrings(node.parts.length) + end + end + + def visit_symbol_literal(node) + builder.putobject(node.accept(RubyVisitor.new)) + end + + def visit_symbols(node) + builder.duparray(node.accept(RubyVisitor.new)) + rescue RubyVisitor::CompilationError + node.elements.each do |element| + if element.parts.length == 1 && + element.parts.first.is_a?(TStringContent) + builder.putobject(element.parts.first.value.to_sym) + else + length = element.parts.length + unless element.parts.first.is_a?(TStringContent) + builder.putobject("") + length += 1 + end + + visit_string_parts(element) + builder.concatstrings(length) + builder.intern + end + end + + builder.newarray(node.elements.length) + end + + def visit_top_const_ref(node) + builder.opt_getconstant_path(constant_names(node)) + end + + def visit_tstring_content(node) + if frozen_string_literal + builder.putobject(node.accept(RubyVisitor.new)) + else + builder.putstring(node.accept(RubyVisitor.new)) + end + end + + def visit_unary(node) + visit(node.statement) + + method_id = + case node.operator + when "+", "-" + :"#{node.operator}@" + else + node.operator.to_sym + end + + builder.send(method_id, 0, VM_CALL_ARGS_SIMPLE) + end + + def visit_undef(node) + node.symbols.each_with_index do |symbol, index| + builder.pop if index != 0 + builder.putspecialobject(VM_SPECIAL_OBJECT_VMCORE) + 
builder.putspecialobject(VM_SPECIAL_OBJECT_CBASE) + visit(symbol) + builder.send(:"core#undef_method", 2, VM_CALL_ARGS_SIMPLE) + end + end + + def visit_var_field(node) + case node.value + when CVar, IVar + name = node.value.value.to_sym + current_iseq.inline_storage_for(name) + when Ident + name = node.value.value.to_sym + unless current_iseq.local_variables.include?(name) + current_iseq.local_variables << name + end + current_iseq.local_variable(name) + end + end + + def visit_var_ref(node) + case node.value + when Const + builder.opt_getconstant_path(constant_names(node)) + when CVar + name = node.value.value.to_sym + builder.getclassvariable(name) + when GVar + builder.getglobal(node.value.value.to_sym) + when Ident + local_variable = current_iseq.local_variable(node.value.value.to_sym) + builder.getlocal(local_variable.index, local_variable.level) + when IVar + name = node.value.value.to_sym + builder.getinstancevariable(name) + when Kw + case node.value.value + when "false" + builder.putobject(false) + when "nil" + builder.putnil + when "self" + builder.putself + when "true" + builder.putobject(true) + end + end + end + + def visit_vcall(node) + builder.putself + + flag = VM_CALL_FCALL | VM_CALL_VCALL | VM_CALL_ARGS_SIMPLE + builder.send(node.value.value.to_sym, 0, flag) + end + + def visit_while(node) + jumps = [] + + jumps << builder.jump(-1) + builder.putnil + builder.pop + jumps << builder.jump(-1) + + label = builder.label + visit(node.statements) + builder.pop + jumps.each { |jump| jump[1] = builder.label } + + visit(node.predicate) + builder.branchif(label) + builder.putnil if last_statement? 
+ end + + def visit_word(node) + if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + visit(node.parts.first) + else + length = node.parts.length + unless node.parts.first.is_a?(TStringContent) + builder.putobject("") + length += 1 + end + + visit_string_parts(node) + builder.concatstrings(length) + end + end + + def visit_words(node) + visit_all(node.elements) + builder.newarray(node.elements.length) + end + + def visit_xstring_literal(node) + builder.putself + visit_string_parts(node) + builder.concatstrings(node.parts.length) if node.parts.length > 1 + builder.send(:`, 1, VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE) + end + + def visit_zsuper(_node) + builder.putself + builder.invokesuper( + nil, + 0, + VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE | VM_CALL_SUPER | VM_CALL_ZSUPER, + nil + ) + end + + private + + # This is a helper that is used in places where arguments may be present + # or they may be wrapped in parentheses. It's meant to descend down the + # tree and return an array of argument nodes. + def argument_parts(node) + case node + when nil + [] + when Args + node.parts + when ArgParen + node.arguments.parts + end + end + + # Constant names when they are being assigned or referenced come in as a + # tree, but it's more convenient to work with them as an array. This + # method converts them into that array. This is nice because it's the + # operand that goes to opt_getconstant_path in Ruby 3.2. 
+ def constant_names(node) + current = node + names = [] + + while current.is_a?(ConstPathField) || current.is_a?(ConstPathRef) + names.unshift(current.constant.value.to_sym) + current = current.parent + end + + case current + when VarField, VarRef + names.unshift(current.value.value.to_sym) + when TopConstRef + names.unshift(current.constant.value.to_sym) + names.unshift(:"") + end + + names + end + + # For the most part when an OpAssign (operator assignment) node with a ||= + # operator is being compiled it's a matter of reading the target, checking + # if the value should be evaluated, evaluating it if so, and then writing + # the result back to the target. + # + # However, in certain kinds of assignments (X, ::X, X::Y, @@x, and $x) we + # first check if the value is defined using the defined instruction. I + # don't know why it is necessary, and suspect that it isn't. + def opassign_defined(node) + case node.target + when ConstPathField + visit(node.target.parent) + name = node.target.constant.value.to_sym + + builder.dup + builder.defined(DEFINED_CONST_FROM, name, true) + when TopConstField + name = node.target.constant.value.to_sym + + builder.putobject(Object) + builder.dup + builder.defined(DEFINED_CONST_FROM, name, true) + when VarField + name = node.target.value.value.to_sym + builder.putnil + + case node.target.value + when Const + builder.defined(DEFINED_CONST, name, true) + when CVar + builder.defined(DEFINED_CVAR, name, true) + when GVar + builder.defined(DEFINED_GVAR, name, true) + end + end + + branchunless = builder.branchunless(-1) + + case node.target + when ConstPathField, TopConstField + builder.dup + builder.putobject(true) + builder.getconstant(name) + when VarField + case node.target.value + when Const + builder.opt_getconstant_path(constant_names(node.target)) + when CVar + builder.getclassvariable(name) + when GVar + builder.getglobal(name) + end + end + + builder.dup + branchif = builder.branchif(-1) + builder.pop + + branchunless[1] = 
builder.label + visit(node.value) + + case node.target + when ConstPathField, TopConstField + builder.dupn(2) + builder.swap + builder.setconstant(name) + when VarField + builder.dup + + case node.target.value + when Const + builder.putspecialobject(VM_SPECIAL_OBJECT_CONST_BASE) + builder.setconstant(name) + when CVar + builder.setclassvariable(name) + when GVar + builder.setglobal(name) + end + end + + branchif[1] = builder.label + end + + # Whenever a value is interpolated into a string-like structure, these + # three instructions are pushed. + def push_interpolate + builder.dup + builder.objtostring(:to_s, 0, VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE) + builder.anytostring + end + + # There are a lot of nodes in the AST that act as contains of parts of + # strings. This includes things like string literals, regular expressions, + # heredocs, etc. This method will visit all the parts of a string within + # those containers. + def visit_string_parts(node) + node.parts.each do |part| + case part + when StringDVar + visit(part.variable) + push_interpolate + when StringEmbExpr + visit(part) + push_interpolate + when TStringContent + builder.putobject(part.accept(RubyVisitor.new)) + end + end + end + + # This is a helper method for compiling a constant lookup. In order to + # avoid having to look up the tree to determine if the constant is part of + # a larger path or not, we store a boolean flag that indicates that we're + # already in the middle of a constant lookup. That way we only get one set + # of opt_getinlinecache..opt_setinlinecache instructions. + def with_inline_storage + return yield if writing_storage + + @writing_storage = true + inline_storage = current_iseq.inline_storage + + getinlinecache = builder.opt_getinlinecache(-1, inline_storage) + yield + builder.opt_setinlinecache(inline_storage) + + getinlinecache[1] = builder.label + @writing_storage = false + end + + # The current instruction sequence that we're compiling is always stored + # on the compiler. 
When we descend into a node that has its own + # instruction sequence, this method can be called to temporarily set the + # new value of the instruction sequence, yield, and then set it back. + def with_instruction_sequence(name, parent_iseq, node) + previous_iseq = current_iseq + previous_builder = builder + + begin + iseq = InstructionSequence.new(name, parent_iseq, node.location) + @current_iseq = iseq + @builder = Builder.new(iseq) + yield + iseq + ensure + @current_iseq = previous_iseq + @builder = previous_builder + end + end + + # When we're compiling the last statement of a set of statements within a + # scope, the instructions sometimes change from pops to leaves. These + # kinds of peephole optimizations can reduce the overall number of + # instructions. Therefore, we keep track of whether we're compiling the + # last statement of a scope and allow visit methods to query that + # information. + def with_last_statement + @last_statement = true + + begin + yield + ensure + @last_statement = false + end + end + + def last_statement? + @last_statement + end + + # OpAssign nodes can have a number of different kinds of nodes as their + # "target" (i.e., the left-hand side of the assignment). When compiling + # these nodes we typically need to first fetch the current value of the + # variable, then perform some kind of action, then store the result back + # into the variable. This method handles that by first fetching the value, + # then yielding to the block, then storing the result. 
+ def with_opassign(node) + case node.target + when ARefField + builder.putnil + visit(node.target.collection) + visit(node.target.index) + + builder.dupn(2) + builder.send(:[], 1, VM_CALL_ARGS_SIMPLE) + + yield + + builder.setn(3) + builder.send(:[]=, 2, VM_CALL_ARGS_SIMPLE) + builder.pop + when ConstPathField + name = node.target.constant.value.to_sym + + visit(node.target.parent) + builder.dup + builder.putobject(true) + builder.getconstant(name) + + yield + + if node.operator.value == "&&=" + builder.dupn(2) + else + builder.swap + builder.topn(1) + end + + builder.swap + builder.setconstant(name) + when TopConstField + name = node.target.constant.value.to_sym + + builder.putobject(Object) + builder.dup + builder.putobject(true) + builder.getconstant(name) + + yield + + if node.operator.value == "&&=" + builder.dupn(2) + else + builder.swap + builder.topn(1) + end + + builder.swap + builder.setconstant(name) + when VarField + case node.target.value + when Const + names = constant_names(node.target) + builder.opt_getconstant_path(names) + + yield + + builder.dup + builder.putspecialobject(VM_SPECIAL_OBJECT_CONST_BASE) + builder.setconstant(names.last) + when CVar + name = node.target.value.value.to_sym + builder.getclassvariable(name) + + yield + + builder.dup + builder.setclassvariable(name) + when GVar + name = node.target.value.value.to_sym + builder.getglobal(name) + + yield + + builder.dup + builder.setglobal(name) + when Ident + local_variable = visit(node.target) + builder.getlocal(local_variable.index, local_variable.level) + + yield + + builder.dup + builder.setlocal(local_variable.index, local_variable.level) + when IVar + name = node.target.value.value.to_sym + builder.getinstancevariable(name) + + yield + + builder.dup + builder.setinstancevariable(name) + end + end + end + end + end +end diff --git a/test/compiler_test.rb b/test/compiler_test.rb new file mode 100644 index 00000000..4ed5bd0b --- /dev/null +++ b/test/compiler_test.rb @@ -0,0 +1,342 @@ 
+# frozen_string_literal: true + +return if !defined?(RubyVM::InstructionSequence) || RUBY_VERSION < "3.1" +require_relative "test_helper" + +module SyntaxTree + class CompilerTest < Minitest::Test + CASES = [ + # Various literals placed on the stack + "true", + "false", + "nil", + "self", + "0", + "1", + "2", + "1.0", + "1i", + "1r", + "1..2", + "1...2", + "(1)", + "%w[foo bar baz]", + "%W[foo bar baz]", + "%i[foo bar baz]", + "%I[foo bar baz]", + "{ foo: 1, bar: 1.0, baz: 1i }", + "'foo'", + "\"foo\"", + "\"foo\#{bar}\"", + "\"foo\#@bar\"", + "%q[foo]", + "%Q[foo]", + <<~RUBY, + "foo" \\ + "bar" + RUBY + <<~RUBY, + < 2", + "1 >= 2", + "1 == 2", + "1 != 2", + "1 & 2", + "1 | 2", + "1 << 2", + "1 ^ 2", + "foo.empty?", + "foo.length", + "foo.nil?", + "foo.size", + "foo.succ", + "/foo/ =~ \"foo\" && $1", + # Various method calls + "foo?", + "foo.bar", + "foo.bar(baz)", + "foo bar", + "foo.bar baz", + "foo(*bar)", + "foo(**bar)", + "foo(&bar)", + "foo.bar = baz", + "not foo", + "!foo", + "~foo", + "+foo", + "-foo", + "`foo`", + "`foo \#{bar} baz`", + # Local variables + "foo", + "foo = 1", + "foo = 1; bar = 2; baz = 3", + "foo = 1; foo", + "foo += 1", + "foo -= 1", + "foo *= 1", + "foo /= 1", + "foo %= 1", + "foo &= 1", + "foo |= 1", + "foo &&= 1", + "foo ||= 1", + "foo <<= 1", + "foo ^= 1", + # Instance variables + "@foo", + "@foo = 1", + "@foo = 1; @bar = 2; @baz = 3", + "@foo = 1; @foo", + "@foo += 1", + "@foo -= 1", + "@foo *= 1", + "@foo /= 1", + "@foo %= 1", + "@foo &= 1", + "@foo |= 1", + "@foo &&= 1", + "@foo ||= 1", + "@foo <<= 1", + "@foo ^= 1", + # Class variables + "@@foo", + "@@foo = 1", + "@@foo = 1; @@bar = 2; @@baz = 3", + "@@foo = 1; @@foo", + "@@foo += 1", + "@@foo -= 1", + "@@foo *= 1", + "@@foo /= 1", + "@@foo %= 1", + "@@foo &= 1", + "@@foo |= 1", + "@@foo &&= 1", + "@@foo ||= 1", + "@@foo <<= 1", + "@@foo ^= 1", + # Global variables + "$foo", + "$foo = 1", + "$foo = 1; $bar = 2; $baz = 3", + "$foo = 1; $foo", + "$foo += 1", + "$foo -= 1", + "$foo 
*= 1", + "$foo /= 1", + "$foo %= 1", + "$foo &= 1", + "$foo |= 1", + "$foo &&= 1", + "$foo ||= 1", + "$foo <<= 1", + "$foo ^= 1", + # Index access + "foo[bar]", + "foo[bar] = 1", + "foo[bar] += 1", + "foo[bar] -= 1", + "foo[bar] *= 1", + "foo[bar] /= 1", + "foo[bar] %= 1", + "foo[bar] &= 1", + "foo[bar] |= 1", + "foo[bar] &&= 1", + "foo[bar] ||= 1", + "foo[bar] <<= 1", + "foo[bar] ^= 1", + # Constants (single) + "Foo", + "Foo = 1", + "Foo += 1", + "Foo -= 1", + "Foo *= 1", + "Foo /= 1", + "Foo %= 1", + "Foo &= 1", + "Foo |= 1", + "Foo &&= 1", + "Foo ||= 1", + "Foo <<= 1", + "Foo ^= 1", + # Constants (top) + "::Foo", + "::Foo = 1", + "::Foo += 1", + "::Foo -= 1", + "::Foo *= 1", + "::Foo /= 1", + "::Foo %= 1", + "::Foo &= 1", + "::Foo |= 1", + "::Foo &&= 1", + "::Foo ||= 1", + "::Foo <<= 1", + "::Foo ^= 1", + # Constants (nested) + "Foo::Bar::Baz", + "Foo::Bar::Baz += 1", + "Foo::Bar::Baz -= 1", + "Foo::Bar::Baz *= 1", + "Foo::Bar::Baz /= 1", + "Foo::Bar::Baz %= 1", + "Foo::Bar::Baz &= 1", + "Foo::Bar::Baz |= 1", + "Foo::Bar::Baz &&= 1", + "Foo::Bar::Baz ||= 1", + "Foo::Bar::Baz <<= 1", + "Foo::Bar::Baz ^= 1", + # Constants (top nested) + "::Foo::Bar::Baz", + "::Foo::Bar::Baz = 1", + "::Foo::Bar::Baz += 1", + "::Foo::Bar::Baz -= 1", + "::Foo::Bar::Baz *= 1", + "::Foo::Bar::Baz /= 1", + "::Foo::Bar::Baz %= 1", + "::Foo::Bar::Baz &= 1", + "::Foo::Bar::Baz |= 1", + "::Foo::Bar::Baz &&= 1", + "::Foo::Bar::Baz ||= 1", + "::Foo::Bar::Baz <<= 1", + "::Foo::Bar::Baz ^= 1", + # Constants (calls) + "Foo::Bar.baz", + "::Foo::Bar.baz", + "Foo::Bar.baz = 1", + "::Foo::Bar.baz = 1", + # Control flow + "1 && 2", + "1 || 2", + "if foo then bar end", + "if foo then bar else baz end", + "foo if bar", + "foo while bar", + # Constructed values + "foo..bar", + "foo...bar", + "[1, 1.0, 1i, 1r]", + "[foo, bar, baz]", + "[@foo, @bar, @baz]", + "[@@foo, @@bar, @@baz]", + "[$foo, $bar, $baz]", + "%W[foo \#{bar} baz]", + "%I[foo \#{bar} baz]", + "[foo, bar] + [baz, qux]", + "{ foo: bar, baz: 
qux }", + "{ :foo => bar, :baz => qux }", + "{ foo => bar, baz => qux }", + "%s[foo]", + "[$1, $2, $3, $4, $5, $6, $7, $8, $9]", + # Core method calls + "alias foo bar", + "alias :foo :bar", + "undef foo", + "undef :foo", + "undef foo, bar, baz", + "undef :foo, :bar, :baz", + "super", + # defined? usage + "defined?(foo)", + "defined?(\"foo\")", + "defined?(:foo)", + "defined?(@foo)", + "defined?(@@foo)", + "defined?($foo)", + "defined?(Foo)", + "defined?(yield)", + "defined?(super)", + "foo = 1; defined?(foo)", + "defined?(self)", + "defined?(true)", + "defined?(false)", + "defined?(nil)", + "defined?(foo = 1)", + # Ignored content + ";;;", + "# comment", + "=begin\nfoo\n=end", + <<~RUBY + __END__ + RUBY + ] + + CASES.each do |source| + define_method(:"test_#{source}") { assert_compiles source } + end + + private + + def serialize_iseq(iseq) + serialized = iseq.to_a + + serialized[4].delete(:node_id) + serialized[4].delete(:code_location) + serialized[4].delete(:node_ids) + + serialized[13] = serialized[13].filter_map do |insn| + next unless insn.is_a?(Array) + + insn.map do |operand| + if operand.is_a?(Array) && + operand[0] == "YARVInstructionSequence/SimpleDataFormat" + serialize_iseq(operand) + else + operand + end + end + end + + serialized + end + + def assert_compiles(source) + assert_equal( + serialize_iseq(RubyVM::InstructionSequence.compile(source)), + serialize_iseq(SyntaxTree.parse(source).accept(Visitor::Compiler.new)) + ) + end + end +end From f51e211a76df9b8398709d7760132cb736561e1f Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 14 Nov 2022 18:34:04 -0500 Subject: [PATCH 005/104] Remove unused writing_storage --- lib/syntax_tree/visitor/compiler.rb | 24 ------------------------ 1 file changed, 24 deletions(-) diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb index fac19831..002614b5 100644 --- a/lib/syntax_tree/visitor/compiler.rb +++ b/lib/syntax_tree/visitor/compiler.rb @@ -751,10 +751,6 @@ def 
call_data(method_id, argc, flag) # instruction sequence. attr_reader :builder - # A boolean that tracks whether or not we're currently compiling and - # inline storage for a constant lookup. - attr_reader :writing_storage - # A boolean to track if we're currently compiling the last statement # within a set of statements. This information is necessary to determine # if we need to return the value of the last statement. @@ -766,7 +762,6 @@ def call_data(method_id, argc, flag) def initialize @current_iseq = nil @builder = nil - @writing_storage = false @last_statement = false @frozen_string_literal = false end @@ -1661,25 +1656,6 @@ def visit_string_parts(node) end end - # This is a helper method for compiling a constant lookup. In order to - # avoid having to look up the tree to determine if the constant is part of - # a larger path or not, we store a boolean flag that indicates that we're - # already in the middle of a constant lookup. That way we only get one set - # of opt_getinlinecache..opt_setinlinecache instructions. - def with_inline_storage - return yield if writing_storage - - @writing_storage = true - inline_storage = current_iseq.inline_storage - - getinlinecache = builder.opt_getinlinecache(-1, inline_storage) - yield - builder.opt_setinlinecache(inline_storage) - - getinlinecache[1] = builder.label - @writing_storage = false - end - # The current instruction sequence that we're compiling is always stored # on the compiler. 
When we descend into a node that has its own # instruction sequence, this method can be called to temporarily set the From 8464fc7a775969d26bbcedd819041ceecee0595e Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 14 Nov 2022 22:11:26 -0500 Subject: [PATCH 006/104] Handle various compilation options --- .rubocop.yml | 6 + lib/syntax_tree/visitor/compiler.rb | 373 +++++++++++++++++++--------- test/compiler_test.rb | 39 ++- 3 files changed, 293 insertions(+), 125 deletions(-) diff --git a/.rubocop.yml b/.rubocop.yml index 22f1bbef..d0bf0830 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -34,6 +34,9 @@ Lint/MissingSuper: Lint/RedundantRequireStatement: Enabled: false +Lint/SuppressedException: + Enabled: false + Lint/UnusedMethodArgument: AllowUnusedKeywordArguments: true @@ -52,6 +55,9 @@ Naming/RescuedExceptionsVariableName: Naming/VariableNumber: Enabled: false +Style/AccessorGrouping: + Enabled: false + Style/CaseEquality: Enabled: false diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb index 002614b5..14e277ae 100644 --- a/lib/syntax_tree/visitor/compiler.rb +++ b/lib/syntax_tree/visitor/compiler.rb @@ -91,6 +91,10 @@ def visit_qsymbols(node) node.elements.map { |element| visit(element).to_sym } end + def visit_qwords(node) + visit_all(node.elements) + end + def visit_range(node) left, right = [visit(node.left), visit(node.right)] node.operator.value === ".." ? left..right : left...right @@ -154,6 +158,10 @@ def visit_word(node) end end + def visit_words(node) + visit_all(node.elements) + end + def visit_unsupported(_node) raise CompilationError end @@ -202,6 +210,9 @@ def initialize(level, index) end end + # The type of the instruction sequence. + attr_reader :type + # The name of the instruction sequence. attr_reader :name @@ -211,6 +222,11 @@ def initialize(level, index) # The location of the root node of this instruction sequence. 
attr_reader :location + # This is the list of information about the arguments to this + # instruction sequence. + attr_accessor :argument_size + attr_reader :argument_options + # The list of instructions for this instruction sequence. attr_reader :insns @@ -229,14 +245,17 @@ def initialize(level, index) # maximum size of the stack for this instruction sequence. attr_reader :stack - def initialize(name, parent_iseq, location) + def initialize(type, name, parent_iseq, location) + @type = type @name = name @parent_iseq = parent_iseq @location = location + @argument_size = 0 + @argument_options = {} + @local_variables = [] @inline_storages = {} - @insns = [] @storage_index = 0 @stack = Stack.new @@ -292,7 +311,7 @@ def to_a versions[1], 1, { - arg_size: 0, + arg_size: argument_size, local_size: local_variables.length, stack_max: stack.maximum_size }, @@ -300,40 +319,43 @@ def to_a "", "", 1, - :top, + type, local_variables, - {}, + argument_options, [], - insns.map do |insn| - case insn[0] - when :getlocal_WC_0, :setlocal_WC_0 - # Here we need to map the local variable index to the offset - # from the top of the stack where it will be stored. - [insn[0], local_variables.length - (insn[1] - 3) - 1] - when :getlocal_WC_1, :setlocal_WC_1 - # Here we're going to do the same thing as with _WC_0 except - # we're looking at the parent scope. - [ - insn[0], - parent_iseq.local_variables.length - (insn[1] - 3) - 1 - ] - when :getlocal, :setlocal - # Here we're going to do the same thing as the other local - # instructions except that we'll traverse up the instruction - # sequences first. - iseq = self - insn[2].times { iseq = iseq.parent_iseq } - [insn[0], iseq.local_variables.length - (insn[1] - 3) - 1] - when :send - # For any instructions that push instruction sequences onto the - # stack, we need to call #to_a on them as well. 
- [insn[0], insn[1], (insn[2].to_a if insn[2])] - else - insn - end - end + insns.map { |insn| serialize(insn) } ] end + + private + + def serialize(insn) + case insn[0] + when :getlocal_WC_0, :getlocal_WC_1, :getlocal, :setlocal_WC_0, + :setlocal_WC_1, :setlocal + iseq = self + + case insn[0] + when :getlocal_WC_1, :setlocal_WC_1 + iseq = iseq.parent_iseq + when :getlocal, :setlocal + insn[2].times { iseq = iseq.parent_iseq } + end + + # Here we need to map the local variable index to the offset + # from the top of the stack where it will be stored. + index = iseq.local_variables.length - (insn[1] - 3) - 1 + [insn[0], index, *insn[2..]] + when :definemethod + [insn[0], insn[1], insn[2].to_a] + when :send + # For any instructions that push instruction sequences onto the + # stack, we need to call #to_a on them as well. + [insn[0], insn[1], (insn[2].to_a if insn[2])] + else + insn + end + end end # This class serves as a layer of indirection between the instruction @@ -343,10 +365,22 @@ def to_a # we place the logic for checking the Ruby version in this class. 
class Builder attr_reader :iseq, :stack - - def initialize(iseq) + attr_reader :frozen_string_literal, + :operands_unification, + :specialized_instruction + + def initialize( + iseq, + frozen_string_literal: false, + operands_unification: true, + specialized_instruction: true + ) @iseq = iseq @stack = iseq.stack + + @frozen_string_literal = frozen_string_literal + @operands_unification = operands_unification + @specialized_instruction = specialized_instruction end # This creates a new label at the current length of the instruction @@ -385,6 +419,11 @@ def defined(type, name, message) iseq.push([:defined, type, name, message]) end + def definemethod(name, method_iseq) + stack.change_by(0) + iseq.push([:definemethod, name, method_iseq]) + end + def dup stack.change_by(-1 + 2) iseq.push([:dup]) @@ -431,24 +470,27 @@ def getinstancevariable(name) if RUBY_VERSION >= "3.2" iseq.push([:getinstancevariable, name, iseq.inline_storage]) else - iseq.push( - [:getinstancevariable, name, iseq.inline_storage_for(name)] - ) + inline_storage = iseq.inline_storage_for(name) + iseq.push([:getinstancevariable, name, inline_storage]) end end def getlocal(index, level) stack.change_by(+1) - # Specialize the getlocal instruction based on the level of the - # local variable. If it's 0 or 1, then there's a specialized - # instruction that will look at the current scope or the parent - # scope, respectively, and requires fewer operands. - case level - when 0 - iseq.push([:getlocal_WC_0, index]) - when 1 - iseq.push([:getlocal_WC_1, index]) + if operands_unification + # Specialize the getlocal instruction based on the level of the + # local variable. If it's 0 or 1, then there's a specialized + # instruction that will look at the current scope or the parent + # scope, respectively, and requires fewer operands. 
+ case level + when 0 + iseq.push([:getlocal_WC_0, index]) + when 1 + iseq.push([:getlocal_WC_1, index]) + else + iseq.push([:getlocal, index, level]) + end else iseq.push([:getlocal, index, level]) end @@ -466,9 +508,9 @@ def intern def invokesuper(method_id, argc, flag, block_iseq) stack.change_by(-(argc + 1) + 1) - iseq.push( - [:invokesuper, call_data(method_id, argc, flag), block_iseq] - ) + + cdata = call_data(method_id, argc, flag) + iseq.push([:invokesuper, cdata, block_iseq]) end def jump(index) @@ -548,14 +590,18 @@ def putnil def putobject(object) stack.change_by(+1) - # Specialize the putobject instruction based on the value of the - # object. If it's 0 or 1, then there's a specialized instruction - # that will push the object onto the stack and requires fewer - # operands. - if object.eql?(0) - iseq.push([:putobject_INT2FIX_0_]) - elsif object.eql?(1) - iseq.push([:putobject_INT2FIX_1_]) + if operands_unification + # Specialize the putobject instruction based on the value of the + # object. If it's 0 or 1, then there's a specialized instruction + # that will push the object onto the stack and requires fewer + # operands. + if object.eql?(0) + iseq.push([:putobject_INT2FIX_0_]) + elsif object.eql?(1) + iseq.push([:putobject_INT2FIX_1_]) + else + iseq.push([:putobject, object]) + end else iseq.push([:putobject, object]) end @@ -580,41 +626,45 @@ def send(method_id, argc, flag, block_iseq = nil) stack.change_by(-(argc + 1) + 1) cdata = call_data(method_id, argc, flag) - # Specialize the send instruction. If it doesn't have a block - # attached, then we will replace it with an opt_send_without_block - # and do further specializations based on the called method and the - # number of arguments. 
- - # stree-ignore - if !block_iseq && (flag & VM_CALL_ARGS_BLOCKARG) == 0 - case [method_id, argc] - when [:length, 0] then iseq.push([:opt_length, cdata]) - when [:size, 0] then iseq.push([:opt_size, cdata]) - when [:empty?, 0] then iseq.push([:opt_empty_p, cdata]) - when [:nil?, 0] then iseq.push([:opt_nil_p, cdata]) - when [:succ, 0] then iseq.push([:opt_succ, cdata]) - when [:!, 0] then iseq.push([:opt_not, cdata]) - when [:+, 1] then iseq.push([:opt_plus, cdata]) - when [:-, 1] then iseq.push([:opt_minus, cdata]) - when [:*, 1] then iseq.push([:opt_mult, cdata]) - when [:/, 1] then iseq.push([:opt_div, cdata]) - when [:%, 1] then iseq.push([:opt_mod, cdata]) - when [:==, 1] then iseq.push([:opt_eq, cdata]) - when [:=~, 1] then iseq.push([:opt_regexpmatch2, cdata]) - when [:<, 1] then iseq.push([:opt_lt, cdata]) - when [:<=, 1] then iseq.push([:opt_le, cdata]) - when [:>, 1] then iseq.push([:opt_gt, cdata]) - when [:>=, 1] then iseq.push([:opt_ge, cdata]) - when [:<<, 1] then iseq.push([:opt_ltlt, cdata]) - when [:[], 1] then iseq.push([:opt_aref, cdata]) - when [:&, 1] then iseq.push([:opt_and, cdata]) - when [:|, 1] then iseq.push([:opt_or, cdata]) - when [:[]=, 2] then iseq.push([:opt_aset, cdata]) - when [:!=, 1] - eql_data = call_data(:==, 1, VM_CALL_ARGS_SIMPLE) - iseq.push([:opt_neq, eql_data, cdata]) + if specialized_instruction + # Specialize the send instruction. If it doesn't have a block + # attached, then we will replace it with an opt_send_without_block + # and do further specializations based on the called method and the + # number of arguments. 
+ + # stree-ignore + if !block_iseq && (flag & VM_CALL_ARGS_BLOCKARG) == 0 + case [method_id, argc] + when [:length, 0] then iseq.push([:opt_length, cdata]) + when [:size, 0] then iseq.push([:opt_size, cdata]) + when [:empty?, 0] then iseq.push([:opt_empty_p, cdata]) + when [:nil?, 0] then iseq.push([:opt_nil_p, cdata]) + when [:succ, 0] then iseq.push([:opt_succ, cdata]) + when [:!, 0] then iseq.push([:opt_not, cdata]) + when [:+, 1] then iseq.push([:opt_plus, cdata]) + when [:-, 1] then iseq.push([:opt_minus, cdata]) + when [:*, 1] then iseq.push([:opt_mult, cdata]) + when [:/, 1] then iseq.push([:opt_div, cdata]) + when [:%, 1] then iseq.push([:opt_mod, cdata]) + when [:==, 1] then iseq.push([:opt_eq, cdata]) + when [:=~, 1] then iseq.push([:opt_regexpmatch2, cdata]) + when [:<, 1] then iseq.push([:opt_lt, cdata]) + when [:<=, 1] then iseq.push([:opt_le, cdata]) + when [:>, 1] then iseq.push([:opt_gt, cdata]) + when [:>=, 1] then iseq.push([:opt_ge, cdata]) + when [:<<, 1] then iseq.push([:opt_ltlt, cdata]) + when [:[], 1] then iseq.push([:opt_aref, cdata]) + when [:&, 1] then iseq.push([:opt_and, cdata]) + when [:|, 1] then iseq.push([:opt_or, cdata]) + when [:[]=, 2] then iseq.push([:opt_aset, cdata]) + when [:!=, 1] + eql_data = call_data(:==, 1, VM_CALL_ARGS_SIMPLE) + iseq.push([:opt_neq, eql_data, cdata]) + else + iseq.push([:opt_send_without_block, cdata]) + end else - iseq.push([:opt_send_without_block, cdata]) + iseq.push([:send, cdata, block_iseq]) end else iseq.push([:send, cdata, block_iseq]) @@ -647,24 +697,27 @@ def setinstancevariable(name) if RUBY_VERSION >= "3.2" iseq.push([:setinstancevariable, name, iseq.inline_storage]) else - iseq.push( - [:setinstancevariable, name, iseq.inline_storage_for(name)] - ) + inline_storage = iseq.inline_storage_for(name) + iseq.push([:setinstancevariable, name, inline_storage]) end end def setlocal(index, level) stack.change_by(-1) - # Specialize the setlocal instruction based on the level of the - # local 
variable. If it's 0 or 1, then there's a specialized - # instruction that will write to the current scope or the parent - # scope, respectively, and requires fewer operands. - case level - when 0 - iseq.push([:setlocal_WC_0, index]) - when 1 - iseq.push([:setlocal_WC_1, index]) + if operands_unification + # Specialize the setlocal instruction based on the level of the + # local variable. If it's 0 or 1, then there's a specialized + # instruction that will write to the current scope or the parent + # scope, respectively, and requires fewer operands. + case level + when 0 + iseq.push([:setlocal_WC_0, index]) + when 1 + iseq.push([:setlocal_WC_1, index]) + else + iseq.push([:setlocal, index, level]) + end else iseq.push([:setlocal, index, level]) end @@ -744,6 +797,12 @@ def call_data(method_id, argc, flag) DEFINED_FUNC = 16 DEFINED_CONST_FROM = 17 + # These options mirror the compilation options that we currently support + # that can be also passed to RubyVM::InstructionSequence.compile. + attr_reader :frozen_string_literal, + :operands_unification, + :specialized_instruction + # The current instruction sequence that is being compiled. attr_reader :current_iseq @@ -756,14 +815,18 @@ def call_data(method_id, argc, flag) # if we need to return the value of the last statement. attr_reader :last_statement - # Whether or not the frozen_string_literal pragma has been set. - attr_reader :frozen_string_literal + def initialize( + frozen_string_literal: false, + operands_unification: true, + specialized_instruction: true + ) + @frozen_string_literal = frozen_string_literal + @operands_unification = operands_unification + @specialized_instruction = specialized_instruction - def initialize @current_iseq = nil @builder = nil @last_statement = false - @frozen_string_literal = false end def visit_CHAR(node) @@ -929,6 +992,10 @@ def visit_binary(node) end end + def visit_bodystmt(node) + visit(node.statements) + end + def visit_call(node) node.receiver ? 
visit(node.receiver) : builder.putself @@ -1002,6 +1069,52 @@ def visit_const_path_ref(node) builder.opt_getconstant_path(names) end + def visit_def(node) + params = node.params + params = params.contents if params.is_a?(Paren) + + method_iseq = + with_instruction_sequence( + :method, + node.name.value, + current_iseq, + node + ) do |iseq| + if params + params.requireds.each do |required| + iseq.local_variables << required.value.to_sym + iseq.argument_size += 1 + + iseq.argument_options[:lead_num] ||= 0 + iseq.argument_options[:lead_num] += 1 + end + + params.optionals.each do |(optional, value)| + index = iseq.local_variables.length + name = optional.value.to_sym + + iseq.local_variables << name + iseq.argument_size += 1 + + unless iseq.argument_options.key?(:opt) + iseq.argument_options[:opt] = [builder.label] + end + + visit(value) + builder.setlocal(index, 0) + iseq.argument_options[:opt] << builder.label + end + end + + visit(node.bodystmt) + builder.leave + end + + name = node.name.value.to_sym + builder.definemethod(name, method_iseq) + builder.putobject(name) + end + def visit_defined(node) case node.value when Assign @@ -1096,6 +1209,7 @@ def visit_for(node) block_iseq = with_instruction_sequence( + :block, "block in #{current_iseq.name}", current_iseq, node.statements @@ -1255,7 +1369,7 @@ def visit_program(node) end end - with_instruction_sequence("", nil, node) do + with_instruction_sequence(:top, "", nil, node) do if statements.empty? 
builder.putnil else @@ -1273,8 +1387,12 @@ def visit_qsymbols(node) end def visit_qwords(node) - visit_all(node.elements) - builder.newarray(node.elements.length) + if frozen_string_literal + builder.duparray(node.accept(RubyVisitor.new)) + else + visit_all(node.elements) + builder.newarray(node.elements.length) + end end def visit_range(node) @@ -1485,8 +1603,21 @@ def visit_word(node) end def visit_words(node) - visit_all(node.elements) - builder.newarray(node.elements.length) + converted = nil + + if frozen_string_literal + begin + converted = node.accept(RubyVisitor.new) + rescue RubyVisitor::CompilationError + end + end + + if converted + builder.duparray(converted) + else + visit_all(node.elements) + builder.newarray(node.elements.length) + end end def visit_xstring_literal(node) @@ -1660,15 +1791,23 @@ def visit_string_parts(node) # on the compiler. When we descend into a node that has its own # instruction sequence, this method can be called to temporarily set the # new value of the instruction sequence, yield, and then set it back. 
- def with_instruction_sequence(name, parent_iseq, node) + def with_instruction_sequence(type, name, parent_iseq, node) previous_iseq = current_iseq previous_builder = builder begin - iseq = InstructionSequence.new(name, parent_iseq, node.location) + iseq = InstructionSequence.new(type, name, parent_iseq, node.location) + @current_iseq = iseq - @builder = Builder.new(iseq) - yield + @builder = + Builder.new( + iseq, + frozen_string_literal: frozen_string_literal, + operands_unification: operands_unification, + specialized_instruction: specialized_instruction + ) + + yield iseq iseq ensure @current_iseq = previous_iseq diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 4ed5bd0b..e44b35aa 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -248,8 +248,8 @@ class CompilerTest < Minitest::Test "Foo::Bar.baz = 1", "::Foo::Bar.baz = 1", # Control flow - "1 && 2", - "1 || 2", + "foo && bar", + "foo || bar", "if foo then bar end", "if foo then bar else baz end", "foo if bar", @@ -298,13 +298,34 @@ class CompilerTest < Minitest::Test ";;;", "# comment", "=begin\nfoo\n=end", - <<~RUBY + <<~RUBY, __END__ RUBY + # Method definitions + "def foo; end", + "def foo(bar); end", + "def foo(bar, baz); end", + "def foo(bar = 1); end", + "def foo(bar = 1, baz = 2); end" + ] + + # These are the combinations of instructions that we're going to test. 
+ OPTIONS = [ + {}, + { frozen_string_literal: true }, + { operands_unification: false }, + { specialized_instruction: false }, + { operands_unification: false, specialized_instruction: false } ] - CASES.each do |source| - define_method(:"test_#{source}") { assert_compiles source } + OPTIONS.each do |options| + suffix = options.inspect + + CASES.each do |source| + define_method(:"test_#{source}_#{suffix}") do + assert_compiles(source, **options) + end + end end private @@ -332,10 +353,12 @@ def serialize_iseq(iseq) serialized end - def assert_compiles(source) + def assert_compiles(source, **options) + program = SyntaxTree.parse(source) + assert_equal( - serialize_iseq(RubyVM::InstructionSequence.compile(source)), - serialize_iseq(SyntaxTree.parse(source).accept(Visitor::Compiler.new)) + serialize_iseq(RubyVM::InstructionSequence.compile(source, **options)), + serialize_iseq(program.accept(Visitor::Compiler.new(**options))) ) end end From 6c0bbe60a1bec109e52b440fd36a468e3db15653 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 14 Nov 2022 23:20:26 -0500 Subject: [PATCH 007/104] Define modules --- lib/syntax_tree/visitor/compiler.rb | 45 +++++++++++++++++++++++++++++ test/compiler_test.rb | 8 ++++- 2 files changed, 52 insertions(+), 1 deletion(-) diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb index 14e277ae..ebc98a14 100644 --- a/lib/syntax_tree/visitor/compiler.rb +++ b/lib/syntax_tree/visitor/compiler.rb @@ -346,6 +346,8 @@ def serialize(insn) # from the top of the stack where it will be stored. 
index = iseq.local_variables.length - (insn[1] - 3) - 1 [insn[0], index, *insn[2..]] + when :defineclass + [insn[0], insn[1], insn[2].to_a, insn[3]] when :definemethod [insn[0], insn[1], insn[2].to_a] when :send @@ -419,6 +421,11 @@ def defined(type, name, message) iseq.push([:defined, type, name, message]) end + def defineclass(name, class_iseq, flags) + stack.change_by(-2 + 1) + iseq.push([:defineclass, name, class_iseq, flags]) + end + def definemethod(name, method_iseq) stack.change_by(0) iseq.push([:definemethod, name, method_iseq]) @@ -797,6 +804,14 @@ def call_data(method_id, argc, flag) DEFINED_FUNC = 16 DEFINED_CONST_FROM = 17 + # These constants correspond to the value passed in the flags as part of + # the defineclass instruction. + VM_DEFINECLASS_TYPE_CLASS = 0 + VM_DEFINECLASS_TYPE_SINGLETON_CLASS = 1 + VM_DEFINECLASS_TYPE_MODULE = 2 + VM_DEFINECLASS_FLAG_SCOPED = 8 + VM_DEFINECLASS_FLAG_HAS_SUPERCLASS = 16 + # These options mirror the compilation options that we currently support # that can be also passed to RubyVM::InstructionSequence.compile. 
attr_reader :frozen_string_literal, @@ -1275,6 +1290,36 @@ def visit_label(node) builder.putobject(node.accept(RubyVisitor.new)) end + def visit_module(node) + name = node.constant.constant.value.to_sym + module_iseq = + with_instruction_sequence( + :class, + "", + current_iseq, + node + ) do + visit(node.bodystmt) + builder.leave + end + + flags = VM_DEFINECLASS_TYPE_MODULE + + case node.constant + when ConstPathRef + flags |= VM_DEFINECLASS_FLAG_SCOPED + visit(node.constant.parent) + when ConstRef + builder.putspecialobject(VM_SPECIAL_OBJECT_CONST_BASE) + when TopConstRef + flags |= VM_DEFINECLASS_FLAG_SCOPED + builder.putobject(Object) + end + + builder.putnil + builder.defineclass(name, module_iseq, flags) + end + def visit_not(node) visit(node.statement) builder.send(:!, 0, VM_CALL_ARGS_SIMPLE) diff --git a/test/compiler_test.rb b/test/compiler_test.rb index e44b35aa..08316ee3 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -306,7 +306,13 @@ class CompilerTest < Minitest::Test "def foo(bar); end", "def foo(bar, baz); end", "def foo(bar = 1); end", - "def foo(bar = 1, baz = 2); end" + "def foo(bar = 1, baz = 2); end", + # Class/module definitions + "module Foo; end", + "module ::Foo; end", + "module Foo::Bar; end", + "module ::Foo::Bar; end", + "module Foo; module Bar; end; end" ] # These are the combinations of instructions that we're going to test. 
From ebfddc3a1ea713708085da96210897773cda2cf7 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 14 Nov 2022 23:20:26 -0500 Subject: [PATCH 008/104] Visit params --- lib/syntax_tree/visitor/compiler.rb | 83 ++++++++++++++++++----------- test/compiler_test.rb | 5 ++ 2 files changed, 57 insertions(+), 31 deletions(-) diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb index ebc98a14..ac4bc7c0 100644 --- a/lib/syntax_tree/visitor/compiler.rb +++ b/lib/syntax_tree/visitor/compiler.rb @@ -1085,42 +1085,14 @@ def visit_const_path_ref(node) end def visit_def(node) - params = node.params - params = params.contents if params.is_a?(Paren) - method_iseq = with_instruction_sequence( :method, node.name.value, current_iseq, node - ) do |iseq| - if params - params.requireds.each do |required| - iseq.local_variables << required.value.to_sym - iseq.argument_size += 1 - - iseq.argument_options[:lead_num] ||= 0 - iseq.argument_options[:lead_num] += 1 - end - - params.optionals.each do |(optional, value)| - index = iseq.local_variables.length - name = optional.value.to_sym - - iseq.local_variables << name - iseq.argument_size += 1 - - unless iseq.argument_options.key?(:opt) - iseq.argument_options[:opt] = [builder.label] - end - - visit(value) - builder.setlocal(index, 0) - iseq.argument_options[:opt] << builder.label - end - end - + ) do + visit(node.params) if node.params visit(node.bodystmt) builder.leave end @@ -1391,6 +1363,49 @@ def visit_opassign(node) end end + def visit_params(node) + argument_options = current_iseq.argument_options + + if node.requireds.any? 
+ argument_options[:lead_num] = 0 + + node.requireds.each do |required| + current_iseq.local_variables << required.value.to_sym + current_iseq.argument_size += 1 + argument_options[:lead_num] += 1 + end + end + + node.optionals.each do |(optional, value)| + index = current_iseq.local_variables.length + name = optional.value.to_sym + + current_iseq.local_variables << name + current_iseq.argument_size += 1 + + unless argument_options.key?(:opt) + argument_options[:opt] = [builder.label] + end + + visit(value) + builder.setlocal(index, 0) + current_iseq.argument_options[:opt] << builder.label + end + + visit(node.rest) if node.rest + + if node.posts.any? + argument_options[:post_start] = current_iseq.argument_size + argument_options[:post_num] = 0 + + node.posts.each do |post| + current_iseq.local_variables << post.value.to_sym + current_iseq.argument_size += 1 + argument_options[:post_num] += 1 + end + end + end + def visit_paren(node) visit(node.contents) end @@ -1461,6 +1476,12 @@ def visit_regexp_literal(node) builder.toregexp(flags, node.parts.length) end + def visit_rest_param(node) + current_iseq.local_variables << node.name.value.to_sym + current_iseq.argument_options[:rest_start] = current_iseq.argument_size + current_iseq.argument_size += 1 + end + def visit_statements(node) statements = node.body.select do |statement| @@ -1852,7 +1873,7 @@ def with_instruction_sequence(type, name, parent_iseq, node) specialized_instruction: specialized_instruction ) - yield iseq + yield iseq ensure @current_iseq = previous_iseq diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 08316ee3..81a01777 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -307,6 +307,11 @@ class CompilerTest < Minitest::Test "def foo(bar, baz); end", "def foo(bar = 1); end", "def foo(bar = 1, baz = 2); end", + "def foo(*bar); end", + "def foo(bar, *baz); end", + "def foo(*bar, baz, qux); end", + "def foo(bar, *baz, qux); end", + "def foo(bar, baz, *qux, quaz); end", # 
Class/module definitions "module Foo; end", "module ::Foo; end", From 72e527391e889f62f4e628495da42005ba7244e2 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 15 Nov 2022 00:02:17 -0500 Subject: [PATCH 009/104] Fix implementing toregexp --- lib/syntax_tree/visitor/compiler.rb | 5 +++++ test/compiler_test.rb | 2 ++ 2 files changed, 7 insertions(+) diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb index ac4bc7c0..4aa50fc1 100644 --- a/lib/syntax_tree/visitor/compiler.rb +++ b/lib/syntax_tree/visitor/compiler.rb @@ -750,6 +750,11 @@ def topn(number) iseq.push([:topn, number]) end + def toregexp(options, length) + stack.change_by(-length + 1) + iseq.push([:toregexp, options, length]) + end + private # This creates a call data object that is used as the operand for the diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 81a01777..ca4d4898 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -270,6 +270,8 @@ class CompilerTest < Minitest::Test "{ foo => bar, baz => qux }", "%s[foo]", "[$1, $2, $3, $4, $5, $6, $7, $8, $9]", + "/foo \#{bar} baz/", + "%r{foo \#{bar} baz}", # Core method calls "alias foo bar", "alias :foo :bar", From e9b767c2bc4b6beea7d88234619a0ab16813006b Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 15 Nov 2022 00:11:31 -0500 Subject: [PATCH 010/104] Compile for loops --- lib/syntax_tree/visitor/compiler.rb | 28 +++++++++++++++++++++------- test/compiler_test.rb | 1 + 2 files changed, 22 insertions(+), 7 deletions(-) diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb index 4aa50fc1..780495d2 100644 --- a/lib/syntax_tree/visitor/compiler.rb +++ b/lib/syntax_tree/visitor/compiler.rb @@ -545,6 +545,11 @@ def newrange(flag) iseq.push([:newrange, flag]) end + def nop + stack.change_by(0) + iseq.push([:nop]) + end + def objtostring(method_id, argc, flag) stack.change_by(-1 + 1) iseq.push([:objtostring, call_data(method_id, argc, flag)]) @@ -1190,13 
+1195,9 @@ def visit_float(node) def visit_for(node) visit(node.collection) - # Be sure we set up the local table before we start compiling the body - # of the for loop. - if node.index.is_a?(VarField) && node.index.value.is_a?(Ident) - name = node.index.value.value.to_sym - unless current_iseq.local_variables.include?(name) - current_iseq.local_variables << name - end + name = node.index.value.value.to_sym + unless current_iseq.local_variables.include?(name) + current_iseq.local_variables << name end block_iseq = @@ -1206,6 +1207,19 @@ def visit_for(node) current_iseq, node.statements ) do + current_iseq.argument_options[:lead_num] ||= 0 + current_iseq.argument_options[:lead_num] += 1 + current_iseq.argument_options[:ambiguous_param0] = true + + current_iseq.argument_size += 1 + current_iseq.local_variables << 2 + + builder.getlocal(0, 0) + + local_variable = current_iseq.local_variable(name) + builder.setlocal(local_variable.index, local_variable.level) + builder.nop + visit(node.statements) builder.leave end diff --git a/test/compiler_test.rb b/test/compiler_test.rb index ca4d4898..85d62b5b 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -254,6 +254,7 @@ class CompilerTest < Minitest::Test "if foo then bar else baz end", "foo if bar", "foo while bar", + "for i in [1, 2, 3] do i end", # Constructed values "foo..bar", "foo...bar", From 07afc36ddbbcefddcec5d8062e918d28d479c0b8 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 15 Nov 2022 00:24:20 -0500 Subject: [PATCH 011/104] Handle super with arguments --- lib/syntax_tree/visitor/compiler.rb | 11 +++++++++++ test/compiler_test.rb | 2 ++ 2 files changed, 13 insertions(+) diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb index 780495d2..c56e553d 100644 --- a/lib/syntax_tree/visitor/compiler.rb +++ b/lib/syntax_tree/visitor/compiler.rb @@ -1541,6 +1541,17 @@ def visit_string_literal(node) end end + def visit_super(node) + builder.putself + 
visit(node.arguments) + builder.invokesuper( + nil, + argument_parts(node.arguments).length, + VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE | VM_CALL_SUPER, + nil + ) + end + def visit_symbol_literal(node) builder.putobject(node.accept(RubyVisitor.new)) end diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 85d62b5b..1c6cde38 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -281,6 +281,8 @@ class CompilerTest < Minitest::Test "undef foo, bar, baz", "undef :foo, :bar, :baz", "super", + "super(1)", + "super(1, 2, 3)", # defined? usage "defined?(foo)", "defined?(\"foo\")", From a8555eee14eafe504c9c60272f1a7e0dbfb3146c Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 16 Nov 2022 09:11:41 -0500 Subject: [PATCH 012/104] Handle block params --- lib/syntax_tree/visitor/compiler.rb | 8 ++++++++ test/compiler_test.rb | 2 ++ 2 files changed, 10 insertions(+) diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb index c56e553d..a3fc7bbe 100644 --- a/lib/syntax_tree/visitor/compiler.rb +++ b/lib/syntax_tree/visitor/compiler.rb @@ -1017,6 +1017,12 @@ def visit_binary(node) end end + def visit_blockarg(node) + current_iseq.argument_options[:block_start] = current_iseq.argument_size + current_iseq.local_variables << node.name.value.to_sym + current_iseq.argument_size += 1 + end + def visit_bodystmt(node) visit(node.statements) end @@ -1423,6 +1429,8 @@ def visit_params(node) argument_options[:post_num] += 1 end end + + visit(node.block) if node.block end def visit_paren(node) diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 1c6cde38..8534a3d0 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -317,6 +317,8 @@ class CompilerTest < Minitest::Test "def foo(*bar, baz, qux); end", "def foo(bar, *baz, qux); end", "def foo(bar, baz, *qux, quaz); end", + "def foo(bar, baz, &qux); end", + "def foo(bar, *baz, &qux); end", # Class/module definitions "module Foo; end", "module ::Foo; end", From 
72534c52bbf48002f07c0a4eff63a17b465b27c8 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 16 Nov 2022 10:07:51 -0500 Subject: [PATCH 013/104] Block call --- lib/syntax_tree/visitor/compiler.rb | 72 ++++++++++++++++++++--------- test/compiler_test.rb | 1 + 2 files changed, 51 insertions(+), 22 deletions(-) diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb index a3fc7bbe..870eb4c9 100644 --- a/lib/syntax_tree/visitor/compiler.rb +++ b/lib/syntax_tree/visitor/compiler.rb @@ -201,15 +201,32 @@ class InstructionSequence # This is a small data class that captures the level of a local variable # table (the number of scopes to traverse) and the index of the local # variable in that table. - class LocalVariable - attr_reader :level, :index + class LocalVariableLookup + attr_reader :local_variable, :level, :index - def initialize(level, index) + def initialize(local_variable, level, index) + @local_variable = local_variable @level = level @index = index end end + class PlainLocalVariable + attr_reader :name + + def initialize(name) + @name = name + end + end + + class BlockProxyLocalVariable + attr_reader :name + + def initialize(name) + @name = name + end + end + # The type of the instruction sequence. 
attr_reader :type @@ -262,8 +279,8 @@ def initialize(type, name, parent_iseq, location) end def local_variable(name, level = 0) - if (index = local_variables.index(name)) - LocalVariable.new(level, index) + if (index = local_variables.index { |local_variable| local_variable.name == name }) + LocalVariableLookup.new(local_variables[index], level, index) elsif parent_iseq parent_iseq.local_variable(name, level + 1) else @@ -320,7 +337,7 @@ def to_a "", 1, type, - local_variables, + local_variables.map(&:name), argument_options, [], insns.map { |insn| serialize(insn) } @@ -331,14 +348,14 @@ def to_a def serialize(insn) case insn[0] - when :getlocal_WC_0, :getlocal_WC_1, :getlocal, :setlocal_WC_0, - :setlocal_WC_1, :setlocal + when :getblockparamproxy, :getlocal_WC_0, :getlocal_WC_1, :getlocal, + :setlocal_WC_0, :setlocal_WC_1, :setlocal iseq = self case insn[0] when :getlocal_WC_1, :setlocal_WC_1 iseq = iseq.parent_iseq - when :getlocal, :setlocal + when :getblockparamproxy, :getlocal, :setlocal insn[2].times { iseq = iseq.parent_iseq } end @@ -451,6 +468,11 @@ def dupn(number) iseq.push([:dupn, number]) end + def getblockparamproxy(index, level) + stack.change_by(+1) + iseq.push([:getblockparamproxy, index, level]) + end + def getclassvariable(name) stack.change_by(+1) @@ -1019,7 +1041,7 @@ def visit_binary(node) def visit_blockarg(node) current_iseq.argument_options[:block_start] = current_iseq.argument_size - current_iseq.local_variables << node.name.value.to_sym + current_iseq.local_variables << InstructionSequence::BlockProxyLocalVariable.new(node.name.value.to_sym) current_iseq.argument_size += 1 end @@ -1126,8 +1148,8 @@ def visit_defined(node) if node.value.target.is_a?(VarField) && node.value.target.value.is_a?(Ident) name = node.value.target.value.value.to_sym - unless current_iseq.local_variables.include?(name) - current_iseq.local_variables << name + unless current_iseq.local_variables.any? 
{ |local_variable| local_variable.name == name } + current_iseq.local_variables << InstructionSequence::PlainLocalVariable.new(name) end end @@ -1202,8 +1224,8 @@ def visit_for(node) visit(node.collection) name = node.index.value.value.to_sym - unless current_iseq.local_variables.include?(name) - current_iseq.local_variables << name + unless current_iseq.local_variables.any? { |local_variable| local_variable.name == name } + current_iseq.local_variables << InstructionSequence::PlainLocalVariable.new(name) end block_iseq = @@ -1218,7 +1240,7 @@ def visit_for(node) current_iseq.argument_options[:ambiguous_param0] = true current_iseq.argument_size += 1 - current_iseq.local_variables << 2 + current_iseq.local_variables << InstructionSequence::PlainLocalVariable.new(2) builder.getlocal(0, 0) @@ -1395,7 +1417,7 @@ def visit_params(node) argument_options[:lead_num] = 0 node.requireds.each do |required| - current_iseq.local_variables << required.value.to_sym + current_iseq.local_variables << InstructionSequence::PlainLocalVariable.new(required.value.to_sym) current_iseq.argument_size += 1 argument_options[:lead_num] += 1 end @@ -1405,7 +1427,7 @@ def visit_params(node) index = current_iseq.local_variables.length name = optional.value.to_sym - current_iseq.local_variables << name + current_iseq.local_variables << InstructionSequence::PlainLocalVariable.new(name) current_iseq.argument_size += 1 unless argument_options.key?(:opt) @@ -1424,7 +1446,7 @@ def visit_params(node) argument_options[:post_num] = 0 node.posts.each do |post| - current_iseq.local_variables << post.value.to_sym + current_iseq.local_variables << InstructionSequence::PlainLocalVariable.new(post.value.to_sym) current_iseq.argument_size += 1 argument_options[:post_num] += 1 end @@ -1504,7 +1526,7 @@ def visit_regexp_literal(node) end def visit_rest_param(node) - current_iseq.local_variables << node.name.value.to_sym + current_iseq.local_variables << 
InstructionSequence::PlainLocalVariable.new(node.name.value.to_sym) current_iseq.argument_options[:rest_start] = current_iseq.argument_size current_iseq.argument_size += 1 end @@ -1630,8 +1652,8 @@ def visit_var_field(node) current_iseq.inline_storage_for(name) when Ident name = node.value.value.to_sym - unless current_iseq.local_variables.include?(name) - current_iseq.local_variables << name + unless current_iseq.local_variables.any? { |local_variable| local_variable.name == name } + current_iseq.local_variables << InstructionSequence::PlainLocalVariable.new(name) end current_iseq.local_variable(name) end @@ -1648,7 +1670,13 @@ def visit_var_ref(node) builder.getglobal(node.value.value.to_sym) when Ident local_variable = current_iseq.local_variable(node.value.value.to_sym) - builder.getlocal(local_variable.index, local_variable.level) + + case local_variable.local_variable + when InstructionSequence::BlockProxyLocalVariable + builder.getblockparamproxy(local_variable.index, local_variable.level) + when InstructionSequence::PlainLocalVariable + builder.getlocal(local_variable.index, local_variable.level) + end when IVar name = node.value.value.to_sym builder.getinstancevariable(name) diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 8534a3d0..c473e7be 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -319,6 +319,7 @@ class CompilerTest < Minitest::Test "def foo(bar, baz, *qux, quaz); end", "def foo(bar, baz, &qux); end", "def foo(bar, *baz, &qux); end", + "def foo(&qux); qux.call; end", # Class/module definitions "module Foo; end", "module ::Foo; end", From b96ffb51b3524ce2deea7ad81a95c35d5c6a558f Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 16 Nov 2022 14:09:06 -0500 Subject: [PATCH 014/104] Refactor local variables --- lib/syntax_tree/visitor/compiler.rb | 142 ++++++++++++++++++---------- 1 file changed, 90 insertions(+), 52 deletions(-) diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb 
index 870eb4c9..f569ddcd 100644 --- a/lib/syntax_tree/visitor/compiler.rb +++ b/lib/syntax_tree/visitor/compiler.rb @@ -194,24 +194,15 @@ def change_by(value) end end - # This class is meant to mirror RubyVM::InstructionSequence. It contains a - # list of instructions along with the metadata pertaining to them. It also - # functions as a builder for the instruction sequence. - class InstructionSequence - # This is a small data class that captures the level of a local variable - # table (the number of scopes to traverse) and the index of the local - # variable in that table. - class LocalVariableLookup - attr_reader :local_variable, :level, :index - - def initialize(local_variable, level, index) - @local_variable = local_variable - @level = level - @index = index - end - end - - class PlainLocalVariable + # This represents every local variable associated with an instruction + # sequence. There are two kinds of locals: plain locals that are what you + # expect, and block proxy locals, which represent local variables + # associated with blocks that were passed into the current instruction + # sequence. + class LocalTable + # A local representing a block passed into the current instruction + # sequence. + class BlockProxyLocal attr_reader :name def initialize(name) @@ -219,7 +210,8 @@ def initialize(name) end end - class BlockProxyLocalVariable + # A regular local variable. + class PlainLocal attr_reader :name def initialize(name) @@ -227,6 +219,61 @@ def initialize(name) end end + # The result of looking up a local variable in the current local table. + class Lookup + attr_reader :local, :index, :level + + def initialize(local, index, level) + @local = local + @index = index + @level = level + end + end + + attr_reader :locals + + def initialize + @locals = [] + end + + def find(name, level) + index = locals.index { |local| local.name == name } + Lookup.new(locals[index], index, level) if index + end + + def has?(name) + locals.any? 
{ |local| local.name == name } + end + + def names + locals.map(&:name) + end + + def size + locals.length + end + + # Add a BlockProxyLocal to the local table. + def block_proxy(name) + locals << BlockProxyLocal.new(name) unless has?(name) + end + + # Add a PlainLocal to the local table. + def plain(name) + locals << PlainLocal.new(name) unless has?(name) + end + + # This is the offset from the top of the stack where this local variable + # lives. + def offset(index) + size - (index - 3) - 1 + end + end + + # This class is meant to mirror RubyVM::InstructionSequence. It contains a + # list of instructions along with the metadata pertaining to them. It also + # functions as a builder for the instruction sequence. + class InstructionSequence # The type of the instruction sequence. attr_reader :type @@ -247,9 +294,8 @@ def initialize(name) # The list of instructions for this instruction sequence. attr_reader :insns - # The array of symbols corresponding to the local variables of this - # instruction sequence. - attr_reader :local_variables + # The table of local variables. + attr_reader :local_table # The hash of names of instance and class variables pointing to the # index of their associated inline storage. 
@@ -271,7 +317,7 @@ def initialize(type, name, parent_iseq, location) @argument_size = 0 @argument_options = {} - @local_variables = [] + @local_table = LocalTable.new @inline_storages = {} @insns = [] @storage_index = 0 @@ -279,8 +325,8 @@ def initialize(type, name, parent_iseq, location) end def local_variable(name, level = 0) - if (index = local_variables.index { |local_variable| local_variable.name == name }) - LocalVariableLookup.new(local_variables[index], level, index) + if (lookup = local_table.find(name, level)) + lookup elsif parent_iseq parent_iseq.local_variable(name, level + 1) else @@ -329,7 +375,7 @@ def to_a 1, { arg_size: argument_size, - local_size: local_variables.length, + local_size: local_table.size, stack_max: stack.maximum_size }, name, @@ -337,7 +383,7 @@ def to_a "", 1, type, - local_variables.map(&:name), + local_table.names, argument_options, [], insns.map { |insn| serialize(insn) } @@ -361,8 +407,7 @@ def serialize(insn) # Here we need to map the local variable index to the offset # from the top of the stack where it will be stored. - index = iseq.local_variables.length - (insn[1] - 3) - 1 - [insn[0], index, *insn[2..]] + [insn[0], iseq.local_table.offset(insn[1]), *insn[2..]] when :defineclass [insn[0], insn[1], insn[2].to_a, insn[3]] when :definemethod @@ -1041,7 +1086,7 @@ def visit_binary(node) def visit_blockarg(node) current_iseq.argument_options[:block_start] = current_iseq.argument_size - current_iseq.local_variables << InstructionSequence::BlockProxyLocalVariable.new(node.name.value.to_sym) + current_iseq.local_table.block_proxy(node.name.value.to_sym) current_iseq.argument_size += 1 end @@ -1147,10 +1192,7 @@ def visit_defined(node) # that we put it into the local table. if node.value.target.is_a?(VarField) && node.value.target.value.is_a?(Ident) - name = node.value.target.value.value.to_sym - unless current_iseq.local_variables.any? 
{ |local_variable| local_variable.name == name } - current_iseq.local_variables << InstructionSequence::PlainLocalVariable.new(name) - end + current_iseq.local_table.plain(node.value.target.value.value.to_sym) end builder.putobject("assignment") @@ -1224,9 +1266,7 @@ def visit_for(node) visit(node.collection) name = node.index.value.value.to_sym - unless current_iseq.local_variables.any? { |local_variable| local_variable.name == name } - current_iseq.local_variables << InstructionSequence::PlainLocalVariable.new(name) - end + current_iseq.local_table.plain(name) block_iseq = with_instruction_sequence( @@ -1240,7 +1280,7 @@ def visit_for(node) current_iseq.argument_options[:ambiguous_param0] = true current_iseq.argument_size += 1 - current_iseq.local_variables << InstructionSequence::PlainLocalVariable.new(2) + current_iseq.local_table.plain(2) builder.getlocal(0, 0) @@ -1417,17 +1457,17 @@ def visit_params(node) argument_options[:lead_num] = 0 node.requireds.each do |required| - current_iseq.local_variables << InstructionSequence::PlainLocalVariable.new(required.value.to_sym) + current_iseq.local_table.plain(required.value.to_sym) current_iseq.argument_size += 1 argument_options[:lead_num] += 1 end end node.optionals.each do |(optional, value)| - index = current_iseq.local_variables.length + index = current_iseq.local_table.size name = optional.value.to_sym - current_iseq.local_variables << InstructionSequence::PlainLocalVariable.new(name) + current_iseq.local_table.plain(name) current_iseq.argument_size += 1 unless argument_options.key?(:opt) @@ -1446,7 +1486,7 @@ def visit_params(node) argument_options[:post_num] = 0 node.posts.each do |post| - current_iseq.local_variables << InstructionSequence::PlainLocalVariable.new(post.value.to_sym) + current_iseq.local_table.plain(post.value.to_sym) current_iseq.argument_size += 1 argument_options[:post_num] += 1 end @@ -1526,7 +1566,7 @@ def visit_regexp_literal(node) end def visit_rest_param(node) - 
current_iseq.local_variables << InstructionSequence::PlainLocalVariable.new(node.name.value.to_sym) + current_iseq.local_table.plain(node.name.value.to_sym) current_iseq.argument_options[:rest_start] = current_iseq.argument_size current_iseq.argument_size += 1 end @@ -1652,9 +1692,7 @@ def visit_var_field(node) current_iseq.inline_storage_for(name) when Ident name = node.value.value.to_sym - unless current_iseq.local_variables.any? { |local_variable| local_variable.name == name } - current_iseq.local_variables << InstructionSequence::PlainLocalVariable.new(name) - end + current_iseq.local_table.plain(name) current_iseq.local_variable(name) end end @@ -1669,13 +1707,13 @@ def visit_var_ref(node) when GVar builder.getglobal(node.value.value.to_sym) when Ident - local_variable = current_iseq.local_variable(node.value.value.to_sym) + lookup = current_iseq.local_variable(node.value.value.to_sym) - case local_variable.local_variable - when InstructionSequence::BlockProxyLocalVariable - builder.getblockparamproxy(local_variable.index, local_variable.level) - when InstructionSequence::PlainLocalVariable - builder.getlocal(local_variable.index, local_variable.level) + case lookup.local + when LocalTable::BlockProxyLocal + builder.getblockparamproxy(lookup.index, lookup.level) + when LocalTable::PlainLocal + builder.getlocal(lookup.index, lookup.level) end when IVar name = node.value.value.to_sym From ed83ff84973a354577e20595ea8cac43b35431ec Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 16 Nov 2022 14:44:35 -0500 Subject: [PATCH 015/104] Handle required keyword parameters --- lib/syntax_tree/visitor/compiler.rb | 18 ++++++++++++++++++ test/compiler_test.rb | 2 ++ 2 files changed, 20 insertions(+) diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb index f569ddcd..c624dd56 100644 --- a/lib/syntax_tree/visitor/compiler.rb +++ b/lib/syntax_tree/visitor/compiler.rb @@ -1492,6 +1492,24 @@ def visit_params(node) end end + if 
node.keywords.any? + argument_options[:kwbits] = 0 + argument_options[:keyword] = [] + + node.keywords.each do |(keyword, value)| + name = keyword.value.chomp(":").to_sym + + current_iseq.local_table.plain(name) + current_iseq.argument_size += 1 + + argument_options[:kwbits] += 1 + argument_options[:keyword] << name + end + + current_iseq.argument_size += 1 + current_iseq.local_table.plain(2) + end + visit(node.block) if node.block end diff --git a/test/compiler_test.rb b/test/compiler_test.rb index c473e7be..53975926 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -320,6 +320,8 @@ class CompilerTest < Minitest::Test "def foo(bar, baz, &qux); end", "def foo(bar, *baz, &qux); end", "def foo(&qux); qux.call; end", + "def foo(bar:); end", + "def foo(bar:, baz:); end", # Class/module definitions "module Foo; end", "module ::Foo; end", From 1f7758e5fc1cbc8d3f78f263296326a8ed241802 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 16 Nov 2022 14:58:48 -0500 Subject: [PATCH 016/104] Handle optional keyword parameters --- lib/syntax_tree/visitor/compiler.rb | 35 ++++++++++++++++++++++++----- test/compiler_test.rb | 6 +++++ 2 files changed, 36 insertions(+), 5 deletions(-) diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb index c624dd56..6cbcb272 100644 --- a/lib/syntax_tree/visitor/compiler.rb +++ b/lib/syntax_tree/visitor/compiler.rb @@ -394,8 +394,9 @@ def to_a def serialize(insn) case insn[0] - when :getblockparamproxy, :getlocal_WC_0, :getlocal_WC_1, :getlocal, - :setlocal_WC_0, :setlocal_WC_1, :setlocal + when :checkkeyword, :getblockparamproxy, :getlocal_WC_0, + :getlocal_WC_1, :getlocal, :setlocal_WC_0, :setlocal_WC_1, + :setlocal iseq = self case insn[0] @@ -473,6 +474,11 @@ def branchunless(index) iseq.push([:branchunless, index]) end + def checkkeyword(index, keyword_index) + stack.change_by(+1) + iseq.push([:checkkeyword, index, keyword_index]) + end + def concatstrings(number) stack.change_by(-number + 
1) iseq.push([:concatstrings, number]) @@ -1495,19 +1501,38 @@ def visit_params(node) if node.keywords.any? argument_options[:kwbits] = 0 argument_options[:keyword] = [] + checkkeywords = [] - node.keywords.each do |(keyword, value)| + node.keywords.each_with_index do |(keyword, value), keyword_index| name = keyword.value.chomp(":").to_sym + index = current_iseq.local_table.size current_iseq.local_table.plain(name) current_iseq.argument_size += 1 - argument_options[:kwbits] += 1 - argument_options[:keyword] << name + + if value.nil? + argument_options[:keyword] << name + else + begin + compiled = value.accept(RubyVisitor.new) + argument_options[:keyword] << [name, compiled] + rescue RubyVisitor::CompilationError + argument_options[:keyword] << [name] + checkkeywords << builder.checkkeyword(-1, keyword_index) + branchif = builder.branchif(-1) + visit(value) + builder.setlocal(index, 0) + branchif[1] = builder.label + end + end end current_iseq.argument_size += 1 current_iseq.local_table.plain(2) + + lookup = current_iseq.local_table.find(2, 0) + checkkeywords.each { |checkkeyword| checkkeyword[1] = lookup.index } end visit(node.block) if node.block diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 53975926..2a053d7f 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -322,6 +322,12 @@ class CompilerTest < Minitest::Test "def foo(&qux); qux.call; end", "def foo(bar:); end", "def foo(bar:, baz:); end", + "def foo(bar: 1); end", + "def foo(bar: 1, baz: 2); end", + "def foo(bar: baz); end", + "def foo(bar: 1, baz: qux); end", + "def foo(bar: qux, baz: 1); end", + "def foo(bar: baz, qux: qaz); end", # Class/module definitions "module Foo; end", "module ::Foo; end", From a453b7e1092a97bdff26ea795906993a92cab469 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 16 Nov 2022 15:03:27 -0500 Subject: [PATCH 017/104] Handle splat keyword parameters --- lib/syntax_tree/visitor/compiler.rb | 12 ++++++++++-- test/compiler_test.rb | 9 +++++++++ 2 
files changed, 19 insertions(+), 2 deletions(-) diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb index 6cbcb272..a0b757e4 100644 --- a/lib/syntax_tree/visitor/compiler.rb +++ b/lib/syntax_tree/visitor/compiler.rb @@ -1351,6 +1351,12 @@ def visit_int(node) builder.putobject(node.accept(RubyVisitor.new)) end + def visit_kwrest_param(node) + current_iseq.argument_options[:kwrest] = current_iseq.argument_size + current_iseq.argument_size += 1 + current_iseq.local_table.plain(node.name.value.to_sym) + end + def visit_label(node) builder.putobject(node.accept(RubyVisitor.new)) end @@ -1528,13 +1534,15 @@ def visit_params(node) end end + name = node.keyword_rest ? 3 : 2 current_iseq.argument_size += 1 - current_iseq.local_table.plain(2) + current_iseq.local_table.plain(name) - lookup = current_iseq.local_table.find(2, 0) + lookup = current_iseq.local_table.find(name, 0) checkkeywords.each { |checkkeyword| checkkeyword[1] = lookup.index } end + visit(node.keyword_rest) if node.keyword_rest visit(node.block) if node.block end diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 2a053d7f..8c933ce6 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -328,6 +328,15 @@ class CompilerTest < Minitest::Test "def foo(bar: 1, baz: qux); end", "def foo(bar: qux, baz: 1); end", "def foo(bar: baz, qux: qaz); end", + "def foo(**rest); end", + "def foo(bar:, **rest); end", + "def foo(bar:, baz:, **rest); end", + "def foo(bar: 1, **rest); end", + "def foo(bar: 1, baz: 2, **rest); end", + "def foo(bar: baz, **rest); end", + "def foo(bar: 1, baz: qux, **rest); end", + "def foo(bar: qux, baz: 1, **rest); end", + "def foo(bar: baz, qux: qaz, **rest); end", # Class/module definitions "module Foo; end", "module ::Foo; end", From 22b3bd2efe803973f0f0786080e7d622fcb908cd Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 16 Nov 2022 15:10:05 -0500 Subject: [PATCH 018/104] Handle yield without arguments --- 
lib/syntax_tree/visitor/compiler.rb | 9 +++++++++ test/compiler_test.rb | 7 ++++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb index a0b757e4..bc7f6e1e 100644 --- a/lib/syntax_tree/visitor/compiler.rb +++ b/lib/syntax_tree/visitor/compiler.rb @@ -586,6 +586,11 @@ def intern iseq.push([:intern]) end + def invokeblock(method_id, argc, flag) + stack.change_by(-argc + 1) + iseq.push([:invokeblock, call_data(method_id, argc, flag)]) + end + def invokesuper(method_id, argc, flag, block_iseq) stack.change_by(-(argc + 1) + 1) @@ -1848,6 +1853,10 @@ def visit_xstring_literal(node) builder.send(:`, 1, VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE) end + def visit_yield(node) + builder.invokeblock(nil, 0, VM_CALL_ARGS_SIMPLE) + end + def visit_zsuper(_node) builder.putself builder.invokesuper( diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 8c933ce6..9912e436 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -276,13 +276,14 @@ class CompilerTest < Minitest::Test # Core method calls "alias foo bar", "alias :foo :bar", + "super", + "super(1)", + "super(1, 2, 3)", "undef foo", "undef :foo", "undef foo, bar, baz", "undef :foo, :bar, :baz", - "super", - "super(1)", - "super(1, 2, 3)", + "def foo; yield; end", # defined? 
usage "defined?(foo)", "defined?(\"foo\")", From 63793deacdc331de69db52ab736e884798164dc0 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 16 Nov 2022 15:12:35 -0500 Subject: [PATCH 019/104] Handle yield with arguments --- lib/syntax_tree/visitor/compiler.rb | 6 +++++- test/compiler_test.rb | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb index bc7f6e1e..9d932f94 100644 --- a/lib/syntax_tree/visitor/compiler.rb +++ b/lib/syntax_tree/visitor/compiler.rb @@ -1854,7 +1854,9 @@ def visit_xstring_literal(node) end def visit_yield(node) - builder.invokeblock(nil, 0, VM_CALL_ARGS_SIMPLE) + parts = argument_parts(node.arguments) + visit_all(parts) + builder.invokeblock(nil, parts.length, VM_CALL_ARGS_SIMPLE) end def visit_zsuper(_node) @@ -1880,6 +1882,8 @@ def argument_parts(node) node.parts when ArgParen node.arguments.parts + when Paren + node.contents.parts end end diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 9912e436..98928304 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -284,6 +284,8 @@ class CompilerTest < Minitest::Test "undef foo, bar, baz", "undef :foo, :bar, :baz", "def foo; yield; end", + "def foo; yield(1); end", + "def foo; yield(1, 2, 3); end", # defined? 
usage "defined?(foo)", "defined?(\"foo\")", From ae4f7ddf51c8cdc4403ae5a67b9d36cc0d2253fc Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 16 Nov 2022 15:19:40 -0500 Subject: [PATCH 020/104] Handle class definitions --- lib/syntax_tree/visitor/compiler.rb | 36 +++++++++++++++++++++++++++++ test/compiler_test.rb | 13 ++++++++++- 2 files changed, 48 insertions(+), 1 deletion(-) diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb index 9d932f94..1582f9ff 100644 --- a/lib/syntax_tree/visitor/compiler.rb +++ b/lib/syntax_tree/visitor/compiler.rb @@ -1141,6 +1141,42 @@ def visit_call(node) end end + def visit_class(node) + name = node.constant.constant.value.to_sym + class_iseq = + with_instruction_sequence( + :class, + "", + current_iseq, + node + ) do + visit(node.bodystmt) + builder.leave + end + + flags = VM_DEFINECLASS_TYPE_CLASS + + case node.constant + when ConstPathRef + flags |= VM_DEFINECLASS_FLAG_SCOPED + visit(node.constant.parent) + when ConstRef + builder.putspecialobject(VM_SPECIAL_OBJECT_CONST_BASE) + when TopConstRef + flags |= VM_DEFINECLASS_FLAG_SCOPED + builder.putobject(Object) + end + + if node.superclass + flags |= VM_DEFINECLASS_FLAG_HAS_SUPERCLASS + visit(node.superclass) + else + builder.putnil + end + + builder.defineclass(name, class_iseq, flags) + end + def visit_command(node) call_node = CallNode.new( diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 98928304..795d7d13 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -345,7 +345,18 @@ class CompilerTest < Minitest::Test "module ::Foo; end", "module Foo::Bar; end", "module ::Foo::Bar; end", - "module Foo; module Bar; end; end" + "module Foo; module Bar; end; end", + "class Foo; end", + "class ::Foo; end", + "class Foo::Bar; end", + "class ::Foo::Bar; end", + "class Foo; class Bar; end; end", + "class Foo < Baz; end", + "class ::Foo < Baz; end", + "class Foo::Bar < Baz; end", + "class ::Foo::Bar < Baz; end", + "class Foo; 
class Bar < Baz; end; end", + "class Foo < baz; end" ] # These are the combinations of instructions that we're going to test. From 30d434f053a383cddd1333d3e3649168991372d3 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 16 Nov 2022 15:25:27 -0500 Subject: [PATCH 021/104] Handle splats within array --- lib/syntax_tree/visitor/compiler.rb | 26 ++++++++++++++++++++++++-- test/compiler_test.rb | 1 + 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb index 1582f9ff..9eda29b5 100644 --- a/lib/syntax_tree/visitor/compiler.rb +++ b/lib/syntax_tree/visitor/compiler.rb @@ -479,6 +479,11 @@ def checkkeyword(index, keyword_index) iseq.push([:checkkeyword, index, keyword_index]) end + def concatarray + stack.change_by(-2 + 1) + iseq.push([:concatarray]) + end + def concatstrings(number) stack.change_by(-number + 1) iseq.push([:concatstrings, number]) @@ -974,8 +979,25 @@ def visit_args(node) def visit_array(node) builder.duparray(node.accept(RubyVisitor.new)) rescue RubyVisitor::CompilationError - visit_all(node.contents.parts) - builder.newarray(node.contents.parts.length) + length = 0 + + node.contents.parts.each do |part| + if part.is_a?(ArgStar) + if length > 0 + builder.newarray(length) + length = 0 + end + + visit(part.value) + builder.concatarray + else + visit(part) + length += 1 + end + end + + builder.newarray(length) + builder.concatarray if length != node.contents.parts.length end def visit_assign(node) diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 795d7d13..1fe690d9 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -266,6 +266,7 @@ class CompilerTest < Minitest::Test "%W[foo \#{bar} baz]", "%I[foo \#{bar} baz]", "[foo, bar] + [baz, qux]", + "[foo, bar, *baz, qux]", "{ foo: bar, baz: qux }", "{ :foo => bar, :baz => qux }", "{ foo => bar, baz => qux }", From b80c9ffae6253a9e4136084f237c13d973c5f6a5 Mon Sep 17 00:00:00 2001 From: Kevin Newton 
Date: Wed, 16 Nov 2022 15:44:14 -0500 Subject: [PATCH 022/104] opt_newarray_min and opt_newarray_max --- lib/syntax_tree/visitor/compiler.rb | 59 +++++++++++++++++++++++++++-- test/compiler_test.rb | 6 +++ 2 files changed, 61 insertions(+), 4 deletions(-) diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb index 9eda29b5..b99b56f8 100644 --- a/lib/syntax_tree/visitor/compiler.rb +++ b/lib/syntax_tree/visitor/compiler.rb @@ -667,6 +667,26 @@ def opt_getinlinecache(offset, inline_storage) iseq.push([:opt_getinlinecache, offset, inline_storage]) end + def opt_newarray_max(length) + if specialized_instruction + stack.change_by(-length + 1) + iseq.push([:opt_newarray_max, length]) + else + newarray(length) + send(:max, 0, VM_CALL_ARGS_SIMPLE) + end + end + + def opt_newarray_min(length) + if specialized_instruction + stack.change_by(-length + 1) + iseq.push([:opt_newarray_min, length]) + else + newarray(length) + send(:min, 0, VM_CALL_ARGS_SIMPLE) + end + end + def opt_setinlinecache(inline_storage) stack.change_by(-1 + 1) iseq.push([:opt_setinlinecache, inline_storage]) @@ -996,8 +1016,8 @@ def visit_array(node) end end - builder.newarray(length) - builder.concatarray if length != node.contents.parts.length + builder.newarray(length) if length > 0 + builder.concatarray if length > 0 && length != node.contents.parts.length end def visit_assign(node) @@ -1128,10 +1148,41 @@ def visit_bodystmt(node) end def visit_call(node) - node.receiver ? visit(node.receiver) : builder.putself + arg_parts = argument_parts(node.arguments) + + # First we're going to check if we're calling a method on an array + # literal without any arguments. In that case there are some + # specializations we might be able to perform. + if node.receiver.is_a?(ArrayLiteral) && arg_parts.length == 0 && node.message.is_a?(Ident) + parts = node.receiver.contents&.parts || [] + + unless parts.any? 
{ |part| part.is_a?(ArgStar) } + begin + # If we can compile the receiver, then we won't be attempting to + # specialize the instruction. Otherwise we will. + node.receiver.accept(RubyVisitor.new) + rescue RubyVisitor::CompilationError + case node.message.value + when "max" + visit(node.receiver.contents) + builder.opt_newarray_max(parts.length) + return + when "min" + visit(node.receiver.contents) + builder.opt_newarray_min(parts.length) + return + end + end + end + end + + if node.receiver + visit(node.receiver) + else + builder.putself + end visit(node.arguments) - arg_parts = argument_parts(node.arguments) if arg_parts.last.is_a?(ArgBlock) flag = node.receiver.nil? ? VM_CALL_FCALL : 0 diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 1fe690d9..2af625f6 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -274,6 +274,12 @@ class CompilerTest < Minitest::Test "[$1, $2, $3, $4, $5, $6, $7, $8, $9]", "/foo \#{bar} baz/", "%r{foo \#{bar} baz}", + "[1, 2, 3].max", + "[foo, bar, baz].max", + "[foo, bar, baz].max(1)", + "[1, 2, 3].min", + "[foo, bar, baz].min", + "[foo, bar, baz].min(1)", # Core method calls "alias foo bar", "alias :foo :bar", From b12953867d1608bd4d943c5172da0392f27e7071 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 16 Nov 2022 15:49:31 -0500 Subject: [PATCH 023/104] New RubyVisitor.compile --- lib/syntax_tree/visitor/compiler.rb | 51 ++++++++++++++++++----------- 1 file changed, 31 insertions(+), 20 deletions(-) diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb index b99b56f8..de970461 100644 --- a/lib/syntax_tree/visitor/compiler.rb +++ b/lib/syntax_tree/visitor/compiler.rb @@ -56,6 +56,13 @@ class RubyVisitor < BasicVisitor class CompilationError < StandardError end + # This will attempt to compile the given node. If it's possible, then + # it will return the compiled object. Otherwise it will return nil. 
+ def self.compile(node) + node.accept(new) + rescue CompilationError + end + def visit_array(node) visit_all(node.contents.parts) end @@ -997,27 +1004,29 @@ def visit_args(node) end def visit_array(node) - builder.duparray(node.accept(RubyVisitor.new)) - rescue RubyVisitor::CompilationError - length = 0 + if compiled = RubyVisitor.compile(node) + builder.duparray(compiled) + else + length = 0 - node.contents.parts.each do |part| - if part.is_a?(ArgStar) - if length > 0 - builder.newarray(length) - length = 0 - end + node.contents.parts.each do |part| + if part.is_a?(ArgStar) + if length > 0 + builder.newarray(length) + length = 0 + end - visit(part.value) - builder.concatarray - else - visit(part) - length += 1 + visit(part.value) + builder.concatarray + else + visit(part) + length += 1 + end end - end - builder.newarray(length) if length > 0 - builder.concatarray if length > 0 && length != node.contents.parts.length + builder.newarray(length) if length > 0 + builder.concatarray if length > 0 && length != node.contents.parts.length + end end def visit_assign(node) @@ -1105,9 +1114,11 @@ def visit_backref(node) end def visit_bare_assoc_hash(node) - builder.duphash(node.accept(RubyVisitor.new)) - rescue RubyVisitor::CompilationError - visit_all(node.assocs) + if compiled = RubyVisitor.compile(node) + builder.duphash(compiled) + else + visit_all(node.assocs) + end end def visit_binary(node) From f9a86c537e59b86dabdfb80d7e45d03f863544b1 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 16 Nov 2022 15:56:30 -0500 Subject: [PATCH 024/104] opt_str_freeze --- lib/syntax_tree/visitor/compiler.rb | 33 +++++++++++++++++++++-------- test/compiler_test.rb | 2 ++ 2 files changed, 26 insertions(+), 9 deletions(-) diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb index de970461..00201618 100644 --- a/lib/syntax_tree/visitor/compiler.rb +++ b/lib/syntax_tree/visitor/compiler.rb @@ -699,6 +699,16 @@ def opt_setinlinecache(inline_storage) 
iseq.push([:opt_setinlinecache, inline_storage]) end + def opt_str_freeze(value) + if specialized_instruction + stack.change_by(+1) + iseq.push([:opt_str_freeze, value, call_data(:freeze, 0, VM_CALL_ARGS_SIMPLE)]) + else + putstring(value) + send(:freeze, 0, VM_CALL_ARGS_SIMPLE) + end + end + def pop stack.change_by(-1) iseq.push([:pop]) @@ -1164,15 +1174,12 @@ def visit_call(node) # First we're going to check if we're calling a method on an array # literal without any arguments. In that case there are some # specializations we might be able to perform. - if node.receiver.is_a?(ArrayLiteral) && arg_parts.length == 0 && node.message.is_a?(Ident) - parts = node.receiver.contents&.parts || [] - - unless parts.any? { |part| part.is_a?(ArgStar) } - begin - # If we can compile the receiver, then we won't be attempting to - # specialize the instruction. Otherwise we will. - node.receiver.accept(RubyVisitor.new) - rescue RubyVisitor::CompilationError + if arg_parts.length == 0 && node.message.is_a?(Ident) + case node.receiver + when ArrayLiteral + parts = node.receiver.contents&.parts || [] + + if parts.none? { |part| part.is_a?(ArgStar) } && RubyVisitor.compile(node.receiver).nil? case node.message.value when "max" visit(node.receiver.contents) @@ -1184,6 +1191,14 @@ def visit_call(node) return end end + when StringLiteral + if RubyVisitor.compile(node.receiver).nil? 
+ case node.message.value + when "freeze" + builder.opt_str_freeze(node.receiver.parts.first.value) + return + end + end end end diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 2af625f6..62a7cc7c 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -92,6 +92,8 @@ class CompilerTest < Minitest::Test "foo.size", "foo.succ", "/foo/ =~ \"foo\" && $1", + "\"foo\".freeze", + "\"foo\".freeze(1)", # Various method calls "foo?", "foo.bar", From 7fa75c946c14b2d546c1b2c781455ccab91ba437 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 16 Nov 2022 17:42:57 -0500 Subject: [PATCH 025/104] opt_str_uminus --- lib/syntax_tree/visitor/compiler.rb | 43 +++++++++++++++++++---------- test/compiler_test.rb | 3 ++ 2 files changed, 32 insertions(+), 14 deletions(-) diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb index 00201618..4d77314e 100644 --- a/lib/syntax_tree/visitor/compiler.rb +++ b/lib/syntax_tree/visitor/compiler.rb @@ -709,6 +709,16 @@ def opt_str_freeze(value) end end + def opt_str_uminus(value) + if specialized_instruction + stack.change_by(+1) + iseq.push([:opt_str_uminus, value, call_data(:-@, 0, VM_CALL_ARGS_SIMPLE)]) + else + putstring(value) + send(:-@, 0, VM_CALL_ARGS_SIMPLE) + end + end + def pop stack.change_by(-1) iseq.push([:pop]) @@ -1174,7 +1184,7 @@ def visit_call(node) # First we're going to check if we're calling a method on an array # literal without any arguments. In that case there are some # specializations we might be able to perform. - if arg_parts.length == 0 && node.message.is_a?(Ident) + if arg_parts.length == 0 && (node.message.is_a?(Ident) || node.message.is_a?(Op)) case node.receiver when ArrayLiteral parts = node.receiver.contents&.parts || [] @@ -1194,6 +1204,9 @@ def visit_call(node) when StringLiteral if RubyVisitor.compile(node.receiver).nil? 
case node.message.value + when "-@" + builder.opt_str_uminus(node.receiver.parts.first.value) + return when "freeze" builder.opt_str_freeze(node.receiver.parts.first.value) return @@ -1277,7 +1290,7 @@ def visit_class(node) end def visit_command(node) - call_node = + visit_call( CallNode.new( receiver: nil, operator: nil, @@ -1285,13 +1298,11 @@ def visit_command(node) arguments: node.arguments, location: node.location ) - - call_node.comments.concat(node.comments) - visit_call(call_node) + ) end def visit_command_call(node) - call_node = + visit_call( CallNode.new( receiver: node.receiver, operator: node.operator, @@ -1299,9 +1310,7 @@ def visit_command_call(node) arguments: node.arguments, location: node.location ) - - call_node.comments.concat(node.comments) - visit_call(call_node) + ) end def visit_const_path_field(node) @@ -1853,17 +1862,23 @@ def visit_tstring_content(node) end def visit_unary(node) - visit(node.statement) - method_id = case node.operator when "+", "-" - :"#{node.operator}@" + "#{node.operator}@" else - node.operator.to_sym + node.operator end - builder.send(method_id, 0, VM_CALL_ARGS_SIMPLE) + visit_call( + CallNode.new( + receiver: node.statement, + operator: nil, + message: Ident.new(value: method_id, location: node.location), + arguments: nil, + location: node.location + ) + ) end def visit_undef(node) diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 62a7cc7c..cdb9e72a 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -94,6 +94,9 @@ class CompilerTest < Minitest::Test "/foo/ =~ \"foo\" && $1", "\"foo\".freeze", "\"foo\".freeze(1)", + "-\"foo\"", + "\"foo\".-@", + "\"foo\".-@(1)", # Various method calls "foo?", "foo.bar", From 1bec2b4ee634c60aa68f45dd81a4a026c68076d1 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 16 Nov 2022 17:51:28 -0500 Subject: [PATCH 026/104] Handle ternaries --- lib/syntax_tree/visitor/compiler.rb | 20 ++++++++++++++++++-- test/compiler_test.rb | 1 + 2 files changed, 19 
insertions(+), 2 deletions(-) diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb index 4d77314e..be476816 100644 --- a/lib/syntax_tree/visitor/compiler.rb +++ b/lib/syntax_tree/visitor/compiler.rb @@ -1492,6 +1492,22 @@ def visit_if(node) end end + def visit_if_op(node) + visit_if( + IfNode.new( + predicate: node.predicate, + statements: node.truthy, + consequent: + Else.new( + keyword: Kw.new(value: "else", location: Location.default), + statements: node.falsy, + location: Location.default + ), + location: Location.default + ) + ) + end + def visit_imaginary(node) builder.putobject(node.accept(RubyVisitor.new)) end @@ -1874,9 +1890,9 @@ def visit_unary(node) CallNode.new( receiver: node.statement, operator: nil, - message: Ident.new(value: method_id, location: node.location), + message: Ident.new(value: method_id, location: Location.default), arguments: nil, - location: node.location + location: Location.default ) ) end diff --git a/test/compiler_test.rb b/test/compiler_test.rb index cdb9e72a..720e18be 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -260,6 +260,7 @@ class CompilerTest < Minitest::Test "foo if bar", "foo while bar", "for i in [1, 2, 3] do i end", + "foo ? bar : baz", # Constructed values "foo..bar", "foo...bar", From 61924f8f2c0a458e990c5ab1a1c9f688f416cabb Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 16 Nov 2022 17:54:04 -0500 Subject: [PATCH 027/104] Handle elsif --- lib/syntax_tree/visitor/compiler.rb | 11 +++++++++++ test/compiler_test.rb | 1 + 2 files changed, 12 insertions(+) diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb index be476816..32bead26 100644 --- a/lib/syntax_tree/visitor/compiler.rb +++ b/lib/syntax_tree/visitor/compiler.rb @@ -1409,6 +1409,17 @@ def visit_else(node) builder.pop unless last_statement? 
end + def visit_elsif(node) + visit_if( + IfNode.new( + predicate: node.predicate, + statements: node.statements, + consequent: node.consequent, + location: node.location + ) + ) + end + def visit_field(node) visit(node.parent) end diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 720e18be..fdca6985 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -257,6 +257,7 @@ class CompilerTest < Minitest::Test "foo || bar", "if foo then bar end", "if foo then bar else baz end", + "if foo then bar elsif baz then qux end", "foo if bar", "foo while bar", "for i in [1, 2, 3] do i end", From 76428d06c625ad0ab8bd59e47366656ce80efff8 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Thu, 17 Nov 2022 11:26:47 -0500 Subject: [PATCH 028/104] Compile blocks --- lib/syntax_tree/visitor/compiler.rb | 100 +++++++++++++++++++++++----- test/compiler_test.rb | 8 ++- 2 files changed, 89 insertions(+), 19 deletions(-) diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb index 32bead26..10c59a77 100644 --- a/lib/syntax_tree/visitor/compiler.rb +++ b/lib/syntax_tree/visitor/compiler.rb @@ -702,7 +702,13 @@ def opt_setinlinecache(inline_storage) def opt_str_freeze(value) if specialized_instruction stack.change_by(+1) - iseq.push([:opt_str_freeze, value, call_data(:freeze, 0, VM_CALL_ARGS_SIMPLE)]) + iseq.push( + [ + :opt_str_freeze, + value, + call_data(:freeze, 0, VM_CALL_ARGS_SIMPLE) + ] + ) else putstring(value) send(:freeze, 0, VM_CALL_ARGS_SIMPLE) @@ -712,7 +718,9 @@ def opt_str_freeze(value) def opt_str_uminus(value) if specialized_instruction stack.change_by(+1) - iseq.push([:opt_str_uminus, value, call_data(:-@, 0, VM_CALL_ARGS_SIMPLE)]) + iseq.push( + [:opt_str_uminus, value, call_data(:-@, 0, VM_CALL_ARGS_SIMPLE)] + ) else putstring(value) send(:-@, 0, VM_CALL_ARGS_SIMPLE) @@ -1024,7 +1032,7 @@ def visit_args(node) end def visit_array(node) - if compiled = RubyVisitor.compile(node) + if (compiled = 
RubyVisitor.compile(node)) builder.duparray(compiled) else length = 0 @@ -1045,7 +1053,9 @@ def visit_array(node) end builder.newarray(length) if length > 0 - builder.concatarray if length > 0 && length != node.contents.parts.length + if length > 0 && length != node.contents.parts.length + builder.concatarray + end end end @@ -1134,7 +1144,7 @@ def visit_backref(node) end def visit_bare_assoc_hash(node) - if compiled = RubyVisitor.compile(node) + if (compiled = RubyVisitor.compile(node)) builder.duphash(compiled) else visit_all(node.assocs) @@ -1168,6 +1178,35 @@ def visit_binary(node) end end + def visit_block(node) + with_instruction_sequence( + :block, + "block in #{current_iseq.name}", + current_iseq, + node + ) do + visit(node.block_var) + visit(node.bodystmt) + builder.leave + end + end + + def visit_block_var(node) + params = node.params + + if params.requireds.length == 1 && params.optionals.empty? && + !params.rest && params.posts.empty? && params.keywords.empty? && + !params.keyword_rest && !params.block + current_iseq.argument_options[:ambiguous_param0] = true + end + + visit(node.params) + + node.locals.each do |local| + current_iseq.local_table.plain(local.value.to_sym) + end + end + def visit_blockarg(node) current_iseq.argument_options[:block_start] = current_iseq.argument_size current_iseq.local_table.block_proxy(node.name.value.to_sym) @@ -1184,12 +1223,14 @@ def visit_call(node) # First we're going to check if we're calling a method on an array # literal without any arguments. In that case there are some # specializations we might be able to perform. - if arg_parts.length == 0 && (node.message.is_a?(Ident) || node.message.is_a?(Op)) + if arg_parts.empty? && + (node.message.is_a?(Ident) || node.message.is_a?(Op)) case node.receiver when ArrayLiteral parts = node.receiver.contents&.parts || [] - if parts.none? { |part| part.is_a?(ArgStar) } && RubyVisitor.compile(node.receiver).nil? + if parts.none? 
{ |part| part.is_a?(ArgStar) } && + RubyVisitor.compile(node.receiver).nil? case node.message.value when "max" visit(node.receiver.contents) @@ -1215,13 +1256,10 @@ def visit_call(node) end end - if node.receiver - visit(node.receiver) - else - builder.putself - end + node.receiver ? visit(node.receiver) : builder.putself visit(node.arguments) + block_iseq = visit(node.block) if node.respond_to?(:block) && node.block if arg_parts.last.is_a?(ArgBlock) flag = node.receiver.nil? ? VM_CALL_FCALL : 0 @@ -1235,7 +1273,12 @@ def visit_call(node) flag |= VM_CALL_KW_SPLAT end - builder.send(node.message.value.to_sym, arg_parts.length - 1, flag) + builder.send( + node.message.value.to_sym, + arg_parts.length - 1, + flag, + block_iseq + ) else flag = 0 arg_parts.each do |arg_part| @@ -1247,9 +1290,14 @@ def visit_call(node) end end - flag |= VM_CALL_ARGS_SIMPLE if flag == 0 + flag |= VM_CALL_ARGS_SIMPLE if block_iseq.nil? && flag == 0 flag |= VM_CALL_FCALL if node.receiver.nil? - builder.send(node.message.value.to_sym, arg_parts.length, flag) + builder.send( + node.message.value.to_sym, + arg_parts.length, + flag, + block_iseq + ) end end @@ -1291,11 +1339,12 @@ def visit_class(node) def visit_command(node) visit_call( - CallNode.new( + CommandCall.new( receiver: nil, operator: nil, message: node.message, arguments: node.arguments, + block: node.block, location: node.location ) ) @@ -1303,11 +1352,12 @@ def visit_command(node) def visit_command_call(node) visit_call( - CallNode.new( + CommandCall.new( receiver: node.receiver, operator: node.operator, message: node.message, arguments: node.arguments, + block: node.block, location: node.location ) ) @@ -1537,6 +1587,19 @@ def visit_label(node) builder.putobject(node.accept(RubyVisitor.new)) end + def visit_method_add_block(node) + visit_call( + CommandCall.new( + receiver: node.call.receiver, + operator: node.call.operator, + message: node.call.message, + arguments: node.call.arguments, + block: node.block, + location: 
node.location + ) + ) + end + def visit_module(node) name = node.constant.constant.value.to_sym module_iseq = @@ -1898,11 +1961,12 @@ def visit_unary(node) end visit_call( - CallNode.new( + CommandCall.new( receiver: node.statement, operator: nil, message: Ident.new(value: method_id, location: Location.default), arguments: nil, + block: nil, location: Location.default ) ) diff --git a/test/compiler_test.rb b/test/compiler_test.rb index fdca6985..fe0bd1f6 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -370,7 +370,13 @@ class CompilerTest < Minitest::Test "class Foo::Bar < Baz; end", "class ::Foo::Bar < Baz; end", "class Foo; class Bar < Baz; end; end", - "class Foo < baz; end" + "class Foo < baz; end", + # Block + "foo do end", + "foo {}", + "foo do |bar| end", + "foo { |bar| }", + "foo { |bar; baz| }" ] # These are the combinations of instructions that we're going to test. From 0b2d012beddc2cb0ed6f4dbdca8486a8ce396b23 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 18 Nov 2022 10:32:57 -0500 Subject: [PATCH 029/104] Handle args forwarding --- lib/syntax_tree/visitor/compiler.rb | 99 +++++++++++++++++------------ test/compiler_test.rb | 4 ++ 2 files changed, 63 insertions(+), 40 deletions(-) diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb index 10c59a77..029d858a 100644 --- a/lib/syntax_tree/visitor/compiler.rb +++ b/lib/syntax_tree/visitor/compiler.rb @@ -1218,13 +1218,26 @@ def visit_bodystmt(node) end def visit_call(node) + if node.is_a?(CallNode) + return visit_call( + CommandCall.new( + receiver: node.receiver, + operator: node.operator, + message: node.message, + arguments: node.arguments, + block: nil, + location: node.location + ) + ) + end + arg_parts = argument_parts(node.arguments) + argc = arg_parts.length # First we're going to check if we're calling a method on an array # literal without any arguments. In that case there are some # specializations we might be able to perform. 
- if arg_parts.empty? && - (node.message.is_a?(Ident) || node.message.is_a?(Op)) + if argc == 0 && (node.message.is_a?(Ident) || node.message.is_a?(Op)) case node.receiver when ArrayLiteral parts = node.receiver.contents&.parts || [] @@ -1257,48 +1270,39 @@ def visit_call(node) end node.receiver ? visit(node.receiver) : builder.putself - - visit(node.arguments) - block_iseq = visit(node.block) if node.respond_to?(:block) && node.block - - if arg_parts.last.is_a?(ArgBlock) - flag = node.receiver.nil? ? VM_CALL_FCALL : 0 - flag |= VM_CALL_ARGS_BLOCKARG - - if arg_parts.any? { |part| part.is_a?(ArgStar) } + flag = 0 + + arg_parts.each do |arg_part| + case arg_part + when ArgBlock + argc -= 1 + flag |= VM_CALL_ARGS_BLOCKARG + visit(arg_part) + when ArgStar flag |= VM_CALL_ARGS_SPLAT - end + visit(arg_part) + when ArgsForward + flag |= VM_CALL_ARGS_SPLAT | VM_CALL_ARGS_BLOCKARG - if arg_parts.any? { |part| part.is_a?(BareAssocHash) } + lookup = current_iseq.local_table.find(:*, 0) + builder.getlocal(lookup.index, lookup.level) + builder.splatarray(arg_parts.length != 1) + + lookup = current_iseq.local_table.find(:&, 0) + builder.getblockparamproxy(lookup.index, lookup.level) + when BareAssocHash flag |= VM_CALL_KW_SPLAT + visit(arg_part) + else + visit(arg_part) end + end - builder.send( - node.message.value.to_sym, - arg_parts.length - 1, - flag, - block_iseq - ) - else - flag = 0 - arg_parts.each do |arg_part| - case arg_part - when ArgStar - flag |= VM_CALL_ARGS_SPLAT - when BareAssocHash - flag |= VM_CALL_KW_SPLAT - end - end + block_iseq = visit(node.block) if node.block + flag |= VM_CALL_ARGS_SIMPLE if block_iseq.nil? && flag == 0 + flag |= VM_CALL_FCALL if node.receiver.nil? - flag |= VM_CALL_ARGS_SIMPLE if block_iseq.nil? && flag == 0 - flag |= VM_CALL_FCALL if node.receiver.nil? 
- builder.send( - node.message.value.to_sym, - arg_parts.length, - flag, - block_iseq - ) - end + builder.send(node.message.value.to_sym, argc, flag, block_iseq) end def visit_class(node) @@ -1781,7 +1785,18 @@ def visit_params(node) checkkeywords.each { |checkkeyword| checkkeyword[1] = lookup.index } end - visit(node.keyword_rest) if node.keyword_rest + if node.keyword_rest.is_a?(ArgsForward) + current_iseq.local_table.plain(:*) + current_iseq.local_table.plain(:&) + + current_iseq.argument_options[:rest_start] = current_iseq.argument_size + current_iseq.argument_options[:block_start] = current_iseq.argument_size + 1 + + current_iseq.argument_size += 2 + elsif node.keyword_rest + visit(node.keyword_rest) + end + visit(node.block) if node.block end @@ -2122,7 +2137,11 @@ def argument_parts(node) when Args node.parts when ArgParen - node.arguments.parts + if node.arguments.is_a?(ArgsForward) + [node.arguments] + else + node.arguments.parts + end when Paren node.contents.parts end diff --git a/test/compiler_test.rb b/test/compiler_test.rb index fe0bd1f6..ec3766e2 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -354,6 +354,10 @@ class CompilerTest < Minitest::Test "def foo(bar: 1, baz: qux, **rest); end", "def foo(bar: qux, baz: 1, **rest); end", "def foo(bar: baz, qux: qaz, **rest); end", + "def foo(...); end", + "def foo(bar, ...); end", + "def foo(...); bar(...); end", + "def foo(bar, ...); baz(1, 2, 3, ...); end", # Class/module definitions "module Foo; end", "module ::Foo; end", From c59c58550b19186a7d8e61fd3b6bee3796760263 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 18 Nov 2022 10:35:12 -0500 Subject: [PATCH 030/104] Handle until --- lib/syntax_tree/visitor/compiler.rb | 18 ++++++++++++++++++ test/compiler_test.rb | 3 +++ 2 files changed, 21 insertions(+) diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb index 029d858a..1fe4365f 100644 --- a/lib/syntax_tree/visitor/compiler.rb +++ 
b/lib/syntax_tree/visitor/compiler.rb @@ -1997,6 +1997,24 @@ def visit_undef(node) end end + def visit_until(node) + jumps = [] + + jumps << builder.jump(-1) + builder.putnil + builder.pop + jumps << builder.jump(-1) + + label = builder.label + visit(node.statements) + builder.pop + jumps.each { |jump| jump[1] = builder.label } + + visit(node.predicate) + builder.branchunless(label) + builder.putnil if last_statement? + end + def visit_var_field(node) case node.value when CVar, IVar diff --git a/test/compiler_test.rb b/test/compiler_test.rb index ec3766e2..9fd3cfe9 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -260,6 +260,9 @@ class CompilerTest < Minitest::Test "if foo then bar elsif baz then qux end", "foo if bar", "foo while bar", + "while foo do bar end", + "foo until bar", + "until foo do bar end", "for i in [1, 2, 3] do i end", "foo ? bar : baz", # Constructed values From 2f78d142b15a2aefae11d4dbf94961ad9c4fee28 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 18 Nov 2022 10:38:49 -0500 Subject: [PATCH 031/104] Handle sclass --- lib/syntax_tree/visitor/compiler.rb | 13 +++++++++++++ test/compiler_test.rb | 2 ++ 2 files changed, 15 insertions(+) diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb index 1fe4365f..d93021ef 100644 --- a/lib/syntax_tree/visitor/compiler.rb +++ b/lib/syntax_tree/visitor/compiler.rb @@ -1876,6 +1876,19 @@ def visit_rest_param(node) current_iseq.argument_size += 1 end + def visit_sclass(node) + visit(node.target) + builder.putnil + + singleton_iseq = + with_instruction_sequence(:class, "singleton class", current_iseq, node) do + visit(node.bodystmt) + builder.leave + end + + builder.defineclass(:singletonclass, singleton_iseq, VM_DEFINECLASS_TYPE_SINGLETON_CLASS) + end + def visit_statements(node) statements = node.body.select do |statement| diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 9fd3cfe9..aba9cff5 100644 --- a/test/compiler_test.rb +++ 
b/test/compiler_test.rb @@ -378,6 +378,8 @@ class CompilerTest < Minitest::Test "class ::Foo::Bar < Baz; end", "class Foo; class Bar < Baz; end; end", "class Foo < baz; end", + "class << Object; end", + "class << ::String; end", # Block "foo do end", "foo {}", From 69bf4bcf447180e5bfc5daf20a282815cbbad307 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 18 Nov 2022 10:44:00 -0500 Subject: [PATCH 032/104] Handle lambda --- lib/syntax_tree/visitor/compiler.rb | 16 ++++++++++++++++ test/compiler_test.rb | 7 ++++++- 2 files changed, 22 insertions(+), 1 deletion(-) diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb index d93021ef..e5807c0f 100644 --- a/lib/syntax_tree/visitor/compiler.rb +++ b/lib/syntax_tree/visitor/compiler.rb @@ -1591,6 +1591,22 @@ def visit_label(node) builder.putobject(node.accept(RubyVisitor.new)) end + def visit_lambda(node) + lambda_iseq = + with_instruction_sequence(:block, "block in #{current_iseq.name}", current_iseq, node) do + visit(node.params) + visit(node.statements) + builder.leave + end + + builder.putspecialobject(VM_SPECIAL_OBJECT_VMCORE) + builder.send(:lambda, 0, VM_CALL_FCALL, lambda_iseq) + end + + def visit_lambda_var(node) + visit_block_var(node) + end + def visit_method_add_block(node) visit_call( CommandCall.new( diff --git a/test/compiler_test.rb b/test/compiler_test.rb index aba9cff5..d0fed6fd 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -385,7 +385,12 @@ class CompilerTest < Minitest::Test "foo {}", "foo do |bar| end", "foo { |bar| }", - "foo { |bar; baz| }" + "foo { |bar; baz| }", + "-> do end", + "-> {}", + "-> (bar) do end", + "-> (bar) {}", + "-> (bar; baz) { }" ] # These are the combinations of instructions that we're going to test. 
From 5fa8a6f44800a638b14c886a0a11b7a2e9bd21bb Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 18 Nov 2022 10:47:49 -0500 Subject: [PATCH 033/104] Handle unless --- lib/syntax_tree/visitor/compiler.rb | 24 ++++++++++++++++++++++++ test/compiler_test.rb | 3 +++ 2 files changed, 27 insertions(+) diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb index e5807c0f..fab538ae 100644 --- a/lib/syntax_tree/visitor/compiler.rb +++ b/lib/syntax_tree/visitor/compiler.rb @@ -2026,6 +2026,30 @@ def visit_undef(node) end end + def visit_unless(node) + visit(node.predicate) + branchunless = builder.branchunless(-1) + node.consequent ? visit(node.consequent) : builder.putnil + + if last_statement? + builder.leave + branchunless[1] = builder.label + + visit(node.statements) + else + builder.pop + + if node.consequent + jump = builder.jump(-1) + branchunless[1] = builder.label + visit(node.consequent) + jump[1] = builder.label + else + branchunless[1] = builder.label + end + end + end + def visit_until(node) jumps = [] diff --git a/test/compiler_test.rb b/test/compiler_test.rb index d0fed6fd..c9476b81 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -259,6 +259,9 @@ class CompilerTest < Minitest::Test "if foo then bar else baz end", "if foo then bar elsif baz then qux end", "foo if bar", + "unless foo then bar end", + "unless foo then bar else baz end", + "foo unless bar", "foo while bar", "while foo do bar end", "foo until bar", From 8e88b0d09fe32f759339fa6fc61d1efdf2e5d1cb Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 18 Nov 2022 10:51:06 -0500 Subject: [PATCH 034/104] definesmethod --- lib/syntax_tree/visitor/compiler.rb | 16 ++++++++++++++-- test/compiler_test.rb | 2 ++ 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb index fab538ae..c8e5b3e8 100644 --- a/lib/syntax_tree/visitor/compiler.rb +++ 
b/lib/syntax_tree/visitor/compiler.rb @@ -418,7 +418,7 @@ def serialize(insn) [insn[0], iseq.local_table.offset(insn[1]), *insn[2..]] when :defineclass [insn[0], insn[1], insn[2].to_a, insn[3]] - when :definemethod + when :definemethod, :definesmethod [insn[0], insn[1], insn[2].to_a] when :send # For any instructions that push instruction sequences onto the @@ -511,6 +511,11 @@ def definemethod(name, method_iseq) iseq.push([:definemethod, name, method_iseq]) end + def definesmethod(name, method_iseq) + stack.change_by(-1) + iseq.push([:definesmethod, name, method_iseq]) + end + def dup stack.change_by(-1 + 2) iseq.push([:dup]) @@ -1390,7 +1395,14 @@ def visit_def(node) end name = node.name.value.to_sym - builder.definemethod(name, method_iseq) + + if node.target + visit(node.target) + builder.definesmethod(name, method_iseq) + else + builder.definemethod(name, method_iseq) + end + builder.putobject(name) end diff --git a/test/compiler_test.rb b/test/compiler_test.rb index c9476b81..af42fe0a 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -364,6 +364,8 @@ class CompilerTest < Minitest::Test "def foo(bar, ...); end", "def foo(...); bar(...); end", "def foo(bar, ...); baz(1, 2, 3, ...); end", + "def self.foo; end", + "def foo.bar(baz); end", # Class/module definitions "module Foo; end", "module ::Foo; end", From f8ac1227dad0fc0c54db01c38e916d4e0f47b109 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 18 Nov 2022 10:57:28 -0500 Subject: [PATCH 035/104] getblockparam --- lib/syntax_tree/visitor/compiler.rb | 38 ++++++++++++++++++++--------- test/compiler_test.rb | 1 + 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb index c8e5b3e8..df5e4838 100644 --- a/lib/syntax_tree/visitor/compiler.rb +++ b/lib/syntax_tree/visitor/compiler.rb @@ -209,7 +209,7 @@ def change_by(value) class LocalTable # A local representing a block passed into the current instruction # 
sequence. - class BlockProxyLocal + class BlockLocal attr_reader :name def initialize(name) @@ -260,9 +260,9 @@ def size locals.length end - # Add a BlockProxyLocal to the local table. - def block_proxy(name) - locals << BlockProxyLocal.new(name) unless has?(name) + # Add a BlockLocal to the local table. + def block(name) + locals << BlockLocal.new(name) unless has?(name) end # Add a PlainLocal to the local table. @@ -401,15 +401,15 @@ def to_a def serialize(insn) case insn[0] - when :checkkeyword, :getblockparamproxy, :getlocal_WC_0, - :getlocal_WC_1, :getlocal, :setlocal_WC_0, :setlocal_WC_1, - :setlocal + when :checkkeyword, :getblockparam, :getblockparamproxy, + :getlocal_WC_0, :getlocal_WC_1, :getlocal, + :setlocal_WC_0, :setlocal_WC_1, :setlocal iseq = self case insn[0] when :getlocal_WC_1, :setlocal_WC_1 iseq = iseq.parent_iseq - when :getblockparamproxy, :getlocal, :setlocal + when :getblockparam, :getblockparamproxy, :getlocal, :setlocal insn[2].times { iseq = iseq.parent_iseq } end @@ -536,6 +536,11 @@ def dupn(number) iseq.push([:dupn, number]) end + def getblockparam(index, level) + stack.change_by(+1) + iseq.push([:getblockparam, index, level]) + end + def getblockparamproxy(index, level) stack.change_by(+1) iseq.push([:getblockparamproxy, index, level]) @@ -1214,7 +1219,7 @@ def visit_block_var(node) def visit_blockarg(node) current_iseq.argument_options[:block_start] = current_iseq.argument_size - current_iseq.local_table.block_proxy(node.name.value.to_sym) + current_iseq.local_table.block(node.name.value.to_sym) current_iseq.argument_size += 1 end @@ -1274,7 +1279,16 @@ def visit_call(node) end end - node.receiver ? 
visit(node.receiver) : builder.putself + if node.receiver + if node.receiver.is_a?(VarRef) && (lookup = current_iseq.local_variable(node.receiver.value.value.to_sym)) && lookup.local.is_a?(LocalTable::BlockLocal) + builder.getblockparamproxy(lookup.index, lookup.level) + else + visit(node.receiver) + end + else + builder.putself + end + flag = 0 arg_parts.each do |arg_part| @@ -2105,8 +2119,8 @@ def visit_var_ref(node) lookup = current_iseq.local_variable(node.value.value.to_sym) case lookup.local - when LocalTable::BlockProxyLocal - builder.getblockparamproxy(lookup.index, lookup.level) + when LocalTable::BlockLocal + builder.getblockparam(lookup.index, lookup.level) when LocalTable::PlainLocal builder.getlocal(lookup.index, lookup.level) end diff --git a/test/compiler_test.rb b/test/compiler_test.rb index af42fe0a..5f320ef2 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -342,6 +342,7 @@ class CompilerTest < Minitest::Test "def foo(bar, baz, *qux, quaz); end", "def foo(bar, baz, &qux); end", "def foo(bar, *baz, &qux); end", + "def foo(&qux); qux; end", "def foo(&qux); qux.call; end", "def foo(bar:); end", "def foo(bar:, baz:); end", From db88d3309390f061c62fa2896bd8d07d9ab81f61 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 18 Nov 2022 11:17:51 -0500 Subject: [PATCH 036/104] Handle case/when --- lib/syntax_tree/visitor/compiler.rb | 47 +++++++++++++++++++++++++++++ test/compiler_test.rb | 2 ++ 2 files changed, 49 insertions(+) diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb index df5e4838..331a937e 100644 --- a/lib/syntax_tree/visitor/compiler.rb +++ b/lib/syntax_tree/visitor/compiler.rb @@ -1324,6 +1324,49 @@ def visit_call(node) builder.send(node.message.value.to_sym, argc, flag, block_iseq) end + def visit_case(node) + visit(node.value) if node.value + + clauses = [] + else_clause = nil + + current = node.consequent + + while current + clauses << current + + if (current = 
current.consequent).is_a?(Else) + else_clause = current + break + end + end + + branches = + clauses.map do |clause| + visit(clause.arguments) + builder.topn(1) + builder.send(:===, 1, VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE) + [clause, builder.branchif(:label_00)] + end + + builder.pop + + if else_clause + visit(else_clause) + else + builder.putnil + end + + builder.leave + + branches.each_with_index do |(clause, branchif), index| + builder.leave if index != 0 + branchif[1] = builder.label + builder.pop + visit(clause) + end + end + def visit_class(node) name = node.constant.constant.value.to_sym class_iseq = @@ -2148,6 +2191,10 @@ def visit_vcall(node) builder.send(node.value.value.to_sym, 0, flag) end + def visit_when(node) + visit(node.statements) + end + def visit_while(node) jumps = [] diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 5f320ef2..8b95c07a 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -268,6 +268,8 @@ class CompilerTest < Minitest::Test "until foo do bar end", "for i in [1, 2, 3] do i end", "foo ? 
bar : baz", + "case foo when bar then 1 end", + "case foo when bar then 1 else 2 end", # Constructed values "foo..bar", "foo...bar", From a7e78259dccd31606e1e2078ae72c3a22ccf5634 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 18 Nov 2022 11:41:39 -0500 Subject: [PATCH 037/104] Handle BEGIN{} and END{} --- lib/syntax_tree/visitor/compiler.rb | 70 +++++++++++++++++++++++------ 1 file changed, 56 insertions(+), 14 deletions(-) diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb index 331a937e..8114a9c5 100644 --- a/lib/syntax_tree/visitor/compiler.rb +++ b/lib/syntax_tree/visitor/compiler.rb @@ -336,8 +336,6 @@ def local_variable(name, level = 0) lookup elsif parent_iseq parent_iseq.local_variable(name, level + 1) - else - raise "Unknown local variable: #{name}" end end @@ -388,7 +386,7 @@ def to_a name, "", "", - 1, + location.start_line, type, local_table.names, argument_options, @@ -424,6 +422,8 @@ def serialize(insn) # For any instructions that push instruction sequences onto the # stack, we need to call #to_a on them as well. 
[insn[0], insn[1], (insn[2].to_a if insn[2])] + when :once + [insn[0], insn[1].to_a, insn[2]] else insn end @@ -655,6 +655,11 @@ def objtostring(method_id, argc, flag) iseq.push([:objtostring, call_data(method_id, argc, flag)]) end + def once(postexe_iseq, inline_storage) + stack.change_by(+1) + iseq.push([:once, postexe_iseq, inline_storage]) + end + def opt_getconstant_path(names) if RUBY_VERSION >= "3.2" stack.change_by(+1) @@ -1002,6 +1007,10 @@ def initialize( @last_statement = false end + def visit_BEGIN(node) + visit(node.statements) + end + def visit_CHAR(node) if frozen_string_literal builder.putobject(node.value[1..]) @@ -1010,6 +1019,27 @@ def visit_CHAR(node) end end + def visit_END(node) + name = "block in #{current_iseq.name}" + once_iseq = + with_instruction_sequence(:block, name, current_iseq, node) do + postexe_iseq = + with_instruction_sequence(:block, name, current_iseq, node) do + *statements, last_statement = node.statements.body + visit_all(statements) + with_last_statement { visit(last_statement) } + builder.leave + end + + builder.putspecialobject(VM_SPECIAL_OBJECT_VMCORE) + builder.send(:"core#set_postexe", 0, VM_CALL_FCALL, postexe_iseq) + builder.leave + end + + builder.once(once_iseq, current_iseq.inline_storage) + builder.pop + end + def visit_alias(node) builder.putspecialobject(VM_SPECIAL_OBJECT_VMCORE) builder.putspecialobject(VM_SPECIAL_OBJECT_CBASE) @@ -1898,17 +1928,23 @@ def visit_program(node) end end - statements = - node.statements.body.select do |statement| - case statement - when Comment, EmbDoc, EndContent, VoidStmt - false - else - true - end + preexes = [] + statements = [] + + node.statements.body.each do |statement| + case statement + when Comment, EmbDoc, EndContent, VoidStmt + # ignore + when BEGINBlock + preexes << statement + else + statements << statement end + end with_instruction_sequence(:top, "", nil, node) do + visit_all(preexes) + if statements.empty? 
builder.putnil else @@ -2144,8 +2180,13 @@ def visit_var_field(node) current_iseq.inline_storage_for(name) when Ident name = node.value.value.to_sym - current_iseq.local_table.plain(name) - current_iseq.local_variable(name) + + if (local_variable = current_iseq.local_variable(name)) + local_variable + else + current_iseq.local_table.plain(name) + current_iseq.local_variable(name) + end end end @@ -2460,12 +2501,13 @@ def with_instruction_sequence(type, name, parent_iseq, node) # last statement of a scope and allow visit methods to query that # information. def with_last_statement + previous = @last_statement @last_statement = true begin yield ensure - @last_statement = false + @last_statement = previous end end From b022d5617560f57cc23a367946a4722b5dd6a9fd Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 18 Nov 2022 11:50:43 -0500 Subject: [PATCH 038/104] Handle mrhs, mlhs, massign --- lib/syntax_tree/visitor/compiler.rb | 41 +++++++++++++++++++++++++++++ test/compiler_test.rb | 6 +++++ 2 files changed, 47 insertions(+) diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb index 8114a9c5..c09efe84 100644 --- a/lib/syntax_tree/visitor/compiler.rb +++ b/lib/syntax_tree/visitor/compiler.rb @@ -94,6 +94,10 @@ def visit_label(node) node.value.chomp(":").to_sym end + def visit_mrhs(node) + visit_all(node.parts) + end + def visit_qsymbols(node) node.elements.map { |element| visit(element).to_sym } end @@ -536,6 +540,11 @@ def dupn(number) iseq.push([:dupn, number]) end + def expandarray(length, flag) + stack.change_by(-1 + length) + iseq.push([:expandarray, length, flag]) + end + def getblockparam(index, level) stack.change_by(+1) iseq.push([:getblockparam, index, level]) @@ -1706,6 +1715,12 @@ def visit_lambda_var(node) visit_block_var(node) end + def visit_massign(node) + visit(node.value) + builder.dup + visit(node.target) + end + def visit_method_add_block(node) visit_call( CommandCall.new( @@ -1719,6 +1734,23 @@ def 
visit_method_add_block(node) ) end + def visit_mlhs(node) + lookups = [] + + node.parts.each do |part| + case part + when VarField + lookups << visit(part) + end + end + + builder.expandarray(lookups.length, 0) + + lookups.each do |lookup| + builder.setlocal(lookup.index, lookup.level) + end + end + def visit_module(node) name = node.constant.constant.value.to_sym module_iseq = @@ -1749,6 +1781,15 @@ def visit_module(node) builder.defineclass(name, module_iseq, flags) end + def visit_mrhs(node) + if (compiled = RubyVisitor.compile(node)) + builder.duparray(compiled) + else + visit_all(node.parts) + builder.newarray(node.parts.length) + end + end + def visit_not(node) visit(node.statement) builder.send(:!, 0, VM_CALL_ARGS_SIMPLE) diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 8b95c07a..8868b801 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -130,6 +130,12 @@ class CompilerTest < Minitest::Test "foo ||= 1", "foo <<= 1", "foo ^= 1", + "foo, bar = 1, 2", + "foo, bar, = 1, 2", + "foo, bar, baz = 1, 2", + "foo, bar = 1, 2, 3", + "foo = 1, 2, 3", + "foo, * = 1, 2, 3", # Instance variables "@foo", "@foo = 1", From 358d029abd6bfa9bafee78154f89a03c95999d04 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 18 Nov 2022 11:55:54 -0500 Subject: [PATCH 039/104] Better handle visit_string_parts --- lib/syntax_tree/visitor/compiler.rb | 44 ++++++++++++++--------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb index c09efe84..8b42613c 100644 --- a/lib/syntax_tree/visitor/compiler.rb +++ b/lib/syntax_tree/visitor/compiler.rb @@ -1636,8 +1636,8 @@ def visit_heredoc(node) elsif node.parts.length == 1 && node.parts.first.is_a?(TStringContent) visit(node.parts.first) else - visit_string_parts(node) - builder.concatstrings(node.parts.length) + length = visit_string_parts(node) + builder.concatstrings(length) end end @@ -2026,10 +2026,9 @@ def 
visit_rational(node) def visit_regexp_literal(node) builder.putobject(node.accept(RubyVisitor.new)) rescue RubyVisitor::CompilationError - visit_string_parts(node) - flags = RubyVisitor.new.visit_regexp_literal_flags(node) - builder.toregexp(flags, node.parts.length) + length = visit_string_parts(node) + builder.toregexp(flags, length) end def visit_rest_param(node) @@ -2086,8 +2085,8 @@ def visit_string_literal(node) if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) visit(node.parts.first) else - visit_string_parts(node) - builder.concatstrings(node.parts.length) + length = visit_string_parts(node) + builder.concatstrings(length) end end @@ -2114,13 +2113,7 @@ def visit_symbols(node) element.parts.first.is_a?(TStringContent) builder.putobject(element.parts.first.value.to_sym) else - length = element.parts.length - unless element.parts.first.is_a?(TStringContent) - builder.putobject("") - length += 1 - end - - visit_string_parts(element) + length = visit_string_parts(element) builder.concatstrings(length) builder.intern end @@ -2299,13 +2292,7 @@ def visit_word(node) if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) visit(node.parts.first) else - length = node.parts.length - unless node.parts.first.is_a?(TStringContent) - builder.putobject("") - length += 1 - end - - visit_string_parts(node) + length = visit_string_parts(node) builder.concatstrings(length) end end @@ -2330,8 +2317,8 @@ def visit_words(node) def visit_xstring_literal(node) builder.putself - visit_string_parts(node) - builder.concatstrings(node.parts.length) if node.parts.length > 1 + length = visit_string_parts(node) + builder.concatstrings(node.parts.length) if length > 1 builder.send(:`, 1, VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE) end @@ -2493,6 +2480,13 @@ def push_interpolate # heredocs, etc. This method will visit all the parts of a string within # those containers. 
def visit_string_parts(node) + length = 0 + + unless node.parts.first.is_a?(TStringContent) + builder.putobject("") + length += 1 + end + node.parts.each do |part| case part when StringDVar @@ -2504,7 +2498,11 @@ def visit_string_parts(node) when TStringContent builder.putobject(part.accept(RubyVisitor.new)) end + + length += 1 end + + length end # The current instruction sequence that we're compiling is always stored From 593486dff299c01b39bf16a0ad4cd40a9147e45f Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 18 Nov 2022 11:59:56 -0500 Subject: [PATCH 040/104] Handle the &. operator --- lib/syntax_tree/visitor/compiler.rb | 82 ++++++++++++++++++++--------- test/compiler_test.rb | 4 ++ 2 files changed, 60 insertions(+), 26 deletions(-) diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb index 8b42613c..bac8b914 100644 --- a/lib/syntax_tree/visitor/compiler.rb +++ b/lib/syntax_tree/visitor/compiler.rb @@ -404,8 +404,8 @@ def to_a def serialize(insn) case insn[0] when :checkkeyword, :getblockparam, :getblockparamproxy, - :getlocal_WC_0, :getlocal_WC_1, :getlocal, - :setlocal_WC_0, :setlocal_WC_1, :setlocal + :getlocal_WC_0, :getlocal_WC_1, :getlocal, :setlocal_WC_0, + :setlocal_WC_1, :setlocal iseq = self case insn[0] @@ -480,6 +480,11 @@ def branchif(index) iseq.push([:branchif, index]) end + def branchnil(index) + stack.change_by(-1) + iseq.push([:branchnil, index]) + end + def branchunless(index) stack.change_by(-1) iseq.push([:branchunless, index]) @@ -1268,14 +1273,16 @@ def visit_bodystmt(node) def visit_call(node) if node.is_a?(CallNode) - return visit_call( - CommandCall.new( - receiver: node.receiver, - operator: node.operator, - message: node.message, - arguments: node.arguments, - block: nil, - location: node.location + return( + visit_call( + CommandCall.new( + receiver: node.receiver, + operator: node.operator, + message: node.message, + arguments: node.arguments, + block: nil, + location: node.location + ) ) ) 
end @@ -1319,7 +1326,11 @@ def visit_call(node) end if node.receiver - if node.receiver.is_a?(VarRef) && (lookup = current_iseq.local_variable(node.receiver.value.value.to_sym)) && lookup.local.is_a?(LocalTable::BlockLocal) + if node.receiver.is_a?(VarRef) && + ( + lookup = + current_iseq.local_variable(node.receiver.value.value.to_sym) + ) && lookup.local.is_a?(LocalTable::BlockLocal) builder.getblockparamproxy(lookup.index, lookup.level) else visit(node.receiver) @@ -1328,6 +1339,12 @@ def visit_call(node) builder.putself end + branchnil = + if node.operator&.value == "&." + builder.dup + builder.branchnil(-1) + end + flag = 0 arg_parts.each do |arg_part| @@ -1361,6 +1378,7 @@ def visit_call(node) flag |= VM_CALL_FCALL if node.receiver.nil? builder.send(node.message.value.to_sym, argc, flag, block_iseq) + branchnil[1] = builder.label if branchnil end def visit_case(node) @@ -1390,11 +1408,7 @@ def visit_case(node) builder.pop - if else_clause - visit(else_clause) - else - builder.putnil - end + else_clause ? 
visit(else_clause) : builder.putnil builder.leave @@ -1701,7 +1715,12 @@ def visit_label(node) def visit_lambda(node) lambda_iseq = - with_instruction_sequence(:block, "block in #{current_iseq.name}", current_iseq, node) do + with_instruction_sequence( + :block, + "block in #{current_iseq.name}", + current_iseq, + node + ) do visit(node.params) visit(node.statements) builder.leave @@ -1746,9 +1765,7 @@ def visit_mlhs(node) builder.expandarray(lookups.length, 0) - lookups.each do |lookup| - builder.setlocal(lookup.index, lookup.level) - end + lookups.each { |lookup| builder.setlocal(lookup.index, lookup.level) } end def visit_module(node) @@ -1944,10 +1961,14 @@ def visit_params(node) if node.keyword_rest.is_a?(ArgsForward) current_iseq.local_table.plain(:*) current_iseq.local_table.plain(:&) - - current_iseq.argument_options[:rest_start] = current_iseq.argument_size - current_iseq.argument_options[:block_start] = current_iseq.argument_size + 1 - + + current_iseq.argument_options[ + :rest_start + ] = current_iseq.argument_size + current_iseq.argument_options[ + :block_start + ] = current_iseq.argument_size + 1 + current_iseq.argument_size += 2 elsif node.keyword_rest visit(node.keyword_rest) @@ -2042,12 +2063,21 @@ def visit_sclass(node) builder.putnil singleton_iseq = - with_instruction_sequence(:class, "singleton class", current_iseq, node) do + with_instruction_sequence( + :class, + "singleton class", + current_iseq, + node + ) do visit(node.bodystmt) builder.leave end - builder.defineclass(:singletonclass, singleton_iseq, VM_DEFINECLASS_TYPE_SINGLETON_CLASS) + builder.defineclass( + :singletonclass, + singleton_iseq, + VM_DEFINECLASS_TYPE_SINGLETON_CLASS + ) end def visit_statements(node) diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 8868b801..7afd920e 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -259,6 +259,10 @@ class CompilerTest < Minitest::Test "Foo::Bar.baz = 1", "::Foo::Bar.baz = 1", # Control flow + "foo&.bar", + 
"foo&.bar(1)", + "foo&.bar 1, 2, 3", + "foo&.bar {}", "foo && bar", "foo || bar", "if foo then bar end", From 7c58e9204e12c84f17825175fa65bc67f3489bd0 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 18 Nov 2022 13:48:11 -0500 Subject: [PATCH 041/104] Test evaluation --- test/compiler_test.rb | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 7afd920e..632b3e55 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -2,9 +2,17 @@ return if !defined?(RubyVM::InstructionSequence) || RUBY_VERSION < "3.1" require_relative "test_helper" +require "fiddle" module SyntaxTree class CompilerTest < Minitest::Test + ISEQ_LOAD = + Fiddle::Function.new( + Fiddle::Handle::DEFAULT["rb_iseq_load"], + [Fiddle::TYPE_VOIDP] * 3, + Fiddle::TYPE_VOIDP + ) + CASES = [ # Various literals placed on the stack "true", @@ -430,6 +438,11 @@ class CompilerTest < Minitest::Test end end + def test_evaluation + assert_evaluates 5, "2 + 3" + assert_evaluates 5, "a = 2; b = 3; a + b" + end + private def serialize_iseq(iseq) @@ -463,5 +476,17 @@ def assert_compiles(source, **options) serialize_iseq(program.accept(Visitor::Compiler.new(**options))) ) end + + def assert_evaluates(expected, source, **options) + program = SyntaxTree.parse(source) + compiled = program.accept(Visitor::Compiler.new(**options)).to_a + + # Temporary hack until we get these working. 
+ compiled[4][:node_id] = 11 + compiled[4][:node_ids] = [1, 0, 3, 2, 6, 7, 9, -1] + + iseq = Fiddle.dlunwrap(ISEQ_LOAD.call(Fiddle.dlwrap(compiled), 0, nil)) + assert_equal expected, iseq.eval + end end end From af8c5203f92e5b8f45ba07c60690aa43ad17e7f4 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 18 Nov 2022 14:04:42 -0500 Subject: [PATCH 042/104] Handle tracepoint events except line --- lib/syntax_tree/visitor/compiler.rb | 31 ++++++++++++++++++++++++++--- test/compiler_test.rb | 21 +++++++++++-------- 2 files changed, 41 insertions(+), 11 deletions(-) diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb index bac8b914..82155d37 100644 --- a/lib/syntax_tree/visitor/compiler.rb +++ b/lib/syntax_tree/visitor/compiler.rb @@ -285,6 +285,8 @@ def offset(index) # list of instructions along with the metadata pertaining to them. It also # functions as a builder for the instruction sequence. class InstructionSequence + MAGIC = "YARVInstructionSequence/SimpleDataFormat" + # The type of the instruction sequence. attr_reader :type @@ -363,7 +365,9 @@ def inline_storage_for(name) end def length - insns.sum(&:length) + insns.inject(0) do |sum, insn| + insn.is_a?(Array) ? sum + insn.length : sum + end end def each_child @@ -378,7 +382,7 @@ def to_a versions = RUBY_VERSION.split(".").map(&:to_i) [ - "YARVInstructionSequence/SimpleDataFormat", + MAGIC, versions[0], versions[1], 1, @@ -462,7 +466,13 @@ def initialize( # This creates a new label at the current length of the instruction # sequence. It is used as the operand for jump instructions. def label - :"label_#{iseq.length}" + name = :"label_#{iseq.length}" + iseq.insns.last == name ? 
name : event(name) + end + + def event(name) + iseq.push(name) + name end def adjuststack(number) @@ -1239,8 +1249,10 @@ def visit_block(node) current_iseq, node ) do + builder.event(:RUBY_EVENT_B_CALL) visit(node.block_var) visit(node.bodystmt) + builder.event(:RUBY_EVENT_B_RETURN) builder.leave end end @@ -1429,7 +1441,9 @@ def visit_class(node) current_iseq, node ) do + builder.event(:RUBY_EVENT_CLASS) visit(node.bodystmt) + builder.event(:RUBY_EVENT_END) builder.leave end @@ -1500,7 +1514,9 @@ def visit_def(node) node ) do visit(node.params) if node.params + builder.event(:RUBY_EVENT_CALL) visit(node.bodystmt) + builder.event(:RUBY_EVENT_RETURN) builder.leave end @@ -1628,9 +1644,12 @@ def visit_for(node) local_variable = current_iseq.local_variable(name) builder.setlocal(local_variable.index, local_variable.level) + + builder.event(:RUBY_EVENT_B_CALL) builder.nop visit(node.statements) + builder.event(:RUBY_EVENT_B_RETURN) builder.leave end @@ -1721,8 +1740,10 @@ def visit_lambda(node) current_iseq, node ) do + builder.event(:RUBY_EVENT_B_CALL) visit(node.params) visit(node.statements) + builder.event(:RUBY_EVENT_B_RETURN) builder.leave end @@ -1777,7 +1798,9 @@ def visit_module(node) current_iseq, node ) do + builder.event(:RUBY_EVENT_CLASS) visit(node.bodystmt) + builder.event(:RUBY_EVENT_END) builder.leave end @@ -2069,7 +2092,9 @@ def visit_sclass(node) current_iseq, node ) do + builder.event(:RUBY_EVENT_CLASS) visit(node.bodystmt) + builder.event(:RUBY_EVENT_END) builder.leave end diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 632b3e55..cf0667bb 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -453,15 +453,20 @@ def serialize_iseq(iseq) serialized[4].delete(:node_ids) serialized[13] = serialized[13].filter_map do |insn| - next unless insn.is_a?(Array) - - insn.map do |operand| - if operand.is_a?(Array) && - operand[0] == "YARVInstructionSequence/SimpleDataFormat" - serialize_iseq(operand) - else - operand + case insn + 
when Array + insn.map do |operand| + if operand.is_a?(Array) && + operand[0] == Visitor::Compiler::InstructionSequence::MAGIC + serialize_iseq(operand) + else + operand + end end + when Integer, :RUBY_EVENT_LINE + # ignore these for now + else + insn end end From f40ae12519f52b32a78dd60e87fe69e4f3fa12ce Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 18 Nov 2022 20:00:55 -0500 Subject: [PATCH 043/104] Move compiler to its own file --- lib/syntax_tree.rb | 4 +- lib/syntax_tree/compiler.rb | 2737 +++++++++++++++++++++++++++ lib/syntax_tree/visitor/compiler.rb | 2719 -------------------------- test/compiler_test.rb | 21 +- 4 files changed, 2743 insertions(+), 2738 deletions(-) create mode 100644 lib/syntax_tree/compiler.rb delete mode 100644 lib/syntax_tree/visitor/compiler.rb diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index aea21d8e..c62132e6 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -1,6 +1,7 @@ # frozen_string_literal: true require "etc" +require "fiddle" require "json" require "pp" require "prettier_print" @@ -13,7 +14,6 @@ require_relative "syntax_tree/basic_visitor" require_relative "syntax_tree/visitor" -require_relative "syntax_tree/visitor/compiler" require_relative "syntax_tree/visitor/field_visitor" require_relative "syntax_tree/visitor/json_visitor" require_relative "syntax_tree/visitor/match_visitor" @@ -26,6 +26,8 @@ require_relative "syntax_tree/pattern" require_relative "syntax_tree/search" +require_relative "syntax_tree/compiler" + # Syntax Tree is a suite of tools built on top of the internal CRuby parser. It # provides the ability to generate a syntax tree from source, as well as the # tools necessary to inspect and manipulate that syntax tree. 
It can be used to diff --git a/lib/syntax_tree/compiler.rb b/lib/syntax_tree/compiler.rb new file mode 100644 index 00000000..d9b7e787 --- /dev/null +++ b/lib/syntax_tree/compiler.rb @@ -0,0 +1,2737 @@ +# frozen_string_literal: true + +module SyntaxTree + # This class is an experiment in transforming Syntax Tree nodes into their + # corresponding YARV instruction sequences. It attempts to mirror the + # behavior of RubyVM::InstructionSequence.compile. + # + # You use this as with any other visitor. First you parse code into a tree, + # then you visit it with this compiler. Visiting the root node of the tree + # will return a SyntaxTree::Visitor::Compiler::InstructionSequence object. + # With that object you can call #to_a on it, which will return a serialized + # form of the instruction sequence as an array. This array _should_ mirror + # the array given by RubyVM::InstructionSequence#to_a. + # + # As an example, here is how you would compile a single expression: + # + # program = SyntaxTree.parse("1 + 2") + # program.accept(SyntaxTree::Visitor::Compiler.new).to_a + # + # [ + # "YARVInstructionSequence/SimpleDataFormat", + # 3, + # 1, + # 1, + # {:arg_size=>0, :local_size=>0, :stack_max=>2}, + # "", + # "", + # "", + # 1, + # :top, + # [], + # {}, + # [], + # [ + # [:putobject_INT2FIX_1_], + # [:putobject, 2], + # [:opt_plus, {:mid=>:+, :flag=>16, :orig_argc=>1}], + # [:leave] + # ] + # ] + # + # Note that this is the same output as calling: + # + # RubyVM::InstructionSequence.compile("1 + 2").to_a + # + class Compiler < BasicVisitor + # This visitor is responsible for converting Syntax Tree nodes into their + # corresponding Ruby structures. This is used to convert the operands of + # some instructions like putobject that push a Ruby object directly onto + # the stack. It is only used when the entire structure can be represented + # at compile-time, as opposed to constructed at run-time. 
+ class RubyVisitor < BasicVisitor + # This error is raised whenever a node cannot be converted into a Ruby + # object at compile-time. + class CompilationError < StandardError + end + + # This will attempt to compile the given node. If it's possible, then + # it will return the compiled object. Otherwise it will return nil. + def self.compile(node) + node.accept(new) + rescue CompilationError + end + + def visit_array(node) + visit_all(node.contents.parts) + end + + def visit_bare_assoc_hash(node) + node.assocs.to_h do |assoc| + # We can only convert regular key-value pairs. A double splat ** + # operator means it has to be converted at run-time. + raise CompilationError unless assoc.is_a?(Assoc) + [visit(assoc.key), visit(assoc.value)] + end + end + + def visit_float(node) + node.value.to_f + end + + alias visit_hash visit_bare_assoc_hash + + def visit_imaginary(node) + node.value.to_c + end + + def visit_int(node) + node.value.to_i + end + + def visit_label(node) + node.value.chomp(":").to_sym + end + + def visit_mrhs(node) + visit_all(node.parts) + end + + def visit_qsymbols(node) + node.elements.map { |element| visit(element).to_sym } + end + + def visit_qwords(node) + visit_all(node.elements) + end + + def visit_range(node) + left, right = [visit(node.left), visit(node.right)] + node.operator.value === ".." ? left..right : left...right + end + + def visit_rational(node) + node.value.to_r + end + + def visit_regexp_literal(node) + if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + Regexp.new(node.parts.first.value, visit_regexp_literal_flags(node)) + else + # Any interpolation of expressions or variables will result in the + # regular expression being constructed at run-time. + raise CompilationError + end + end + + # This isn't actually a visit method, though maybe it should be. It is + # responsible for converting the set of string options on a regular + # expression into its equivalent integer. 
+ def visit_regexp_literal_flags(node) + node + .options + .chars + .inject(0) do |accum, option| + accum | + case option + when "i" + Regexp::IGNORECASE + when "x" + Regexp::EXTENDED + when "m" + Regexp::MULTILINE + else + raise "Unknown regexp option: #{option}" + end + end + end + + def visit_symbol_literal(node) + node.value.value.to_sym + end + + def visit_symbols(node) + node.elements.map { |element| visit(element).to_sym } + end + + def visit_tstring_content(node) + node.value + end + + def visit_word(node) + if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + node.parts.first.value + else + # Any interpolation of expressions or variables will result in the + # string being constructed at run-time. + raise CompilationError + end + end + + def visit_words(node) + visit_all(node.elements) + end + + def visit_unsupported(_node) + raise CompilationError + end + + # Please forgive the metaprogramming here. This is used to create visit + # methods for every node that we did not explicitly handle. By default + # each of these methods will raise a CompilationError. + handled = instance_methods(false) + (Visitor.instance_methods(false) - handled).each do |method| + alias_method method, :visit_unsupported + end + end + + # This object is used to track the size of the stack at any given time. It + # is effectively a mini symbolic interpreter. It's necessary because when + # instruction sequences get serialized they include a :stack_max field on + # them. This field is used to determine how much stack space to allocate + # for the instruction sequence. + class Stack + attr_reader :current_size, :maximum_size + + def initialize + @current_size = 0 + @maximum_size = 0 + end + + def change_by(value) + @current_size += value + @maximum_size = @current_size if @current_size > @maximum_size + end + end + + # This represents every local variable associated with an instruction + # sequence. 
There are two kinds of locals: plain locals that are what you + # expect, and block proxy locals, which represent local variables + # associated with blocks that were passed into the current instruction + # sequence. + class LocalTable + # A local representing a block passed into the current instruction + # sequence. + class BlockLocal + attr_reader :name + + def initialize(name) + @name = name + end + end + + # A regular local variable. + class PlainLocal + attr_reader :name + + def initialize(name) + @name = name + end + end + + # The result of looking up a local variable in the current local table. + class Lookup + attr_reader :local, :index, :level + + def initialize(local, index, level) + @local = local + @index = index + @level = level + end + end + + attr_reader :locals + + def initialize + @locals = [] + end + + def find(name, level) + index = locals.index { |local| local.name == name } + Lookup.new(locals[index], index, level) if index + end + + def has?(name) + locals.any? { |local| local.name == name } + end + + def names + locals.map(&:name) + end + + def size + locals.length + end + + # Add a BlockLocal to the local table. + def block(name) + locals << BlockLocal.new(name) unless has?(name) + end + + # Add a PlainLocal to the local table. + def plain(name) + locals << PlainLocal.new(name) unless has?(name) + end + + # This is the offset from the top of the stack where this local variable + # lives. + def offset(index) + size - (index - 3) - 1 + end + end + + # This class is meant to mirror RubyVM::InstructionSequence. It contains a + # list of instructions along with the metadata pertaining to them. It also + # functions as a builder for the instruction sequence. + class InstructionSequence + MAGIC = "YARVInstructionSequence/SimpleDataFormat" + + # This provides a handle to the rb_iseq_load function, which allows you to + # pass a serialized iseq to Ruby and have it return a + # RubyVM::InstructionSequence object. 
+ ISEQ_LOAD = + Fiddle::Function.new( + Fiddle::Handle::DEFAULT["rb_iseq_load"], + [Fiddle::TYPE_VOIDP] * 3, + Fiddle::TYPE_VOIDP + ) + + # The type of the instruction sequence. + attr_reader :type + + # The name of the instruction sequence. + attr_reader :name + + # The parent instruction sequence, if there is one. + attr_reader :parent_iseq + + # The location of the root node of this instruction sequence. + attr_reader :location + + # This is the list of information about the arguments to this + # instruction sequence. + attr_accessor :argument_size + attr_reader :argument_options + + # The list of instructions for this instruction sequence. + attr_reader :insns + + # The table of local variables. + attr_reader :local_table + + # The hash of names of instance and class variables pointing to the + # index of their associated inline storage. + attr_reader :inline_storages + + # The index of the next inline storage that will be created. + attr_reader :storage_index + + # An object that will track the current size of the stack and the + # maximum size of the stack for this instruction sequence. + attr_reader :stack + + def initialize(type, name, parent_iseq, location) + @type = type + @name = name + @parent_iseq = parent_iseq + @location = location + + @argument_size = 0 + @argument_options = {} + + @local_table = LocalTable.new + @inline_storages = {} + @insns = [] + @storage_index = 0 + @stack = Stack.new + end + + def local_variable(name, level = 0) + if (lookup = local_table.find(name, level)) + lookup + elsif parent_iseq + parent_iseq.local_variable(name, level + 1) + end + end + + def push(insn) + insns << insn + insn + end + + def inline_storage + storage = storage_index + @storage_index += 1 + storage + end + + def inline_storage_for(name) + unless inline_storages.key?(name) + inline_storages[name] = inline_storage + end + + inline_storages[name] + end + + def length + insns.inject(0) do |sum, insn| + insn.is_a?(Array) ? 
sum + insn.length : sum + end + end + + def each_child + insns.each do |insn| + insn[1..].each do |operand| + yield operand if operand.is_a?(InstructionSequence) + end + end + end + + def eval + compiled = to_a + + # Temporary hack until we get these working. + compiled[4][:node_id] = 11 + compiled[4][:node_ids] = [1, 0, 3, 2, 6, 7, 9, -1] + + Fiddle.dlunwrap(ISEQ_LOAD.call(Fiddle.dlwrap(compiled), 0, nil)).eval + end + + def to_a + versions = RUBY_VERSION.split(".").map(&:to_i) + + [ + MAGIC, + versions[0], + versions[1], + 1, + { + arg_size: argument_size, + local_size: local_table.size, + stack_max: stack.maximum_size + }, + name, + "", + "", + location.start_line, + type, + local_table.names, + argument_options, + [], + insns.map { |insn| serialize(insn) } + ] + end + + private + + def serialize(insn) + case insn[0] + when :checkkeyword, :getblockparam, :getblockparamproxy, + :getlocal_WC_0, :getlocal_WC_1, :getlocal, :setlocal_WC_0, + :setlocal_WC_1, :setlocal + iseq = self + + case insn[0] + when :getlocal_WC_1, :setlocal_WC_1 + iseq = iseq.parent_iseq + when :getblockparam, :getblockparamproxy, :getlocal, :setlocal + insn[2].times { iseq = iseq.parent_iseq } + end + + # Here we need to map the local variable index to the offset + # from the top of the stack where it will be stored. + [insn[0], iseq.local_table.offset(insn[1]), *insn[2..]] + when :defineclass + [insn[0], insn[1], insn[2].to_a, insn[3]] + when :definemethod, :definesmethod + [insn[0], insn[1], insn[2].to_a] + when :send + # For any instructions that push instruction sequences onto the + # stack, we need to call #to_a on them as well. + [insn[0], insn[1], (insn[2].to_a if insn[2])] + when :once + [insn[0], insn[1].to_a, insn[2]] + else + insn + end + end + end + + # This class serves as a layer of indirection between the instruction + # sequence and the compiler. It allows us to provide different behavior + # for certain instructions depending on the Ruby version. 
For example, + # class variable reads and writes gained an inline cache in Ruby 3.0. So + # we place the logic for checking the Ruby version in this class. + class Builder + attr_reader :iseq, :stack + attr_reader :frozen_string_literal, + :operands_unification, + :specialized_instruction + + def initialize( + iseq, + frozen_string_literal: false, + operands_unification: true, + specialized_instruction: true + ) + @iseq = iseq + @stack = iseq.stack + + @frozen_string_literal = frozen_string_literal + @operands_unification = operands_unification + @specialized_instruction = specialized_instruction + end + + # This creates a new label at the current length of the instruction + # sequence. It is used as the operand for jump instructions. + def label + name = :"label_#{iseq.length}" + iseq.insns.last == name ? name : event(name) + end + + def event(name) + iseq.push(name) + name + end + + def adjuststack(number) + stack.change_by(-number) + iseq.push([:adjuststack, number]) + end + + def anytostring + stack.change_by(-2 + 1) + iseq.push([:anytostring]) + end + + def branchif(index) + stack.change_by(-1) + iseq.push([:branchif, index]) + end + + def branchnil(index) + stack.change_by(-1) + iseq.push([:branchnil, index]) + end + + def branchunless(index) + stack.change_by(-1) + iseq.push([:branchunless, index]) + end + + def checkkeyword(index, keyword_index) + stack.change_by(+1) + iseq.push([:checkkeyword, index, keyword_index]) + end + + def concatarray + stack.change_by(-2 + 1) + iseq.push([:concatarray]) + end + + def concatstrings(number) + stack.change_by(-number + 1) + iseq.push([:concatstrings, number]) + end + + def defined(type, name, message) + stack.change_by(-1 + 1) + iseq.push([:defined, type, name, message]) + end + + def defineclass(name, class_iseq, flags) + stack.change_by(-2 + 1) + iseq.push([:defineclass, name, class_iseq, flags]) + end + + def definemethod(name, method_iseq) + stack.change_by(0) + iseq.push([:definemethod, name, method_iseq]) + end 
+ + def definesmethod(name, method_iseq) + stack.change_by(-1) + iseq.push([:definesmethod, name, method_iseq]) + end + + def dup + stack.change_by(-1 + 2) + iseq.push([:dup]) + end + + def duparray(object) + stack.change_by(+1) + iseq.push([:duparray, object]) + end + + def duphash(object) + stack.change_by(+1) + iseq.push([:duphash, object]) + end + + def dupn(number) + stack.change_by(+number) + iseq.push([:dupn, number]) + end + + def expandarray(length, flag) + stack.change_by(-1 + length) + iseq.push([:expandarray, length, flag]) + end + + def getblockparam(index, level) + stack.change_by(+1) + iseq.push([:getblockparam, index, level]) + end + + def getblockparamproxy(index, level) + stack.change_by(+1) + iseq.push([:getblockparamproxy, index, level]) + end + + def getclassvariable(name) + stack.change_by(+1) + + if RUBY_VERSION >= "3.0" + iseq.push([:getclassvariable, name, iseq.inline_storage_for(name)]) + else + iseq.push([:getclassvariable, name]) + end + end + + def getconstant(name) + stack.change_by(-2 + 1) + iseq.push([:getconstant, name]) + end + + def getglobal(name) + stack.change_by(+1) + iseq.push([:getglobal, name]) + end + + def getinstancevariable(name) + stack.change_by(+1) + + if RUBY_VERSION >= "3.2" + iseq.push([:getinstancevariable, name, iseq.inline_storage]) + else + inline_storage = iseq.inline_storage_for(name) + iseq.push([:getinstancevariable, name, inline_storage]) + end + end + + def getlocal(index, level) + stack.change_by(+1) + + if operands_unification + # Specialize the getlocal instruction based on the level of the + # local variable. If it's 0 or 1, then there's a specialized + # instruction that will look at the current scope or the parent + # scope, respectively, and requires fewer operands. 
+ case level + when 0 + iseq.push([:getlocal_WC_0, index]) + when 1 + iseq.push([:getlocal_WC_1, index]) + else + iseq.push([:getlocal, index, level]) + end + else + iseq.push([:getlocal, index, level]) + end + end + + def getspecial(key, type) + stack.change_by(-0 + 1) + iseq.push([:getspecial, key, type]) + end + + def intern + stack.change_by(-1 + 1) + iseq.push([:intern]) + end + + def invokeblock(method_id, argc, flag) + stack.change_by(-argc + 1) + iseq.push([:invokeblock, call_data(method_id, argc, flag)]) + end + + def invokesuper(method_id, argc, flag, block_iseq) + stack.change_by(-(argc + 1) + 1) + + cdata = call_data(method_id, argc, flag) + iseq.push([:invokesuper, cdata, block_iseq]) + end + + def jump(index) + stack.change_by(0) + iseq.push([:jump, index]) + end + + def leave + stack.change_by(-1) + iseq.push([:leave]) + end + + def newarray(length) + stack.change_by(-length + 1) + iseq.push([:newarray, length]) + end + + def newhash(length) + stack.change_by(-length + 1) + iseq.push([:newhash, length]) + end + + def newrange(flag) + stack.change_by(-2 + 1) + iseq.push([:newrange, flag]) + end + + def nop + stack.change_by(0) + iseq.push([:nop]) + end + + def objtostring(method_id, argc, flag) + stack.change_by(-1 + 1) + iseq.push([:objtostring, call_data(method_id, argc, flag)]) + end + + def once(postexe_iseq, inline_storage) + stack.change_by(+1) + iseq.push([:once, postexe_iseq, inline_storage]) + end + + def opt_getconstant_path(names) + if RUBY_VERSION >= "3.2" + stack.change_by(+1) + iseq.push([:opt_getconstant_path, names]) + else + inline_storage = iseq.inline_storage + getinlinecache = opt_getinlinecache(-1, inline_storage) + + if names[0] == :"" + names.shift + pop + putobject(Object) + end + + names.each_with_index do |name, index| + putobject(index == 0) + getconstant(name) + end + + opt_setinlinecache(inline_storage) + getinlinecache[1] = label + end + end + + def opt_getinlinecache(offset, inline_storage) + stack.change_by(+1) + 
iseq.push([:opt_getinlinecache, offset, inline_storage]) + end + + def opt_newarray_max(length) + if specialized_instruction + stack.change_by(-length + 1) + iseq.push([:opt_newarray_max, length]) + else + newarray(length) + send(:max, 0, VM_CALL_ARGS_SIMPLE) + end + end + + def opt_newarray_min(length) + if specialized_instruction + stack.change_by(-length + 1) + iseq.push([:opt_newarray_min, length]) + else + newarray(length) + send(:min, 0, VM_CALL_ARGS_SIMPLE) + end + end + + def opt_setinlinecache(inline_storage) + stack.change_by(-1 + 1) + iseq.push([:opt_setinlinecache, inline_storage]) + end + + def opt_str_freeze(value) + if specialized_instruction + stack.change_by(+1) + iseq.push( + [ + :opt_str_freeze, + value, + call_data(:freeze, 0, VM_CALL_ARGS_SIMPLE) + ] + ) + else + putstring(value) + send(:freeze, 0, VM_CALL_ARGS_SIMPLE) + end + end + + def opt_str_uminus(value) + if specialized_instruction + stack.change_by(+1) + iseq.push( + [:opt_str_uminus, value, call_data(:-@, 0, VM_CALL_ARGS_SIMPLE)] + ) + else + putstring(value) + send(:-@, 0, VM_CALL_ARGS_SIMPLE) + end + end + + def pop + stack.change_by(-1) + iseq.push([:pop]) + end + + def putnil + stack.change_by(+1) + iseq.push([:putnil]) + end + + def putobject(object) + stack.change_by(+1) + + if operands_unification + # Specialize the putobject instruction based on the value of the + # object. If it's 0 or 1, then there's a specialized instruction + # that will push the object onto the stack and requires fewer + # operands. 
+ if object.eql?(0) + iseq.push([:putobject_INT2FIX_0_]) + elsif object.eql?(1) + iseq.push([:putobject_INT2FIX_1_]) + else + iseq.push([:putobject, object]) + end + else + iseq.push([:putobject, object]) + end + end + + def putself + stack.change_by(+1) + iseq.push([:putself]) + end + + def putspecialobject(object) + stack.change_by(+1) + iseq.push([:putspecialobject, object]) + end + + def putstring(object) + stack.change_by(+1) + iseq.push([:putstring, object]) + end + + def send(method_id, argc, flag, block_iseq = nil) + stack.change_by(-(argc + 1) + 1) + cdata = call_data(method_id, argc, flag) + + if specialized_instruction + # Specialize the send instruction. If it doesn't have a block + # attached, then we will replace it with an opt_send_without_block + # and do further specializations based on the called method and the + # number of arguments. + + # stree-ignore + if !block_iseq && (flag & VM_CALL_ARGS_BLOCKARG) == 0 + case [method_id, argc] + when [:length, 0] then iseq.push([:opt_length, cdata]) + when [:size, 0] then iseq.push([:opt_size, cdata]) + when [:empty?, 0] then iseq.push([:opt_empty_p, cdata]) + when [:nil?, 0] then iseq.push([:opt_nil_p, cdata]) + when [:succ, 0] then iseq.push([:opt_succ, cdata]) + when [:!, 0] then iseq.push([:opt_not, cdata]) + when [:+, 1] then iseq.push([:opt_plus, cdata]) + when [:-, 1] then iseq.push([:opt_minus, cdata]) + when [:*, 1] then iseq.push([:opt_mult, cdata]) + when [:/, 1] then iseq.push([:opt_div, cdata]) + when [:%, 1] then iseq.push([:opt_mod, cdata]) + when [:==, 1] then iseq.push([:opt_eq, cdata]) + when [:=~, 1] then iseq.push([:opt_regexpmatch2, cdata]) + when [:<, 1] then iseq.push([:opt_lt, cdata]) + when [:<=, 1] then iseq.push([:opt_le, cdata]) + when [:>, 1] then iseq.push([:opt_gt, cdata]) + when [:>=, 1] then iseq.push([:opt_ge, cdata]) + when [:<<, 1] then iseq.push([:opt_ltlt, cdata]) + when [:[], 1] then iseq.push([:opt_aref, cdata]) + when [:&, 1] then iseq.push([:opt_and, cdata]) + when 
[:|, 1] then iseq.push([:opt_or, cdata]) + when [:[]=, 2] then iseq.push([:opt_aset, cdata]) + when [:!=, 1] + eql_data = call_data(:==, 1, VM_CALL_ARGS_SIMPLE) + iseq.push([:opt_neq, eql_data, cdata]) + else + iseq.push([:opt_send_without_block, cdata]) + end + else + iseq.push([:send, cdata, block_iseq]) + end + else + iseq.push([:send, cdata, block_iseq]) + end + end + + def setclassvariable(name) + stack.change_by(-1) + + if RUBY_VERSION >= "3.0" + iseq.push([:setclassvariable, name, iseq.inline_storage_for(name)]) + else + iseq.push([:setclassvariable, name]) + end + end + + def setconstant(name) + stack.change_by(-2) + iseq.push([:setconstant, name]) + end + + def setglobal(name) + stack.change_by(-1) + iseq.push([:setglobal, name]) + end + + def setinstancevariable(name) + stack.change_by(-1) + + if RUBY_VERSION >= "3.2" + iseq.push([:setinstancevariable, name, iseq.inline_storage]) + else + inline_storage = iseq.inline_storage_for(name) + iseq.push([:setinstancevariable, name, inline_storage]) + end + end + + def setlocal(index, level) + stack.change_by(-1) + + if operands_unification + # Specialize the setlocal instruction based on the level of the + # local variable. If it's 0 or 1, then there's a specialized + # instruction that will write to the current scope or the parent + # scope, respectively, and requires fewer operands. 
+ case level + when 0 + iseq.push([:setlocal_WC_0, index]) + when 1 + iseq.push([:setlocal_WC_1, index]) + else + iseq.push([:setlocal, index, level]) + end + else + iseq.push([:setlocal, index, level]) + end + end + + def setn(number) + stack.change_by(-1 + 1) + iseq.push([:setn, number]) + end + + def splatarray(flag) + stack.change_by(-1 + 1) + iseq.push([:splatarray, flag]) + end + + def swap + stack.change_by(-2 + 2) + iseq.push([:swap]) + end + + def topn(number) + stack.change_by(+1) + iseq.push([:topn, number]) + end + + def toregexp(options, length) + stack.change_by(-length + 1) + iseq.push([:toregexp, options, length]) + end + + private + + # This creates a call data object that is used as the operand for the + # send, invokesuper, and objtostring instructions. + def call_data(method_id, argc, flag) + { mid: method_id, flag: flag, orig_argc: argc } + end + end + + # These constants correspond to the putspecialobject instruction. They are + # used to represent special objects that are pushed onto the stack. + VM_SPECIAL_OBJECT_VMCORE = 1 + VM_SPECIAL_OBJECT_CBASE = 2 + VM_SPECIAL_OBJECT_CONST_BASE = 3 + + # These constants correspond to the flag passed as part of the call data + # structure on the send instruction. They are used to represent various + # metadata about the callsite (e.g., were keyword arguments used?, was a + # block given?, etc.). + VM_CALL_ARGS_SPLAT = 1 << 0 + VM_CALL_ARGS_BLOCKARG = 1 << 1 + VM_CALL_FCALL = 1 << 2 + VM_CALL_VCALL = 1 << 3 + VM_CALL_ARGS_SIMPLE = 1 << 4 + VM_CALL_BLOCKISEQ = 1 << 5 + VM_CALL_KWARG = 1 << 6 + VM_CALL_KW_SPLAT = 1 << 7 + VM_CALL_TAILCALL = 1 << 8 + VM_CALL_SUPER = 1 << 9 + VM_CALL_ZSUPER = 1 << 10 + VM_CALL_OPT_SEND = 1 << 11 + VM_CALL_KW_SPLAT_MUT = 1 << 12 + + # These constants correspond to the value passed as part of the defined + # instruction. It's an enum defined in the CRuby codebase that tells that + # instruction what kind of defined check to perform. 
+ DEFINED_NIL = 1 + DEFINED_IVAR = 2 + DEFINED_LVAR = 3 + DEFINED_GVAR = 4 + DEFINED_CVAR = 5 + DEFINED_CONST = 6 + DEFINED_METHOD = 7 + DEFINED_YIELD = 8 + DEFINED_ZSUPER = 9 + DEFINED_SELF = 10 + DEFINED_TRUE = 11 + DEFINED_FALSE = 12 + DEFINED_ASGN = 13 + DEFINED_EXPR = 14 + DEFINED_REF = 15 + DEFINED_FUNC = 16 + DEFINED_CONST_FROM = 17 + + # These constants correspond to the value passed in the flags as part of + # the defineclass instruction. + VM_DEFINECLASS_TYPE_CLASS = 0 + VM_DEFINECLASS_TYPE_SINGLETON_CLASS = 1 + VM_DEFINECLASS_TYPE_MODULE = 2 + VM_DEFINECLASS_FLAG_SCOPED = 8 + VM_DEFINECLASS_FLAG_HAS_SUPERCLASS = 16 + + # These options mirror the compilation options that we currently support + # that can be also passed to RubyVM::InstructionSequence.compile. + attr_reader :frozen_string_literal, + :operands_unification, + :specialized_instruction + + # The current instruction sequence that is being compiled. + attr_reader :current_iseq + + # This is the current builder that is being used to construct the current + # instruction sequence. + attr_reader :builder + + # A boolean to track if we're currently compiling the last statement + # within a set of statements. This information is necessary to determine + # if we need to return the value of the last statement. 
+ attr_reader :last_statement + + def initialize( + frozen_string_literal: false, + operands_unification: true, + specialized_instruction: true + ) + @frozen_string_literal = frozen_string_literal + @operands_unification = operands_unification + @specialized_instruction = specialized_instruction + + @current_iseq = nil + @builder = nil + @last_statement = false + end + + def visit_BEGIN(node) + visit(node.statements) + end + + def visit_CHAR(node) + if frozen_string_literal + builder.putobject(node.value[1..]) + else + builder.putstring(node.value[1..]) + end + end + + def visit_END(node) + name = "block in #{current_iseq.name}" + once_iseq = + with_instruction_sequence(:block, name, current_iseq, node) do + postexe_iseq = + with_instruction_sequence(:block, name, current_iseq, node) do + *statements, last_statement = node.statements.body + visit_all(statements) + with_last_statement { visit(last_statement) } + builder.leave + end + + builder.putspecialobject(VM_SPECIAL_OBJECT_VMCORE) + builder.send(:"core#set_postexe", 0, VM_CALL_FCALL, postexe_iseq) + builder.leave + end + + builder.once(once_iseq, current_iseq.inline_storage) + builder.pop + end + + def visit_alias(node) + builder.putspecialobject(VM_SPECIAL_OBJECT_VMCORE) + builder.putspecialobject(VM_SPECIAL_OBJECT_CBASE) + visit(node.left) + visit(node.right) + builder.send(:"core#set_method_alias", 3, VM_CALL_ARGS_SIMPLE) + end + + def visit_aref(node) + visit(node.collection) + visit(node.index) + builder.send(:[], 1, VM_CALL_ARGS_SIMPLE) + end + + def visit_arg_block(node) + visit(node.value) + end + + def visit_arg_paren(node) + visit(node.arguments) + end + + def visit_arg_star(node) + visit(node.value) + builder.splatarray(false) + end + + def visit_args(node) + visit_all(node.parts) + end + + def visit_array(node) + if (compiled = RubyVisitor.compile(node)) + builder.duparray(compiled) + else + length = 0 + + node.contents.parts.each do |part| + if part.is_a?(ArgStar) + if length > 0 + 
builder.newarray(length) + length = 0 + end + + visit(part.value) + builder.concatarray + else + visit(part) + length += 1 + end + end + + builder.newarray(length) if length > 0 + if length > 0 && length != node.contents.parts.length + builder.concatarray + end + end + end + + def visit_assign(node) + case node.target + when ARefField + builder.putnil + visit(node.target.collection) + visit(node.target.index) + visit(node.value) + builder.setn(3) + builder.send(:[]=, 2, VM_CALL_ARGS_SIMPLE) + builder.pop + when ConstPathField + names = constant_names(node.target) + name = names.pop + + if RUBY_VERSION >= "3.2" + builder.opt_getconstant_path(names) + visit(node.value) + builder.swap + builder.topn(1) + builder.swap + builder.setconstant(name) + else + visit(node.value) + builder.dup if last_statement? + builder.opt_getconstant_path(names) + builder.setconstant(name) + end + when Field + builder.putnil + visit(node.target) + visit(node.value) + builder.setn(2) + builder.send(:"#{node.target.name.value}=", 1, VM_CALL_ARGS_SIMPLE) + builder.pop + when TopConstField + name = node.target.constant.value.to_sym + + if RUBY_VERSION >= "3.2" + builder.putobject(Object) + visit(node.value) + builder.swap + builder.topn(1) + builder.swap + builder.setconstant(name) + else + visit(node.value) + builder.dup if last_statement? + builder.putobject(Object) + builder.setconstant(name) + end + when VarField + visit(node.value) + builder.dup if last_statement? 
+ + case node.target.value + when Const + builder.putspecialobject(VM_SPECIAL_OBJECT_CONST_BASE) + builder.setconstant(node.target.value.value.to_sym) + when CVar + builder.setclassvariable(node.target.value.value.to_sym) + when GVar + builder.setglobal(node.target.value.value.to_sym) + when Ident + local_variable = visit(node.target) + builder.setlocal(local_variable.index, local_variable.level) + when IVar + builder.setinstancevariable(node.target.value.value.to_sym) + end + end + end + + def visit_assoc(node) + visit(node.key) + visit(node.value) + end + + def visit_assoc_splat(node) + visit(node.value) + end + + def visit_backref(node) + builder.getspecial(1, 2 * node.value[1..].to_i) + end + + def visit_bare_assoc_hash(node) + if (compiled = RubyVisitor.compile(node)) + builder.duphash(compiled) + else + visit_all(node.assocs) + end + end + + def visit_binary(node) + case node.operator + when :"&&" + visit(node.left) + builder.dup + + branchunless = builder.branchunless(-1) + builder.pop + + visit(node.right) + branchunless[1] = builder.label + when :"||" + visit(node.left) + builder.dup + + branchif = builder.branchif(-1) + builder.pop + + visit(node.right) + branchif[1] = builder.label + else + visit(node.left) + visit(node.right) + builder.send(node.operator, 1, VM_CALL_ARGS_SIMPLE) + end + end + + def visit_block(node) + with_instruction_sequence( + :block, + "block in #{current_iseq.name}", + current_iseq, + node + ) do + builder.event(:RUBY_EVENT_B_CALL) + visit(node.block_var) + visit(node.bodystmt) + builder.event(:RUBY_EVENT_B_RETURN) + builder.leave + end + end + + def visit_block_var(node) + params = node.params + + if params.requireds.length == 1 && params.optionals.empty? && + !params.rest && params.posts.empty? && params.keywords.empty? 
&& + !params.keyword_rest && !params.block + current_iseq.argument_options[:ambiguous_param0] = true + end + + visit(node.params) + + node.locals.each do |local| + current_iseq.local_table.plain(local.value.to_sym) + end + end + + def visit_blockarg(node) + current_iseq.argument_options[:block_start] = current_iseq.argument_size + current_iseq.local_table.block(node.name.value.to_sym) + current_iseq.argument_size += 1 + end + + def visit_bodystmt(node) + visit(node.statements) + end + + def visit_call(node) + if node.is_a?(CallNode) + return( + visit_call( + CommandCall.new( + receiver: node.receiver, + operator: node.operator, + message: node.message, + arguments: node.arguments, + block: nil, + location: node.location + ) + ) + ) + end + + arg_parts = argument_parts(node.arguments) + argc = arg_parts.length + + # First we're going to check if we're calling a method on an array + # literal without any arguments. In that case there are some + # specializations we might be able to perform. + if argc == 0 && (node.message.is_a?(Ident) || node.message.is_a?(Op)) + case node.receiver + when ArrayLiteral + parts = node.receiver.contents&.parts || [] + + if parts.none? { |part| part.is_a?(ArgStar) } && + RubyVisitor.compile(node.receiver).nil? + case node.message.value + when "max" + visit(node.receiver.contents) + builder.opt_newarray_max(parts.length) + return + when "min" + visit(node.receiver.contents) + builder.opt_newarray_min(parts.length) + return + end + end + when StringLiteral + if RubyVisitor.compile(node.receiver).nil? 
+ case node.message.value + when "-@" + builder.opt_str_uminus(node.receiver.parts.first.value) + return + when "freeze" + builder.opt_str_freeze(node.receiver.parts.first.value) + return + end + end + end + end + + if node.receiver + if node.receiver.is_a?(VarRef) && + ( + lookup = + current_iseq.local_variable(node.receiver.value.value.to_sym) + ) && lookup.local.is_a?(LocalTable::BlockLocal) + builder.getblockparamproxy(lookup.index, lookup.level) + else + visit(node.receiver) + end + else + builder.putself + end + + branchnil = + if node.operator&.value == "&." + builder.dup + builder.branchnil(-1) + end + + flag = 0 + + arg_parts.each do |arg_part| + case arg_part + when ArgBlock + argc -= 1 + flag |= VM_CALL_ARGS_BLOCKARG + visit(arg_part) + when ArgStar + flag |= VM_CALL_ARGS_SPLAT + visit(arg_part) + when ArgsForward + flag |= VM_CALL_ARGS_SPLAT | VM_CALL_ARGS_BLOCKARG + + lookup = current_iseq.local_table.find(:*, 0) + builder.getlocal(lookup.index, lookup.level) + builder.splatarray(arg_parts.length != 1) + + lookup = current_iseq.local_table.find(:&, 0) + builder.getblockparamproxy(lookup.index, lookup.level) + when BareAssocHash + flag |= VM_CALL_KW_SPLAT + visit(arg_part) + else + visit(arg_part) + end + end + + block_iseq = visit(node.block) if node.block + flag |= VM_CALL_ARGS_SIMPLE if block_iseq.nil? && flag == 0 + flag |= VM_CALL_FCALL if node.receiver.nil? 
+ + builder.send(node.message.value.to_sym, argc, flag, block_iseq) + branchnil[1] = builder.label if branchnil + end + + def visit_case(node) + visit(node.value) if node.value + + clauses = [] + else_clause = nil + + current = node.consequent + + while current + clauses << current + + if (current = current.consequent).is_a?(Else) + else_clause = current + break + end + end + + branches = + clauses.map do |clause| + visit(clause.arguments) + builder.topn(1) + builder.send(:===, 1, VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE) + [clause, builder.branchif(:label_00)] + end + + builder.pop + + else_clause ? visit(else_clause) : builder.putnil + + builder.leave + + branches.each_with_index do |(clause, branchif), index| + builder.leave if index != 0 + branchif[1] = builder.label + builder.pop + visit(clause) + end + end + + def visit_class(node) + name = node.constant.constant.value.to_sym + class_iseq = + with_instruction_sequence( + :class, + "", + current_iseq, + node + ) do + builder.event(:RUBY_EVENT_CLASS) + visit(node.bodystmt) + builder.event(:RUBY_EVENT_END) + builder.leave + end + + flags = VM_DEFINECLASS_TYPE_CLASS + + case node.constant + when ConstPathRef + flags |= VM_DEFINECLASS_FLAG_SCOPED + visit(node.constant.parent) + when ConstRef + builder.putspecialobject(VM_SPECIAL_OBJECT_CONST_BASE) + when TopConstRef + flags |= VM_DEFINECLASS_FLAG_SCOPED + builder.putobject(Object) + end + + if node.superclass + flags |= VM_DEFINECLASS_FLAG_HAS_SUPERCLASS + visit(node.superclass) + else + builder.putnil + end + + builder.defineclass(name, class_iseq, flags) + end + + def visit_command(node) + visit_call( + CommandCall.new( + receiver: nil, + operator: nil, + message: node.message, + arguments: node.arguments, + block: node.block, + location: node.location + ) + ) + end + + def visit_command_call(node) + visit_call( + CommandCall.new( + receiver: node.receiver, + operator: node.operator, + message: node.message, + arguments: node.arguments, + block: node.block, + 
location: node.location + ) + ) + end + + def visit_const_path_field(node) + visit(node.parent) + end + + def visit_const_path_ref(node) + names = constant_names(node) + builder.opt_getconstant_path(names) + end + + def visit_def(node) + method_iseq = + with_instruction_sequence( + :method, + node.name.value, + current_iseq, + node + ) do + visit(node.params) if node.params + builder.event(:RUBY_EVENT_CALL) + visit(node.bodystmt) + builder.event(:RUBY_EVENT_RETURN) + builder.leave + end + + name = node.name.value.to_sym + + if node.target + visit(node.target) + builder.definesmethod(name, method_iseq) + else + builder.definemethod(name, method_iseq) + end + + builder.putobject(name) + end + + def visit_defined(node) + case node.value + when Assign + # If we're assigning to a local variable, then we need to make sure + # that we put it into the local table. + if node.value.target.is_a?(VarField) && + node.value.target.value.is_a?(Ident) + current_iseq.local_table.plain(node.value.target.value.value.to_sym) + end + + builder.putobject("assignment") + when VarRef + value = node.value.value + name = value.value.to_sym + + case value + when Const + builder.putnil + builder.defined(DEFINED_CONST, name, "constant") + when CVar + builder.putnil + builder.defined(DEFINED_CVAR, name, "class variable") + when GVar + builder.putnil + builder.defined(DEFINED_GVAR, name, "global-variable") + when Ident + builder.putobject("local-variable") + when IVar + builder.putnil + builder.defined(DEFINED_IVAR, name, "instance-variable") + when Kw + case name + when :false + builder.putobject("false") + when :nil + builder.putobject("nil") + when :self + builder.putobject("self") + when :true + builder.putobject("true") + end + end + when VCall + builder.putself + + name = node.value.value.value.to_sym + builder.defined(DEFINED_FUNC, name, "method") + when YieldNode + builder.putnil + builder.defined(DEFINED_YIELD, false, "yield") + when ZSuper + builder.putnil + 
builder.defined(DEFINED_ZSUPER, false, "super") + else + builder.putobject("expression") + end + end + + def visit_dyna_symbol(node) + if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + builder.putobject(node.parts.first.value.to_sym) + end + end + + def visit_else(node) + visit(node.statements) + builder.pop unless last_statement? + end + + def visit_elsif(node) + visit_if( + IfNode.new( + predicate: node.predicate, + statements: node.statements, + consequent: node.consequent, + location: node.location + ) + ) + end + + def visit_field(node) + visit(node.parent) + end + + def visit_float(node) + builder.putobject(node.accept(RubyVisitor.new)) + end + + def visit_for(node) + visit(node.collection) + + name = node.index.value.value.to_sym + current_iseq.local_table.plain(name) + + block_iseq = + with_instruction_sequence( + :block, + "block in #{current_iseq.name}", + current_iseq, + node.statements + ) do + current_iseq.argument_options[:lead_num] ||= 0 + current_iseq.argument_options[:lead_num] += 1 + current_iseq.argument_options[:ambiguous_param0] = true + + current_iseq.argument_size += 1 + current_iseq.local_table.plain(2) + + builder.getlocal(0, 0) + + local_variable = current_iseq.local_variable(name) + builder.setlocal(local_variable.index, local_variable.level) + + builder.event(:RUBY_EVENT_B_CALL) + builder.nop + + visit(node.statements) + builder.event(:RUBY_EVENT_B_RETURN) + builder.leave + end + + builder.send(:each, 0, 0, block_iseq) + end + + def visit_hash(node) + builder.duphash(node.accept(RubyVisitor.new)) + rescue RubyVisitor::CompilationError + visit_all(node.assocs) + builder.newhash(node.assocs.length * 2) + end + + def visit_heredoc(node) + if node.beginning.value.end_with?("`") + visit_xstring_literal(node) + elsif node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + visit(node.parts.first) + else + length = visit_string_parts(node) + builder.concatstrings(length) + end + end + + def visit_if(node) + 
visit(node.predicate) + branchunless = builder.branchunless(-1) + visit(node.statements) + + if last_statement? + builder.leave + branchunless[1] = builder.label + + node.consequent ? visit(node.consequent) : builder.putnil + else + builder.pop + + if node.consequent + jump = builder.jump(-1) + branchunless[1] = builder.label + visit(node.consequent) + jump[1] = builder.label + else + branchunless[1] = builder.label + end + end + end + + def visit_if_op(node) + visit_if( + IfNode.new( + predicate: node.predicate, + statements: node.truthy, + consequent: + Else.new( + keyword: Kw.new(value: "else", location: Location.default), + statements: node.falsy, + location: Location.default + ), + location: Location.default + ) + ) + end + + def visit_imaginary(node) + builder.putobject(node.accept(RubyVisitor.new)) + end + + def visit_int(node) + builder.putobject(node.accept(RubyVisitor.new)) + end + + def visit_kwrest_param(node) + current_iseq.argument_options[:kwrest] = current_iseq.argument_size + current_iseq.argument_size += 1 + current_iseq.local_table.plain(node.name.value.to_sym) + end + + def visit_label(node) + builder.putobject(node.accept(RubyVisitor.new)) + end + + def visit_lambda(node) + lambda_iseq = + with_instruction_sequence( + :block, + "block in #{current_iseq.name}", + current_iseq, + node + ) do + builder.event(:RUBY_EVENT_B_CALL) + visit(node.params) + visit(node.statements) + builder.event(:RUBY_EVENT_B_RETURN) + builder.leave + end + + builder.putspecialobject(VM_SPECIAL_OBJECT_VMCORE) + builder.send(:lambda, 0, VM_CALL_FCALL, lambda_iseq) + end + + def visit_lambda_var(node) + visit_block_var(node) + end + + def visit_massign(node) + visit(node.value) + builder.dup + visit(node.target) + end + + def visit_method_add_block(node) + visit_call( + CommandCall.new( + receiver: node.call.receiver, + operator: node.call.operator, + message: node.call.message, + arguments: node.call.arguments, + block: node.block, + location: node.location + ) + ) + end 
+ + def visit_mlhs(node) + lookups = [] + + node.parts.each do |part| + case part + when VarField + lookups << visit(part) + end + end + + builder.expandarray(lookups.length, 0) + + lookups.each { |lookup| builder.setlocal(lookup.index, lookup.level) } + end + + def visit_module(node) + name = node.constant.constant.value.to_sym + module_iseq = + with_instruction_sequence( + :class, + "", + current_iseq, + node + ) do + builder.event(:RUBY_EVENT_CLASS) + visit(node.bodystmt) + builder.event(:RUBY_EVENT_END) + builder.leave + end + + flags = VM_DEFINECLASS_TYPE_MODULE + + case node.constant + when ConstPathRef + flags |= VM_DEFINECLASS_FLAG_SCOPED + visit(node.constant.parent) + when ConstRef + builder.putspecialobject(VM_SPECIAL_OBJECT_CONST_BASE) + when TopConstRef + flags |= VM_DEFINECLASS_FLAG_SCOPED + builder.putobject(Object) + end + + builder.putnil + builder.defineclass(name, module_iseq, flags) + end + + def visit_mrhs(node) + if (compiled = RubyVisitor.compile(node)) + builder.duparray(compiled) + else + visit_all(node.parts) + builder.newarray(node.parts.length) + end + end + + def visit_not(node) + visit(node.statement) + builder.send(:!, 0, VM_CALL_ARGS_SIMPLE) + end + + def visit_opassign(node) + flag = VM_CALL_ARGS_SIMPLE + if node.target.is_a?(ConstPathField) || node.target.is_a?(TopConstField) + flag |= VM_CALL_FCALL + end + + case (operator = node.operator.value.chomp("=").to_sym) + when :"&&" + branchunless = nil + + with_opassign(node) do + builder.dup + branchunless = builder.branchunless(-1) + builder.pop + visit(node.value) + end + + case node.target + when ARefField + builder.leave + branchunless[1] = builder.label + builder.setn(3) + builder.adjuststack(3) + when ConstPathField, TopConstField + branchunless[1] = builder.label + builder.swap + builder.pop + else + branchunless[1] = builder.label + end + when :"||" + if node.target.is_a?(ConstPathField) || + node.target.is_a?(TopConstField) + opassign_defined(node) + builder.swap + builder.pop 
+ elsif node.target.is_a?(VarField) && + [Const, CVar, GVar].include?(node.target.value.class) + opassign_defined(node) + else + branchif = nil + + with_opassign(node) do + builder.dup + branchif = builder.branchif(-1) + builder.pop + visit(node.value) + end + + if node.target.is_a?(ARefField) + builder.leave + branchif[1] = builder.label + builder.setn(3) + builder.adjuststack(3) + else + branchif[1] = builder.label + end + end + else + with_opassign(node) do + visit(node.value) + builder.send(operator, 1, flag) + end + end + end + + def visit_params(node) + argument_options = current_iseq.argument_options + + if node.requireds.any? + argument_options[:lead_num] = 0 + + node.requireds.each do |required| + current_iseq.local_table.plain(required.value.to_sym) + current_iseq.argument_size += 1 + argument_options[:lead_num] += 1 + end + end + + node.optionals.each do |(optional, value)| + index = current_iseq.local_table.size + name = optional.value.to_sym + + current_iseq.local_table.plain(name) + current_iseq.argument_size += 1 + + unless argument_options.key?(:opt) + argument_options[:opt] = [builder.label] + end + + visit(value) + builder.setlocal(index, 0) + current_iseq.argument_options[:opt] << builder.label + end + + visit(node.rest) if node.rest + + if node.posts.any? + argument_options[:post_start] = current_iseq.argument_size + argument_options[:post_num] = 0 + + node.posts.each do |post| + current_iseq.local_table.plain(post.value.to_sym) + current_iseq.argument_size += 1 + argument_options[:post_num] += 1 + end + end + + if node.keywords.any? + argument_options[:kwbits] = 0 + argument_options[:keyword] = [] + checkkeywords = [] + + node.keywords.each_with_index do |(keyword, value), keyword_index| + name = keyword.value.chomp(":").to_sym + index = current_iseq.local_table.size + + current_iseq.local_table.plain(name) + current_iseq.argument_size += 1 + argument_options[:kwbits] += 1 + + if value.nil? 
+ argument_options[:keyword] << name + else + begin + compiled = value.accept(RubyVisitor.new) + argument_options[:keyword] << [name, compiled] + rescue RubyVisitor::CompilationError + argument_options[:keyword] << [name] + checkkeywords << builder.checkkeyword(-1, keyword_index) + branchif = builder.branchif(-1) + visit(value) + builder.setlocal(index, 0) + branchif[1] = builder.label + end + end + end + + name = node.keyword_rest ? 3 : 2 + current_iseq.argument_size += 1 + current_iseq.local_table.plain(name) + + lookup = current_iseq.local_table.find(name, 0) + checkkeywords.each { |checkkeyword| checkkeyword[1] = lookup.index } + end + + if node.keyword_rest.is_a?(ArgsForward) + current_iseq.local_table.plain(:*) + current_iseq.local_table.plain(:&) + + current_iseq.argument_options[ + :rest_start + ] = current_iseq.argument_size + current_iseq.argument_options[ + :block_start + ] = current_iseq.argument_size + 1 + + current_iseq.argument_size += 2 + elsif node.keyword_rest + visit(node.keyword_rest) + end + + visit(node.block) if node.block + end + + def visit_paren(node) + visit(node.contents) + end + + def visit_program(node) + node.statements.body.each do |statement| + break unless statement.is_a?(Comment) + + if statement.value == "# frozen_string_literal: true" + @frozen_string_literal = true + end + end + + preexes = [] + statements = [] + + node.statements.body.each do |statement| + case statement + when Comment, EmbDoc, EndContent, VoidStmt + # ignore + when BEGINBlock + preexes << statement + else + statements << statement + end + end + + with_instruction_sequence(:top, "", nil, node) do + visit_all(preexes) + + if statements.empty? 
+ builder.putnil + else + *statements, last_statement = statements + visit_all(statements) + with_last_statement { visit(last_statement) } + end + + builder.leave + end + end + + def visit_qsymbols(node) + builder.duparray(node.accept(RubyVisitor.new)) + end + + def visit_qwords(node) + if frozen_string_literal + builder.duparray(node.accept(RubyVisitor.new)) + else + visit_all(node.elements) + builder.newarray(node.elements.length) + end + end + + def visit_range(node) + builder.putobject(node.accept(RubyVisitor.new)) + rescue RubyVisitor::CompilationError + visit(node.left) + visit(node.right) + builder.newrange(node.operator.value == ".." ? 0 : 1) + end + + def visit_rational(node) + builder.putobject(node.accept(RubyVisitor.new)) + end + + def visit_regexp_literal(node) + builder.putobject(node.accept(RubyVisitor.new)) + rescue RubyVisitor::CompilationError + flags = RubyVisitor.new.visit_regexp_literal_flags(node) + length = visit_string_parts(node) + builder.toregexp(flags, length) + end + + def visit_rest_param(node) + current_iseq.local_table.plain(node.name.value.to_sym) + current_iseq.argument_options[:rest_start] = current_iseq.argument_size + current_iseq.argument_size += 1 + end + + def visit_sclass(node) + visit(node.target) + builder.putnil + + singleton_iseq = + with_instruction_sequence( + :class, + "singleton class", + current_iseq, + node + ) do + builder.event(:RUBY_EVENT_CLASS) + visit(node.bodystmt) + builder.event(:RUBY_EVENT_END) + builder.leave + end + + builder.defineclass( + :singletonclass, + singleton_iseq, + VM_DEFINECLASS_TYPE_SINGLETON_CLASS + ) + end + + def visit_statements(node) + statements = + node.body.select do |statement| + case statement + when Comment, EmbDoc, EndContent, VoidStmt + false + else + true + end + end + + statements.empty? ? 
builder.putnil : visit_all(statements) + end + + def visit_string_concat(node) + value = node.left.parts.first.value + node.right.parts.first.value + content = TStringContent.new(value: value, location: node.location) + + literal = + StringLiteral.new( + parts: [content], + quote: node.left.quote, + location: node.location + ) + visit_string_literal(literal) + end + + def visit_string_embexpr(node) + visit(node.statements) + end + + def visit_string_literal(node) + if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + visit(node.parts.first) + else + length = visit_string_parts(node) + builder.concatstrings(length) + end + end + + def visit_super(node) + builder.putself + visit(node.arguments) + builder.invokesuper( + nil, + argument_parts(node.arguments).length, + VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE | VM_CALL_SUPER, + nil + ) + end + + def visit_symbol_literal(node) + builder.putobject(node.accept(RubyVisitor.new)) + end + + def visit_symbols(node) + builder.duparray(node.accept(RubyVisitor.new)) + rescue RubyVisitor::CompilationError + node.elements.each do |element| + if element.parts.length == 1 && + element.parts.first.is_a?(TStringContent) + builder.putobject(element.parts.first.value.to_sym) + else + length = visit_string_parts(element) + builder.concatstrings(length) + builder.intern + end + end + + builder.newarray(node.elements.length) + end + + def visit_top_const_ref(node) + builder.opt_getconstant_path(constant_names(node)) + end + + def visit_tstring_content(node) + if frozen_string_literal + builder.putobject(node.accept(RubyVisitor.new)) + else + builder.putstring(node.accept(RubyVisitor.new)) + end + end + + def visit_unary(node) + method_id = + case node.operator + when "+", "-" + "#{node.operator}@" + else + node.operator + end + + visit_call( + CommandCall.new( + receiver: node.statement, + operator: nil, + message: Ident.new(value: method_id, location: Location.default), + arguments: nil, + block: nil, + location: 
Location.default + ) + ) + end + + def visit_undef(node) + node.symbols.each_with_index do |symbol, index| + builder.pop if index != 0 + builder.putspecialobject(VM_SPECIAL_OBJECT_VMCORE) + builder.putspecialobject(VM_SPECIAL_OBJECT_CBASE) + visit(symbol) + builder.send(:"core#undef_method", 2, VM_CALL_ARGS_SIMPLE) + end + end + + def visit_unless(node) + visit(node.predicate) + branchunless = builder.branchunless(-1) + node.consequent ? visit(node.consequent) : builder.putnil + + if last_statement? + builder.leave + branchunless[1] = builder.label + + visit(node.statements) + else + builder.pop + + if node.consequent + jump = builder.jump(-1) + branchunless[1] = builder.label + visit(node.consequent) + jump[1] = builder.label + else + branchunless[1] = builder.label + end + end + end + + def visit_until(node) + jumps = [] + + jumps << builder.jump(-1) + builder.putnil + builder.pop + jumps << builder.jump(-1) + + label = builder.label + visit(node.statements) + builder.pop + jumps.each { |jump| jump[1] = builder.label } + + visit(node.predicate) + builder.branchunless(label) + builder.putnil if last_statement? 
+ end + + def visit_var_field(node) + case node.value + when CVar, IVar + name = node.value.value.to_sym + current_iseq.inline_storage_for(name) + when Ident + name = node.value.value.to_sym + + if (local_variable = current_iseq.local_variable(name)) + local_variable + else + current_iseq.local_table.plain(name) + current_iseq.local_variable(name) + end + end + end + + def visit_var_ref(node) + case node.value + when Const + builder.opt_getconstant_path(constant_names(node)) + when CVar + name = node.value.value.to_sym + builder.getclassvariable(name) + when GVar + builder.getglobal(node.value.value.to_sym) + when Ident + lookup = current_iseq.local_variable(node.value.value.to_sym) + + case lookup.local + when LocalTable::BlockLocal + builder.getblockparam(lookup.index, lookup.level) + when LocalTable::PlainLocal + builder.getlocal(lookup.index, lookup.level) + end + when IVar + name = node.value.value.to_sym + builder.getinstancevariable(name) + when Kw + case node.value.value + when "false" + builder.putobject(false) + when "nil" + builder.putnil + when "self" + builder.putself + when "true" + builder.putobject(true) + end + end + end + + def visit_vcall(node) + builder.putself + + flag = VM_CALL_FCALL | VM_CALL_VCALL | VM_CALL_ARGS_SIMPLE + builder.send(node.value.value.to_sym, 0, flag) + end + + def visit_when(node) + visit(node.statements) + end + + def visit_while(node) + jumps = [] + + jumps << builder.jump(-1) + builder.putnil + builder.pop + jumps << builder.jump(-1) + + label = builder.label + visit(node.statements) + builder.pop + jumps.each { |jump| jump[1] = builder.label } + + visit(node.predicate) + builder.branchif(label) + builder.putnil if last_statement? 
+ end + + def visit_word(node) + if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + visit(node.parts.first) + else + length = visit_string_parts(node) + builder.concatstrings(length) + end + end + + def visit_words(node) + converted = nil + + if frozen_string_literal + begin + converted = node.accept(RubyVisitor.new) + rescue RubyVisitor::CompilationError + end + end + + if converted + builder.duparray(converted) + else + visit_all(node.elements) + builder.newarray(node.elements.length) + end + end + + def visit_xstring_literal(node) + builder.putself + length = visit_string_parts(node) + builder.concatstrings(node.parts.length) if length > 1 + builder.send(:`, 1, VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE) + end + + def visit_yield(node) + parts = argument_parts(node.arguments) + visit_all(parts) + builder.invokeblock(nil, parts.length, VM_CALL_ARGS_SIMPLE) + end + + def visit_zsuper(_node) + builder.putself + builder.invokesuper( + nil, + 0, + VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE | VM_CALL_SUPER | VM_CALL_ZSUPER, + nil + ) + end + + private + + # This is a helper that is used in places where arguments may be present + # or they may be wrapped in parentheses. It's meant to descend down the + # tree and return an array of argument nodes. + def argument_parts(node) + case node + when nil + [] + when Args + node.parts + when ArgParen + if node.arguments.is_a?(ArgsForward) + [node.arguments] + else + node.arguments.parts + end + when Paren + node.contents.parts + end + end + + # Constant names when they are being assigned or referenced come in as a + # tree, but it's more convenient to work with them as an array. This + # method converts them into that array. This is nice because it's the + # operand that goes to opt_getconstant_path in Ruby 3.2. 
+ def constant_names(node) + current = node + names = [] + + while current.is_a?(ConstPathField) || current.is_a?(ConstPathRef) + names.unshift(current.constant.value.to_sym) + current = current.parent + end + + case current + when VarField, VarRef + names.unshift(current.value.value.to_sym) + when TopConstRef + names.unshift(current.constant.value.to_sym) + names.unshift(:"") + end + + names + end + + # For the most part when an OpAssign (operator assignment) node with a ||= + # operator is being compiled it's a matter of reading the target, checking + # if the value should be evaluated, evaluating it if so, and then writing + # the result back to the target. + # + # However, in certain kinds of assignments (X, ::X, X::Y, @@x, and $x) we + # first check if the value is defined using the defined instruction. I + # don't know why it is necessary, and suspect that it isn't. + def opassign_defined(node) + case node.target + when ConstPathField + visit(node.target.parent) + name = node.target.constant.value.to_sym + + builder.dup + builder.defined(DEFINED_CONST_FROM, name, true) + when TopConstField + name = node.target.constant.value.to_sym + + builder.putobject(Object) + builder.dup + builder.defined(DEFINED_CONST_FROM, name, true) + when VarField + name = node.target.value.value.to_sym + builder.putnil + + case node.target.value + when Const + builder.defined(DEFINED_CONST, name, true) + when CVar + builder.defined(DEFINED_CVAR, name, true) + when GVar + builder.defined(DEFINED_GVAR, name, true) + end + end + + branchunless = builder.branchunless(-1) + + case node.target + when ConstPathField, TopConstField + builder.dup + builder.putobject(true) + builder.getconstant(name) + when VarField + case node.target.value + when Const + builder.opt_getconstant_path(constant_names(node.target)) + when CVar + builder.getclassvariable(name) + when GVar + builder.getglobal(name) + end + end + + builder.dup + branchif = builder.branchif(-1) + builder.pop + + branchunless[1] = 
builder.label + visit(node.value) + + case node.target + when ConstPathField, TopConstField + builder.dupn(2) + builder.swap + builder.setconstant(name) + when VarField + builder.dup + + case node.target.value + when Const + builder.putspecialobject(VM_SPECIAL_OBJECT_CONST_BASE) + builder.setconstant(name) + when CVar + builder.setclassvariable(name) + when GVar + builder.setglobal(name) + end + end + + branchif[1] = builder.label + end + + # Whenever a value is interpolated into a string-like structure, these + # three instructions are pushed. + def push_interpolate + builder.dup + builder.objtostring(:to_s, 0, VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE) + builder.anytostring + end + + # There are a lot of nodes in the AST that act as contains of parts of + # strings. This includes things like string literals, regular expressions, + # heredocs, etc. This method will visit all the parts of a string within + # those containers. + def visit_string_parts(node) + length = 0 + + unless node.parts.first.is_a?(TStringContent) + builder.putobject("") + length += 1 + end + + node.parts.each do |part| + case part + when StringDVar + visit(part.variable) + push_interpolate + when StringEmbExpr + visit(part) + push_interpolate + when TStringContent + builder.putobject(part.accept(RubyVisitor.new)) + end + + length += 1 + end + + length + end + + # The current instruction sequence that we're compiling is always stored + # on the compiler. When we descend into a node that has its own + # instruction sequence, this method can be called to temporarily set the + # new value of the instruction sequence, yield, and then set it back. 
+ def with_instruction_sequence(type, name, parent_iseq, node) + previous_iseq = current_iseq + previous_builder = builder + + begin + iseq = InstructionSequence.new(type, name, parent_iseq, node.location) + + @current_iseq = iseq + @builder = + Builder.new( + iseq, + frozen_string_literal: frozen_string_literal, + operands_unification: operands_unification, + specialized_instruction: specialized_instruction + ) + + yield + iseq + ensure + @current_iseq = previous_iseq + @builder = previous_builder + end + end + + # When we're compiling the last statement of a set of statements within a + # scope, the instructions sometimes change from pops to leaves. These + # kinds of peephole optimizations can reduce the overall number of + # instructions. Therefore, we keep track of whether we're compiling the + # last statement of a scope and allow visit methods to query that + # information. + def with_last_statement + previous = @last_statement + @last_statement = true + + begin + yield + ensure + @last_statement = previous + end + end + + def last_statement? + @last_statement + end + + # OpAssign nodes can have a number of different kinds of nodes as their + # "target" (i.e., the left-hand side of the assignment). When compiling + # these nodes we typically need to first fetch the current value of the + # variable, then perform some kind of action, then store the result back + # into the variable. This method handles that by first fetching the value, + # then yielding to the block, then storing the result. 
+ def with_opassign(node) + case node.target + when ARefField + builder.putnil + visit(node.target.collection) + visit(node.target.index) + + builder.dupn(2) + builder.send(:[], 1, VM_CALL_ARGS_SIMPLE) + + yield + + builder.setn(3) + builder.send(:[]=, 2, VM_CALL_ARGS_SIMPLE) + builder.pop + when ConstPathField + name = node.target.constant.value.to_sym + + visit(node.target.parent) + builder.dup + builder.putobject(true) + builder.getconstant(name) + + yield + + if node.operator.value == "&&=" + builder.dupn(2) + else + builder.swap + builder.topn(1) + end + + builder.swap + builder.setconstant(name) + when TopConstField + name = node.target.constant.value.to_sym + + builder.putobject(Object) + builder.dup + builder.putobject(true) + builder.getconstant(name) + + yield + + if node.operator.value == "&&=" + builder.dupn(2) + else + builder.swap + builder.topn(1) + end + + builder.swap + builder.setconstant(name) + when VarField + case node.target.value + when Const + names = constant_names(node.target) + builder.opt_getconstant_path(names) + + yield + + builder.dup + builder.putspecialobject(VM_SPECIAL_OBJECT_CONST_BASE) + builder.setconstant(names.last) + when CVar + name = node.target.value.value.to_sym + builder.getclassvariable(name) + + yield + + builder.dup + builder.setclassvariable(name) + when GVar + name = node.target.value.value.to_sym + builder.getglobal(name) + + yield + + builder.dup + builder.setglobal(name) + when Ident + local_variable = visit(node.target) + builder.getlocal(local_variable.index, local_variable.level) + + yield + + builder.dup + builder.setlocal(local_variable.index, local_variable.level) + when IVar + name = node.target.value.value.to_sym + builder.getinstancevariable(name) + + yield + + builder.dup + builder.setinstancevariable(name) + end + end + end + end +end diff --git a/lib/syntax_tree/visitor/compiler.rb b/lib/syntax_tree/visitor/compiler.rb deleted file mode 100644 index 82155d37..00000000 --- 
a/lib/syntax_tree/visitor/compiler.rb +++ /dev/null @@ -1,2719 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - class Visitor - # This class is an experiment in transforming Syntax Tree nodes into their - # corresponding YARV instruction sequences. It attempts to mirror the - # behavior of RubyVM::InstructionSequence.compile. - # - # You use this as with any other visitor. First you parse code into a tree, - # then you visit it with this compiler. Visiting the root node of the tree - # will return a SyntaxTree::Visitor::Compiler::InstructionSequence object. - # With that object you can call #to_a on it, which will return a serialized - # form of the instruction sequence as an array. This array _should_ mirror - # the array given by RubyVM::InstructionSequence#to_a. - # - # As an example, here is how you would compile a single expression: - # - # program = SyntaxTree.parse("1 + 2") - # program.accept(SyntaxTree::Visitor::Compiler.new).to_a - # - # [ - # "YARVInstructionSequence/SimpleDataFormat", - # 3, - # 1, - # 1, - # {:arg_size=>0, :local_size=>0, :stack_max=>2}, - # "", - # "", - # "", - # 1, - # :top, - # [], - # {}, - # [], - # [ - # [:putobject_INT2FIX_1_], - # [:putobject, 2], - # [:opt_plus, {:mid=>:+, :flag=>16, :orig_argc=>1}], - # [:leave] - # ] - # ] - # - # Note that this is the same output as calling: - # - # RubyVM::InstructionSequence.compile("1 + 2").to_a - # - class Compiler < BasicVisitor - # This visitor is responsible for converting Syntax Tree nodes into their - # corresponding Ruby structures. This is used to convert the operands of - # some instructions like putobject that push a Ruby object directly onto - # the stack. It is only used when the entire structure can be represented - # at compile-time, as opposed to constructed at run-time. - class RubyVisitor < BasicVisitor - # This error is raised whenever a node cannot be converted into a Ruby - # object at compile-time. 
- class CompilationError < StandardError - end - - # This will attempt to compile the given node. If it's possible, then - # it will return the compiled object. Otherwise it will return nil. - def self.compile(node) - node.accept(new) - rescue CompilationError - end - - def visit_array(node) - visit_all(node.contents.parts) - end - - def visit_bare_assoc_hash(node) - node.assocs.to_h do |assoc| - # We can only convert regular key-value pairs. A double splat ** - # operator means it has to be converted at run-time. - raise CompilationError unless assoc.is_a?(Assoc) - [visit(assoc.key), visit(assoc.value)] - end - end - - def visit_float(node) - node.value.to_f - end - - alias visit_hash visit_bare_assoc_hash - - def visit_imaginary(node) - node.value.to_c - end - - def visit_int(node) - node.value.to_i - end - - def visit_label(node) - node.value.chomp(":").to_sym - end - - def visit_mrhs(node) - visit_all(node.parts) - end - - def visit_qsymbols(node) - node.elements.map { |element| visit(element).to_sym } - end - - def visit_qwords(node) - visit_all(node.elements) - end - - def visit_range(node) - left, right = [visit(node.left), visit(node.right)] - node.operator.value === ".." ? left..right : left...right - end - - def visit_rational(node) - node.value.to_r - end - - def visit_regexp_literal(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - Regexp.new(node.parts.first.value, visit_regexp_literal_flags(node)) - else - # Any interpolation of expressions or variables will result in the - # regular expression being constructed at run-time. - raise CompilationError - end - end - - # This isn't actually a visit method, though maybe it should be. It is - # responsible for converting the set of string options on a regular - # expression into its equivalent integer. 
- def visit_regexp_literal_flags(node) - node - .options - .chars - .inject(0) do |accum, option| - accum | - case option - when "i" - Regexp::IGNORECASE - when "x" - Regexp::EXTENDED - when "m" - Regexp::MULTILINE - else - raise "Unknown regexp option: #{option}" - end - end - end - - def visit_symbol_literal(node) - node.value.value.to_sym - end - - def visit_symbols(node) - node.elements.map { |element| visit(element).to_sym } - end - - def visit_tstring_content(node) - node.value - end - - def visit_word(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - node.parts.first.value - else - # Any interpolation of expressions or variables will result in the - # string being constructed at run-time. - raise CompilationError - end - end - - def visit_words(node) - visit_all(node.elements) - end - - def visit_unsupported(_node) - raise CompilationError - end - - # Please forgive the metaprogramming here. This is used to create visit - # methods for every node that we did not explicitly handle. By default - # each of these methods will raise a CompilationError. - handled = instance_methods(false) - (Visitor.instance_methods(false) - handled).each do |method| - alias_method method, :visit_unsupported - end - end - - # This object is used to track the size of the stack at any given time. It - # is effectively a mini symbolic interpreter. It's necessary because when - # instruction sequences get serialized they include a :stack_max field on - # them. This field is used to determine how much stack space to allocate - # for the instruction sequence. - class Stack - attr_reader :current_size, :maximum_size - - def initialize - @current_size = 0 - @maximum_size = 0 - end - - def change_by(value) - @current_size += value - @maximum_size = @current_size if @current_size > @maximum_size - end - end - - # This represents every local variable associated with an instruction - # sequence. 
There are two kinds of locals: plain locals that are what you - # expect, and block proxy locals, which represent local variables - # associated with blocks that were passed into the current instruction - # sequence. - class LocalTable - # A local representing a block passed into the current instruction - # sequence. - class BlockLocal - attr_reader :name - - def initialize(name) - @name = name - end - end - - # A regular local variable. - class PlainLocal - attr_reader :name - - def initialize(name) - @name = name - end - end - - # The result of looking up a local variable in the current local table. - class Lookup - attr_reader :local, :index, :level - - def initialize(local, index, level) - @local = local - @index = index - @level = level - end - end - - attr_reader :locals - - def initialize - @locals = [] - end - - def find(name, level) - index = locals.index { |local| local.name == name } - Lookup.new(locals[index], index, level) if index - end - - def has?(name) - locals.any? { |local| local.name == name } - end - - def names - locals.map(&:name) - end - - def size - locals.length - end - - # Add a BlockLocal to the local table. - def block(name) - locals << BlockLocal.new(name) unless has?(name) - end - - # Add a PlainLocal to the local table. - def plain(name) - locals << PlainLocal.new(name) unless has?(name) - end - - # This is the offset from the top of the stack where this local variable - # lives. - def offset(index) - size - (index - 3) - 1 - end - end - - # This class is meant to mirror RubyVM::InstructionSequence. It contains a - # list of instructions along with the metadata pertaining to them. It also - # functions as a builder for the instruction sequence. - class InstructionSequence - MAGIC = "YARVInstructionSequence/SimpleDataFormat" - - # The type of the instruction sequence. - attr_reader :type - - # The name of the instruction sequence. - attr_reader :name - - # The parent instruction sequence, if there is one. 
- attr_reader :parent_iseq - - # The location of the root node of this instruction sequence. - attr_reader :location - - # This is the list of information about the arguments to this - # instruction sequence. - attr_accessor :argument_size - attr_reader :argument_options - - # The list of instructions for this instruction sequence. - attr_reader :insns - - # The table of local variables. - attr_reader :local_table - - # The hash of names of instance and class variables pointing to the - # index of their associated inline storage. - attr_reader :inline_storages - - # The index of the next inline storage that will be created. - attr_reader :storage_index - - # An object that will track the current size of the stack and the - # maximum size of the stack for this instruction sequence. - attr_reader :stack - - def initialize(type, name, parent_iseq, location) - @type = type - @name = name - @parent_iseq = parent_iseq - @location = location - - @argument_size = 0 - @argument_options = {} - - @local_table = LocalTable.new - @inline_storages = {} - @insns = [] - @storage_index = 0 - @stack = Stack.new - end - - def local_variable(name, level = 0) - if (lookup = local_table.find(name, level)) - lookup - elsif parent_iseq - parent_iseq.local_variable(name, level + 1) - end - end - - def push(insn) - insns << insn - insn - end - - def inline_storage - storage = storage_index - @storage_index += 1 - storage - end - - def inline_storage_for(name) - unless inline_storages.key?(name) - inline_storages[name] = inline_storage - end - - inline_storages[name] - end - - def length - insns.inject(0) do |sum, insn| - insn.is_a?(Array) ? 
sum + insn.length : sum - end - end - - def each_child - insns.each do |insn| - insn[1..].each do |operand| - yield operand if operand.is_a?(InstructionSequence) - end - end - end - - def to_a - versions = RUBY_VERSION.split(".").map(&:to_i) - - [ - MAGIC, - versions[0], - versions[1], - 1, - { - arg_size: argument_size, - local_size: local_table.size, - stack_max: stack.maximum_size - }, - name, - "", - "", - location.start_line, - type, - local_table.names, - argument_options, - [], - insns.map { |insn| serialize(insn) } - ] - end - - private - - def serialize(insn) - case insn[0] - when :checkkeyword, :getblockparam, :getblockparamproxy, - :getlocal_WC_0, :getlocal_WC_1, :getlocal, :setlocal_WC_0, - :setlocal_WC_1, :setlocal - iseq = self - - case insn[0] - when :getlocal_WC_1, :setlocal_WC_1 - iseq = iseq.parent_iseq - when :getblockparam, :getblockparamproxy, :getlocal, :setlocal - insn[2].times { iseq = iseq.parent_iseq } - end - - # Here we need to map the local variable index to the offset - # from the top of the stack where it will be stored. - [insn[0], iseq.local_table.offset(insn[1]), *insn[2..]] - when :defineclass - [insn[0], insn[1], insn[2].to_a, insn[3]] - when :definemethod, :definesmethod - [insn[0], insn[1], insn[2].to_a] - when :send - # For any instructions that push instruction sequences onto the - # stack, we need to call #to_a on them as well. - [insn[0], insn[1], (insn[2].to_a if insn[2])] - when :once - [insn[0], insn[1].to_a, insn[2]] - else - insn - end - end - end - - # This class serves as a layer of indirection between the instruction - # sequence and the compiler. It allows us to provide different behavior - # for certain instructions depending on the Ruby version. For example, - # class variable reads and writes gained an inline cache in Ruby 3.0. So - # we place the logic for checking the Ruby version in this class. 
- class Builder - attr_reader :iseq, :stack - attr_reader :frozen_string_literal, - :operands_unification, - :specialized_instruction - - def initialize( - iseq, - frozen_string_literal: false, - operands_unification: true, - specialized_instruction: true - ) - @iseq = iseq - @stack = iseq.stack - - @frozen_string_literal = frozen_string_literal - @operands_unification = operands_unification - @specialized_instruction = specialized_instruction - end - - # This creates a new label at the current length of the instruction - # sequence. It is used as the operand for jump instructions. - def label - name = :"label_#{iseq.length}" - iseq.insns.last == name ? name : event(name) - end - - def event(name) - iseq.push(name) - name - end - - def adjuststack(number) - stack.change_by(-number) - iseq.push([:adjuststack, number]) - end - - def anytostring - stack.change_by(-2 + 1) - iseq.push([:anytostring]) - end - - def branchif(index) - stack.change_by(-1) - iseq.push([:branchif, index]) - end - - def branchnil(index) - stack.change_by(-1) - iseq.push([:branchnil, index]) - end - - def branchunless(index) - stack.change_by(-1) - iseq.push([:branchunless, index]) - end - - def checkkeyword(index, keyword_index) - stack.change_by(+1) - iseq.push([:checkkeyword, index, keyword_index]) - end - - def concatarray - stack.change_by(-2 + 1) - iseq.push([:concatarray]) - end - - def concatstrings(number) - stack.change_by(-number + 1) - iseq.push([:concatstrings, number]) - end - - def defined(type, name, message) - stack.change_by(-1 + 1) - iseq.push([:defined, type, name, message]) - end - - def defineclass(name, class_iseq, flags) - stack.change_by(-2 + 1) - iseq.push([:defineclass, name, class_iseq, flags]) - end - - def definemethod(name, method_iseq) - stack.change_by(0) - iseq.push([:definemethod, name, method_iseq]) - end - - def definesmethod(name, method_iseq) - stack.change_by(-1) - iseq.push([:definesmethod, name, method_iseq]) - end - - def dup - stack.change_by(-1 + 2) 
- iseq.push([:dup]) - end - - def duparray(object) - stack.change_by(+1) - iseq.push([:duparray, object]) - end - - def duphash(object) - stack.change_by(+1) - iseq.push([:duphash, object]) - end - - def dupn(number) - stack.change_by(+number) - iseq.push([:dupn, number]) - end - - def expandarray(length, flag) - stack.change_by(-1 + length) - iseq.push([:expandarray, length, flag]) - end - - def getblockparam(index, level) - stack.change_by(+1) - iseq.push([:getblockparam, index, level]) - end - - def getblockparamproxy(index, level) - stack.change_by(+1) - iseq.push([:getblockparamproxy, index, level]) - end - - def getclassvariable(name) - stack.change_by(+1) - - if RUBY_VERSION >= "3.0" - iseq.push([:getclassvariable, name, iseq.inline_storage_for(name)]) - else - iseq.push([:getclassvariable, name]) - end - end - - def getconstant(name) - stack.change_by(-2 + 1) - iseq.push([:getconstant, name]) - end - - def getglobal(name) - stack.change_by(+1) - iseq.push([:getglobal, name]) - end - - def getinstancevariable(name) - stack.change_by(+1) - - if RUBY_VERSION >= "3.2" - iseq.push([:getinstancevariable, name, iseq.inline_storage]) - else - inline_storage = iseq.inline_storage_for(name) - iseq.push([:getinstancevariable, name, inline_storage]) - end - end - - def getlocal(index, level) - stack.change_by(+1) - - if operands_unification - # Specialize the getlocal instruction based on the level of the - # local variable. If it's 0 or 1, then there's a specialized - # instruction that will look at the current scope or the parent - # scope, respectively, and requires fewer operands. 
- case level - when 0 - iseq.push([:getlocal_WC_0, index]) - when 1 - iseq.push([:getlocal_WC_1, index]) - else - iseq.push([:getlocal, index, level]) - end - else - iseq.push([:getlocal, index, level]) - end - end - - def getspecial(key, type) - stack.change_by(-0 + 1) - iseq.push([:getspecial, key, type]) - end - - def intern - stack.change_by(-1 + 1) - iseq.push([:intern]) - end - - def invokeblock(method_id, argc, flag) - stack.change_by(-argc + 1) - iseq.push([:invokeblock, call_data(method_id, argc, flag)]) - end - - def invokesuper(method_id, argc, flag, block_iseq) - stack.change_by(-(argc + 1) + 1) - - cdata = call_data(method_id, argc, flag) - iseq.push([:invokesuper, cdata, block_iseq]) - end - - def jump(index) - stack.change_by(0) - iseq.push([:jump, index]) - end - - def leave - stack.change_by(-1) - iseq.push([:leave]) - end - - def newarray(length) - stack.change_by(-length + 1) - iseq.push([:newarray, length]) - end - - def newhash(length) - stack.change_by(-length + 1) - iseq.push([:newhash, length]) - end - - def newrange(flag) - stack.change_by(-2 + 1) - iseq.push([:newrange, flag]) - end - - def nop - stack.change_by(0) - iseq.push([:nop]) - end - - def objtostring(method_id, argc, flag) - stack.change_by(-1 + 1) - iseq.push([:objtostring, call_data(method_id, argc, flag)]) - end - - def once(postexe_iseq, inline_storage) - stack.change_by(+1) - iseq.push([:once, postexe_iseq, inline_storage]) - end - - def opt_getconstant_path(names) - if RUBY_VERSION >= "3.2" - stack.change_by(+1) - iseq.push([:opt_getconstant_path, names]) - else - inline_storage = iseq.inline_storage - getinlinecache = opt_getinlinecache(-1, inline_storage) - - if names[0] == :"" - names.shift - pop - putobject(Object) - end - - names.each_with_index do |name, index| - putobject(index == 0) - getconstant(name) - end - - opt_setinlinecache(inline_storage) - getinlinecache[1] = label - end - end - - def opt_getinlinecache(offset, inline_storage) - stack.change_by(+1) - 
iseq.push([:opt_getinlinecache, offset, inline_storage]) - end - - def opt_newarray_max(length) - if specialized_instruction - stack.change_by(-length + 1) - iseq.push([:opt_newarray_max, length]) - else - newarray(length) - send(:max, 0, VM_CALL_ARGS_SIMPLE) - end - end - - def opt_newarray_min(length) - if specialized_instruction - stack.change_by(-length + 1) - iseq.push([:opt_newarray_min, length]) - else - newarray(length) - send(:min, 0, VM_CALL_ARGS_SIMPLE) - end - end - - def opt_setinlinecache(inline_storage) - stack.change_by(-1 + 1) - iseq.push([:opt_setinlinecache, inline_storage]) - end - - def opt_str_freeze(value) - if specialized_instruction - stack.change_by(+1) - iseq.push( - [ - :opt_str_freeze, - value, - call_data(:freeze, 0, VM_CALL_ARGS_SIMPLE) - ] - ) - else - putstring(value) - send(:freeze, 0, VM_CALL_ARGS_SIMPLE) - end - end - - def opt_str_uminus(value) - if specialized_instruction - stack.change_by(+1) - iseq.push( - [:opt_str_uminus, value, call_data(:-@, 0, VM_CALL_ARGS_SIMPLE)] - ) - else - putstring(value) - send(:-@, 0, VM_CALL_ARGS_SIMPLE) - end - end - - def pop - stack.change_by(-1) - iseq.push([:pop]) - end - - def putnil - stack.change_by(+1) - iseq.push([:putnil]) - end - - def putobject(object) - stack.change_by(+1) - - if operands_unification - # Specialize the putobject instruction based on the value of the - # object. If it's 0 or 1, then there's a specialized instruction - # that will push the object onto the stack and requires fewer - # operands. 
- if object.eql?(0) - iseq.push([:putobject_INT2FIX_0_]) - elsif object.eql?(1) - iseq.push([:putobject_INT2FIX_1_]) - else - iseq.push([:putobject, object]) - end - else - iseq.push([:putobject, object]) - end - end - - def putself - stack.change_by(+1) - iseq.push([:putself]) - end - - def putspecialobject(object) - stack.change_by(+1) - iseq.push([:putspecialobject, object]) - end - - def putstring(object) - stack.change_by(+1) - iseq.push([:putstring, object]) - end - - def send(method_id, argc, flag, block_iseq = nil) - stack.change_by(-(argc + 1) + 1) - cdata = call_data(method_id, argc, flag) - - if specialized_instruction - # Specialize the send instruction. If it doesn't have a block - # attached, then we will replace it with an opt_send_without_block - # and do further specializations based on the called method and the - # number of arguments. - - # stree-ignore - if !block_iseq && (flag & VM_CALL_ARGS_BLOCKARG) == 0 - case [method_id, argc] - when [:length, 0] then iseq.push([:opt_length, cdata]) - when [:size, 0] then iseq.push([:opt_size, cdata]) - when [:empty?, 0] then iseq.push([:opt_empty_p, cdata]) - when [:nil?, 0] then iseq.push([:opt_nil_p, cdata]) - when [:succ, 0] then iseq.push([:opt_succ, cdata]) - when [:!, 0] then iseq.push([:opt_not, cdata]) - when [:+, 1] then iseq.push([:opt_plus, cdata]) - when [:-, 1] then iseq.push([:opt_minus, cdata]) - when [:*, 1] then iseq.push([:opt_mult, cdata]) - when [:/, 1] then iseq.push([:opt_div, cdata]) - when [:%, 1] then iseq.push([:opt_mod, cdata]) - when [:==, 1] then iseq.push([:opt_eq, cdata]) - when [:=~, 1] then iseq.push([:opt_regexpmatch2, cdata]) - when [:<, 1] then iseq.push([:opt_lt, cdata]) - when [:<=, 1] then iseq.push([:opt_le, cdata]) - when [:>, 1] then iseq.push([:opt_gt, cdata]) - when [:>=, 1] then iseq.push([:opt_ge, cdata]) - when [:<<, 1] then iseq.push([:opt_ltlt, cdata]) - when [:[], 1] then iseq.push([:opt_aref, cdata]) - when [:&, 1] then iseq.push([:opt_and, cdata]) - when 
[:|, 1] then iseq.push([:opt_or, cdata]) - when [:[]=, 2] then iseq.push([:opt_aset, cdata]) - when [:!=, 1] - eql_data = call_data(:==, 1, VM_CALL_ARGS_SIMPLE) - iseq.push([:opt_neq, eql_data, cdata]) - else - iseq.push([:opt_send_without_block, cdata]) - end - else - iseq.push([:send, cdata, block_iseq]) - end - else - iseq.push([:send, cdata, block_iseq]) - end - end - - def setclassvariable(name) - stack.change_by(-1) - - if RUBY_VERSION >= "3.0" - iseq.push([:setclassvariable, name, iseq.inline_storage_for(name)]) - else - iseq.push([:setclassvariable, name]) - end - end - - def setconstant(name) - stack.change_by(-2) - iseq.push([:setconstant, name]) - end - - def setglobal(name) - stack.change_by(-1) - iseq.push([:setglobal, name]) - end - - def setinstancevariable(name) - stack.change_by(-1) - - if RUBY_VERSION >= "3.2" - iseq.push([:setinstancevariable, name, iseq.inline_storage]) - else - inline_storage = iseq.inline_storage_for(name) - iseq.push([:setinstancevariable, name, inline_storage]) - end - end - - def setlocal(index, level) - stack.change_by(-1) - - if operands_unification - # Specialize the setlocal instruction based on the level of the - # local variable. If it's 0 or 1, then there's a specialized - # instruction that will write to the current scope or the parent - # scope, respectively, and requires fewer operands. 
- case level - when 0 - iseq.push([:setlocal_WC_0, index]) - when 1 - iseq.push([:setlocal_WC_1, index]) - else - iseq.push([:setlocal, index, level]) - end - else - iseq.push([:setlocal, index, level]) - end - end - - def setn(number) - stack.change_by(-1 + 1) - iseq.push([:setn, number]) - end - - def splatarray(flag) - stack.change_by(-1 + 1) - iseq.push([:splatarray, flag]) - end - - def swap - stack.change_by(-2 + 2) - iseq.push([:swap]) - end - - def topn(number) - stack.change_by(+1) - iseq.push([:topn, number]) - end - - def toregexp(options, length) - stack.change_by(-length + 1) - iseq.push([:toregexp, options, length]) - end - - private - - # This creates a call data object that is used as the operand for the - # send, invokesuper, and objtostring instructions. - def call_data(method_id, argc, flag) - { mid: method_id, flag: flag, orig_argc: argc } - end - end - - # These constants correspond to the putspecialobject instruction. They are - # used to represent special objects that are pushed onto the stack. - VM_SPECIAL_OBJECT_VMCORE = 1 - VM_SPECIAL_OBJECT_CBASE = 2 - VM_SPECIAL_OBJECT_CONST_BASE = 3 - - # These constants correspond to the flag passed as part of the call data - # structure on the send instruction. They are used to represent various - # metadata about the callsite (e.g., were keyword arguments used?, was a - # block given?, etc.). - VM_CALL_ARGS_SPLAT = 1 << 0 - VM_CALL_ARGS_BLOCKARG = 1 << 1 - VM_CALL_FCALL = 1 << 2 - VM_CALL_VCALL = 1 << 3 - VM_CALL_ARGS_SIMPLE = 1 << 4 - VM_CALL_BLOCKISEQ = 1 << 5 - VM_CALL_KWARG = 1 << 6 - VM_CALL_KW_SPLAT = 1 << 7 - VM_CALL_TAILCALL = 1 << 8 - VM_CALL_SUPER = 1 << 9 - VM_CALL_ZSUPER = 1 << 10 - VM_CALL_OPT_SEND = 1 << 11 - VM_CALL_KW_SPLAT_MUT = 1 << 12 - - # These constants correspond to the value passed as part of the defined - # instruction. It's an enum defined in the CRuby codebase that tells that - # instruction what kind of defined check to perform. 
- DEFINED_NIL = 1 - DEFINED_IVAR = 2 - DEFINED_LVAR = 3 - DEFINED_GVAR = 4 - DEFINED_CVAR = 5 - DEFINED_CONST = 6 - DEFINED_METHOD = 7 - DEFINED_YIELD = 8 - DEFINED_ZSUPER = 9 - DEFINED_SELF = 10 - DEFINED_TRUE = 11 - DEFINED_FALSE = 12 - DEFINED_ASGN = 13 - DEFINED_EXPR = 14 - DEFINED_REF = 15 - DEFINED_FUNC = 16 - DEFINED_CONST_FROM = 17 - - # These constants correspond to the value passed in the flags as part of - # the defineclass instruction. - VM_DEFINECLASS_TYPE_CLASS = 0 - VM_DEFINECLASS_TYPE_SINGLETON_CLASS = 1 - VM_DEFINECLASS_TYPE_MODULE = 2 - VM_DEFINECLASS_FLAG_SCOPED = 8 - VM_DEFINECLASS_FLAG_HAS_SUPERCLASS = 16 - - # These options mirror the compilation options that we currently support - # that can be also passed to RubyVM::InstructionSequence.compile. - attr_reader :frozen_string_literal, - :operands_unification, - :specialized_instruction - - # The current instruction sequence that is being compiled. - attr_reader :current_iseq - - # This is the current builder that is being used to construct the current - # instruction sequence. - attr_reader :builder - - # A boolean to track if we're currently compiling the last statement - # within a set of statements. This information is necessary to determine - # if we need to return the value of the last statement. 
- attr_reader :last_statement - - def initialize( - frozen_string_literal: false, - operands_unification: true, - specialized_instruction: true - ) - @frozen_string_literal = frozen_string_literal - @operands_unification = operands_unification - @specialized_instruction = specialized_instruction - - @current_iseq = nil - @builder = nil - @last_statement = false - end - - def visit_BEGIN(node) - visit(node.statements) - end - - def visit_CHAR(node) - if frozen_string_literal - builder.putobject(node.value[1..]) - else - builder.putstring(node.value[1..]) - end - end - - def visit_END(node) - name = "block in #{current_iseq.name}" - once_iseq = - with_instruction_sequence(:block, name, current_iseq, node) do - postexe_iseq = - with_instruction_sequence(:block, name, current_iseq, node) do - *statements, last_statement = node.statements.body - visit_all(statements) - with_last_statement { visit(last_statement) } - builder.leave - end - - builder.putspecialobject(VM_SPECIAL_OBJECT_VMCORE) - builder.send(:"core#set_postexe", 0, VM_CALL_FCALL, postexe_iseq) - builder.leave - end - - builder.once(once_iseq, current_iseq.inline_storage) - builder.pop - end - - def visit_alias(node) - builder.putspecialobject(VM_SPECIAL_OBJECT_VMCORE) - builder.putspecialobject(VM_SPECIAL_OBJECT_CBASE) - visit(node.left) - visit(node.right) - builder.send(:"core#set_method_alias", 3, VM_CALL_ARGS_SIMPLE) - end - - def visit_aref(node) - visit(node.collection) - visit(node.index) - builder.send(:[], 1, VM_CALL_ARGS_SIMPLE) - end - - def visit_arg_block(node) - visit(node.value) - end - - def visit_arg_paren(node) - visit(node.arguments) - end - - def visit_arg_star(node) - visit(node.value) - builder.splatarray(false) - end - - def visit_args(node) - visit_all(node.parts) - end - - def visit_array(node) - if (compiled = RubyVisitor.compile(node)) - builder.duparray(compiled) - else - length = 0 - - node.contents.parts.each do |part| - if part.is_a?(ArgStar) - if length > 0 - 
builder.newarray(length) - length = 0 - end - - visit(part.value) - builder.concatarray - else - visit(part) - length += 1 - end - end - - builder.newarray(length) if length > 0 - if length > 0 && length != node.contents.parts.length - builder.concatarray - end - end - end - - def visit_assign(node) - case node.target - when ARefField - builder.putnil - visit(node.target.collection) - visit(node.target.index) - visit(node.value) - builder.setn(3) - builder.send(:[]=, 2, VM_CALL_ARGS_SIMPLE) - builder.pop - when ConstPathField - names = constant_names(node.target) - name = names.pop - - if RUBY_VERSION >= "3.2" - builder.opt_getconstant_path(names) - visit(node.value) - builder.swap - builder.topn(1) - builder.swap - builder.setconstant(name) - else - visit(node.value) - builder.dup if last_statement? - builder.opt_getconstant_path(names) - builder.setconstant(name) - end - when Field - builder.putnil - visit(node.target) - visit(node.value) - builder.setn(2) - builder.send(:"#{node.target.name.value}=", 1, VM_CALL_ARGS_SIMPLE) - builder.pop - when TopConstField - name = node.target.constant.value.to_sym - - if RUBY_VERSION >= "3.2" - builder.putobject(Object) - visit(node.value) - builder.swap - builder.topn(1) - builder.swap - builder.setconstant(name) - else - visit(node.value) - builder.dup if last_statement? - builder.putobject(Object) - builder.setconstant(name) - end - when VarField - visit(node.value) - builder.dup if last_statement? 
- - case node.target.value - when Const - builder.putspecialobject(VM_SPECIAL_OBJECT_CONST_BASE) - builder.setconstant(node.target.value.value.to_sym) - when CVar - builder.setclassvariable(node.target.value.value.to_sym) - when GVar - builder.setglobal(node.target.value.value.to_sym) - when Ident - local_variable = visit(node.target) - builder.setlocal(local_variable.index, local_variable.level) - when IVar - builder.setinstancevariable(node.target.value.value.to_sym) - end - end - end - - def visit_assoc(node) - visit(node.key) - visit(node.value) - end - - def visit_assoc_splat(node) - visit(node.value) - end - - def visit_backref(node) - builder.getspecial(1, 2 * node.value[1..].to_i) - end - - def visit_bare_assoc_hash(node) - if (compiled = RubyVisitor.compile(node)) - builder.duphash(compiled) - else - visit_all(node.assocs) - end - end - - def visit_binary(node) - case node.operator - when :"&&" - visit(node.left) - builder.dup - - branchunless = builder.branchunless(-1) - builder.pop - - visit(node.right) - branchunless[1] = builder.label - when :"||" - visit(node.left) - builder.dup - - branchif = builder.branchif(-1) - builder.pop - - visit(node.right) - branchif[1] = builder.label - else - visit(node.left) - visit(node.right) - builder.send(node.operator, 1, VM_CALL_ARGS_SIMPLE) - end - end - - def visit_block(node) - with_instruction_sequence( - :block, - "block in #{current_iseq.name}", - current_iseq, - node - ) do - builder.event(:RUBY_EVENT_B_CALL) - visit(node.block_var) - visit(node.bodystmt) - builder.event(:RUBY_EVENT_B_RETURN) - builder.leave - end - end - - def visit_block_var(node) - params = node.params - - if params.requireds.length == 1 && params.optionals.empty? && - !params.rest && params.posts.empty? && params.keywords.empty? 
&& - !params.keyword_rest && !params.block - current_iseq.argument_options[:ambiguous_param0] = true - end - - visit(node.params) - - node.locals.each do |local| - current_iseq.local_table.plain(local.value.to_sym) - end - end - - def visit_blockarg(node) - current_iseq.argument_options[:block_start] = current_iseq.argument_size - current_iseq.local_table.block(node.name.value.to_sym) - current_iseq.argument_size += 1 - end - - def visit_bodystmt(node) - visit(node.statements) - end - - def visit_call(node) - if node.is_a?(CallNode) - return( - visit_call( - CommandCall.new( - receiver: node.receiver, - operator: node.operator, - message: node.message, - arguments: node.arguments, - block: nil, - location: node.location - ) - ) - ) - end - - arg_parts = argument_parts(node.arguments) - argc = arg_parts.length - - # First we're going to check if we're calling a method on an array - # literal without any arguments. In that case there are some - # specializations we might be able to perform. - if argc == 0 && (node.message.is_a?(Ident) || node.message.is_a?(Op)) - case node.receiver - when ArrayLiteral - parts = node.receiver.contents&.parts || [] - - if parts.none? { |part| part.is_a?(ArgStar) } && - RubyVisitor.compile(node.receiver).nil? - case node.message.value - when "max" - visit(node.receiver.contents) - builder.opt_newarray_max(parts.length) - return - when "min" - visit(node.receiver.contents) - builder.opt_newarray_min(parts.length) - return - end - end - when StringLiteral - if RubyVisitor.compile(node.receiver).nil? 
- case node.message.value - when "-@" - builder.opt_str_uminus(node.receiver.parts.first.value) - return - when "freeze" - builder.opt_str_freeze(node.receiver.parts.first.value) - return - end - end - end - end - - if node.receiver - if node.receiver.is_a?(VarRef) && - ( - lookup = - current_iseq.local_variable(node.receiver.value.value.to_sym) - ) && lookup.local.is_a?(LocalTable::BlockLocal) - builder.getblockparamproxy(lookup.index, lookup.level) - else - visit(node.receiver) - end - else - builder.putself - end - - branchnil = - if node.operator&.value == "&." - builder.dup - builder.branchnil(-1) - end - - flag = 0 - - arg_parts.each do |arg_part| - case arg_part - when ArgBlock - argc -= 1 - flag |= VM_CALL_ARGS_BLOCKARG - visit(arg_part) - when ArgStar - flag |= VM_CALL_ARGS_SPLAT - visit(arg_part) - when ArgsForward - flag |= VM_CALL_ARGS_SPLAT | VM_CALL_ARGS_BLOCKARG - - lookup = current_iseq.local_table.find(:*, 0) - builder.getlocal(lookup.index, lookup.level) - builder.splatarray(arg_parts.length != 1) - - lookup = current_iseq.local_table.find(:&, 0) - builder.getblockparamproxy(lookup.index, lookup.level) - when BareAssocHash - flag |= VM_CALL_KW_SPLAT - visit(arg_part) - else - visit(arg_part) - end - end - - block_iseq = visit(node.block) if node.block - flag |= VM_CALL_ARGS_SIMPLE if block_iseq.nil? && flag == 0 - flag |= VM_CALL_FCALL if node.receiver.nil? 
- - builder.send(node.message.value.to_sym, argc, flag, block_iseq) - branchnil[1] = builder.label if branchnil - end - - def visit_case(node) - visit(node.value) if node.value - - clauses = [] - else_clause = nil - - current = node.consequent - - while current - clauses << current - - if (current = current.consequent).is_a?(Else) - else_clause = current - break - end - end - - branches = - clauses.map do |clause| - visit(clause.arguments) - builder.topn(1) - builder.send(:===, 1, VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE) - [clause, builder.branchif(:label_00)] - end - - builder.pop - - else_clause ? visit(else_clause) : builder.putnil - - builder.leave - - branches.each_with_index do |(clause, branchif), index| - builder.leave if index != 0 - branchif[1] = builder.label - builder.pop - visit(clause) - end - end - - def visit_class(node) - name = node.constant.constant.value.to_sym - class_iseq = - with_instruction_sequence( - :class, - "", - current_iseq, - node - ) do - builder.event(:RUBY_EVENT_CLASS) - visit(node.bodystmt) - builder.event(:RUBY_EVENT_END) - builder.leave - end - - flags = VM_DEFINECLASS_TYPE_CLASS - - case node.constant - when ConstPathRef - flags |= VM_DEFINECLASS_FLAG_SCOPED - visit(node.constant.parent) - when ConstRef - builder.putspecialobject(VM_SPECIAL_OBJECT_CONST_BASE) - when TopConstRef - flags |= VM_DEFINECLASS_FLAG_SCOPED - builder.putobject(Object) - end - - if node.superclass - flags |= VM_DEFINECLASS_FLAG_HAS_SUPERCLASS - visit(node.superclass) - else - builder.putnil - end - - builder.defineclass(name, class_iseq, flags) - end - - def visit_command(node) - visit_call( - CommandCall.new( - receiver: nil, - operator: nil, - message: node.message, - arguments: node.arguments, - block: node.block, - location: node.location - ) - ) - end - - def visit_command_call(node) - visit_call( - CommandCall.new( - receiver: node.receiver, - operator: node.operator, - message: node.message, - arguments: node.arguments, - block: node.block, - 
location: node.location - ) - ) - end - - def visit_const_path_field(node) - visit(node.parent) - end - - def visit_const_path_ref(node) - names = constant_names(node) - builder.opt_getconstant_path(names) - end - - def visit_def(node) - method_iseq = - with_instruction_sequence( - :method, - node.name.value, - current_iseq, - node - ) do - visit(node.params) if node.params - builder.event(:RUBY_EVENT_CALL) - visit(node.bodystmt) - builder.event(:RUBY_EVENT_RETURN) - builder.leave - end - - name = node.name.value.to_sym - - if node.target - visit(node.target) - builder.definesmethod(name, method_iseq) - else - builder.definemethod(name, method_iseq) - end - - builder.putobject(name) - end - - def visit_defined(node) - case node.value - when Assign - # If we're assigning to a local variable, then we need to make sure - # that we put it into the local table. - if node.value.target.is_a?(VarField) && - node.value.target.value.is_a?(Ident) - current_iseq.local_table.plain(node.value.target.value.value.to_sym) - end - - builder.putobject("assignment") - when VarRef - value = node.value.value - name = value.value.to_sym - - case value - when Const - builder.putnil - builder.defined(DEFINED_CONST, name, "constant") - when CVar - builder.putnil - builder.defined(DEFINED_CVAR, name, "class variable") - when GVar - builder.putnil - builder.defined(DEFINED_GVAR, name, "global-variable") - when Ident - builder.putobject("local-variable") - when IVar - builder.putnil - builder.defined(DEFINED_IVAR, name, "instance-variable") - when Kw - case name - when :false - builder.putobject("false") - when :nil - builder.putobject("nil") - when :self - builder.putobject("self") - when :true - builder.putobject("true") - end - end - when VCall - builder.putself - - name = node.value.value.value.to_sym - builder.defined(DEFINED_FUNC, name, "method") - when YieldNode - builder.putnil - builder.defined(DEFINED_YIELD, false, "yield") - when ZSuper - builder.putnil - 
builder.defined(DEFINED_ZSUPER, false, "super") - else - builder.putobject("expression") - end - end - - def visit_dyna_symbol(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - builder.putobject(node.parts.first.value.to_sym) - end - end - - def visit_else(node) - visit(node.statements) - builder.pop unless last_statement? - end - - def visit_elsif(node) - visit_if( - IfNode.new( - predicate: node.predicate, - statements: node.statements, - consequent: node.consequent, - location: node.location - ) - ) - end - - def visit_field(node) - visit(node.parent) - end - - def visit_float(node) - builder.putobject(node.accept(RubyVisitor.new)) - end - - def visit_for(node) - visit(node.collection) - - name = node.index.value.value.to_sym - current_iseq.local_table.plain(name) - - block_iseq = - with_instruction_sequence( - :block, - "block in #{current_iseq.name}", - current_iseq, - node.statements - ) do - current_iseq.argument_options[:lead_num] ||= 0 - current_iseq.argument_options[:lead_num] += 1 - current_iseq.argument_options[:ambiguous_param0] = true - - current_iseq.argument_size += 1 - current_iseq.local_table.plain(2) - - builder.getlocal(0, 0) - - local_variable = current_iseq.local_variable(name) - builder.setlocal(local_variable.index, local_variable.level) - - builder.event(:RUBY_EVENT_B_CALL) - builder.nop - - visit(node.statements) - builder.event(:RUBY_EVENT_B_RETURN) - builder.leave - end - - builder.send(:each, 0, 0, block_iseq) - end - - def visit_hash(node) - builder.duphash(node.accept(RubyVisitor.new)) - rescue RubyVisitor::CompilationError - visit_all(node.assocs) - builder.newhash(node.assocs.length * 2) - end - - def visit_heredoc(node) - if node.beginning.value.end_with?("`") - visit_xstring_literal(node) - elsif node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - visit(node.parts.first) - else - length = visit_string_parts(node) - builder.concatstrings(length) - end - end - - def visit_if(node) - 
visit(node.predicate) - branchunless = builder.branchunless(-1) - visit(node.statements) - - if last_statement? - builder.leave - branchunless[1] = builder.label - - node.consequent ? visit(node.consequent) : builder.putnil - else - builder.pop - - if node.consequent - jump = builder.jump(-1) - branchunless[1] = builder.label - visit(node.consequent) - jump[1] = builder.label - else - branchunless[1] = builder.label - end - end - end - - def visit_if_op(node) - visit_if( - IfNode.new( - predicate: node.predicate, - statements: node.truthy, - consequent: - Else.new( - keyword: Kw.new(value: "else", location: Location.default), - statements: node.falsy, - location: Location.default - ), - location: Location.default - ) - ) - end - - def visit_imaginary(node) - builder.putobject(node.accept(RubyVisitor.new)) - end - - def visit_int(node) - builder.putobject(node.accept(RubyVisitor.new)) - end - - def visit_kwrest_param(node) - current_iseq.argument_options[:kwrest] = current_iseq.argument_size - current_iseq.argument_size += 1 - current_iseq.local_table.plain(node.name.value.to_sym) - end - - def visit_label(node) - builder.putobject(node.accept(RubyVisitor.new)) - end - - def visit_lambda(node) - lambda_iseq = - with_instruction_sequence( - :block, - "block in #{current_iseq.name}", - current_iseq, - node - ) do - builder.event(:RUBY_EVENT_B_CALL) - visit(node.params) - visit(node.statements) - builder.event(:RUBY_EVENT_B_RETURN) - builder.leave - end - - builder.putspecialobject(VM_SPECIAL_OBJECT_VMCORE) - builder.send(:lambda, 0, VM_CALL_FCALL, lambda_iseq) - end - - def visit_lambda_var(node) - visit_block_var(node) - end - - def visit_massign(node) - visit(node.value) - builder.dup - visit(node.target) - end - - def visit_method_add_block(node) - visit_call( - CommandCall.new( - receiver: node.call.receiver, - operator: node.call.operator, - message: node.call.message, - arguments: node.call.arguments, - block: node.block, - location: node.location - ) - ) - end 
- - def visit_mlhs(node) - lookups = [] - - node.parts.each do |part| - case part - when VarField - lookups << visit(part) - end - end - - builder.expandarray(lookups.length, 0) - - lookups.each { |lookup| builder.setlocal(lookup.index, lookup.level) } - end - - def visit_module(node) - name = node.constant.constant.value.to_sym - module_iseq = - with_instruction_sequence( - :class, - "", - current_iseq, - node - ) do - builder.event(:RUBY_EVENT_CLASS) - visit(node.bodystmt) - builder.event(:RUBY_EVENT_END) - builder.leave - end - - flags = VM_DEFINECLASS_TYPE_MODULE - - case node.constant - when ConstPathRef - flags |= VM_DEFINECLASS_FLAG_SCOPED - visit(node.constant.parent) - when ConstRef - builder.putspecialobject(VM_SPECIAL_OBJECT_CONST_BASE) - when TopConstRef - flags |= VM_DEFINECLASS_FLAG_SCOPED - builder.putobject(Object) - end - - builder.putnil - builder.defineclass(name, module_iseq, flags) - end - - def visit_mrhs(node) - if (compiled = RubyVisitor.compile(node)) - builder.duparray(compiled) - else - visit_all(node.parts) - builder.newarray(node.parts.length) - end - end - - def visit_not(node) - visit(node.statement) - builder.send(:!, 0, VM_CALL_ARGS_SIMPLE) - end - - def visit_opassign(node) - flag = VM_CALL_ARGS_SIMPLE - if node.target.is_a?(ConstPathField) || node.target.is_a?(TopConstField) - flag |= VM_CALL_FCALL - end - - case (operator = node.operator.value.chomp("=").to_sym) - when :"&&" - branchunless = nil - - with_opassign(node) do - builder.dup - branchunless = builder.branchunless(-1) - builder.pop - visit(node.value) - end - - case node.target - when ARefField - builder.leave - branchunless[1] = builder.label - builder.setn(3) - builder.adjuststack(3) - when ConstPathField, TopConstField - branchunless[1] = builder.label - builder.swap - builder.pop - else - branchunless[1] = builder.label - end - when :"||" - if node.target.is_a?(ConstPathField) || - node.target.is_a?(TopConstField) - opassign_defined(node) - builder.swap - builder.pop 
- elsif node.target.is_a?(VarField) && - [Const, CVar, GVar].include?(node.target.value.class) - opassign_defined(node) - else - branchif = nil - - with_opassign(node) do - builder.dup - branchif = builder.branchif(-1) - builder.pop - visit(node.value) - end - - if node.target.is_a?(ARefField) - builder.leave - branchif[1] = builder.label - builder.setn(3) - builder.adjuststack(3) - else - branchif[1] = builder.label - end - end - else - with_opassign(node) do - visit(node.value) - builder.send(operator, 1, flag) - end - end - end - - def visit_params(node) - argument_options = current_iseq.argument_options - - if node.requireds.any? - argument_options[:lead_num] = 0 - - node.requireds.each do |required| - current_iseq.local_table.plain(required.value.to_sym) - current_iseq.argument_size += 1 - argument_options[:lead_num] += 1 - end - end - - node.optionals.each do |(optional, value)| - index = current_iseq.local_table.size - name = optional.value.to_sym - - current_iseq.local_table.plain(name) - current_iseq.argument_size += 1 - - unless argument_options.key?(:opt) - argument_options[:opt] = [builder.label] - end - - visit(value) - builder.setlocal(index, 0) - current_iseq.argument_options[:opt] << builder.label - end - - visit(node.rest) if node.rest - - if node.posts.any? - argument_options[:post_start] = current_iseq.argument_size - argument_options[:post_num] = 0 - - node.posts.each do |post| - current_iseq.local_table.plain(post.value.to_sym) - current_iseq.argument_size += 1 - argument_options[:post_num] += 1 - end - end - - if node.keywords.any? - argument_options[:kwbits] = 0 - argument_options[:keyword] = [] - checkkeywords = [] - - node.keywords.each_with_index do |(keyword, value), keyword_index| - name = keyword.value.chomp(":").to_sym - index = current_iseq.local_table.size - - current_iseq.local_table.plain(name) - current_iseq.argument_size += 1 - argument_options[:kwbits] += 1 - - if value.nil? 
- argument_options[:keyword] << name - else - begin - compiled = value.accept(RubyVisitor.new) - argument_options[:keyword] << [name, compiled] - rescue RubyVisitor::CompilationError - argument_options[:keyword] << [name] - checkkeywords << builder.checkkeyword(-1, keyword_index) - branchif = builder.branchif(-1) - visit(value) - builder.setlocal(index, 0) - branchif[1] = builder.label - end - end - end - - name = node.keyword_rest ? 3 : 2 - current_iseq.argument_size += 1 - current_iseq.local_table.plain(name) - - lookup = current_iseq.local_table.find(name, 0) - checkkeywords.each { |checkkeyword| checkkeyword[1] = lookup.index } - end - - if node.keyword_rest.is_a?(ArgsForward) - current_iseq.local_table.plain(:*) - current_iseq.local_table.plain(:&) - - current_iseq.argument_options[ - :rest_start - ] = current_iseq.argument_size - current_iseq.argument_options[ - :block_start - ] = current_iseq.argument_size + 1 - - current_iseq.argument_size += 2 - elsif node.keyword_rest - visit(node.keyword_rest) - end - - visit(node.block) if node.block - end - - def visit_paren(node) - visit(node.contents) - end - - def visit_program(node) - node.statements.body.each do |statement| - break unless statement.is_a?(Comment) - - if statement.value == "# frozen_string_literal: true" - @frozen_string_literal = true - end - end - - preexes = [] - statements = [] - - node.statements.body.each do |statement| - case statement - when Comment, EmbDoc, EndContent, VoidStmt - # ignore - when BEGINBlock - preexes << statement - else - statements << statement - end - end - - with_instruction_sequence(:top, "", nil, node) do - visit_all(preexes) - - if statements.empty? 
- builder.putnil - else - *statements, last_statement = statements - visit_all(statements) - with_last_statement { visit(last_statement) } - end - - builder.leave - end - end - - def visit_qsymbols(node) - builder.duparray(node.accept(RubyVisitor.new)) - end - - def visit_qwords(node) - if frozen_string_literal - builder.duparray(node.accept(RubyVisitor.new)) - else - visit_all(node.elements) - builder.newarray(node.elements.length) - end - end - - def visit_range(node) - builder.putobject(node.accept(RubyVisitor.new)) - rescue RubyVisitor::CompilationError - visit(node.left) - visit(node.right) - builder.newrange(node.operator.value == ".." ? 0 : 1) - end - - def visit_rational(node) - builder.putobject(node.accept(RubyVisitor.new)) - end - - def visit_regexp_literal(node) - builder.putobject(node.accept(RubyVisitor.new)) - rescue RubyVisitor::CompilationError - flags = RubyVisitor.new.visit_regexp_literal_flags(node) - length = visit_string_parts(node) - builder.toregexp(flags, length) - end - - def visit_rest_param(node) - current_iseq.local_table.plain(node.name.value.to_sym) - current_iseq.argument_options[:rest_start] = current_iseq.argument_size - current_iseq.argument_size += 1 - end - - def visit_sclass(node) - visit(node.target) - builder.putnil - - singleton_iseq = - with_instruction_sequence( - :class, - "singleton class", - current_iseq, - node - ) do - builder.event(:RUBY_EVENT_CLASS) - visit(node.bodystmt) - builder.event(:RUBY_EVENT_END) - builder.leave - end - - builder.defineclass( - :singletonclass, - singleton_iseq, - VM_DEFINECLASS_TYPE_SINGLETON_CLASS - ) - end - - def visit_statements(node) - statements = - node.body.select do |statement| - case statement - when Comment, EmbDoc, EndContent, VoidStmt - false - else - true - end - end - - statements.empty? ? 
builder.putnil : visit_all(statements) - end - - def visit_string_concat(node) - value = node.left.parts.first.value + node.right.parts.first.value - content = TStringContent.new(value: value, location: node.location) - - literal = - StringLiteral.new( - parts: [content], - quote: node.left.quote, - location: node.location - ) - visit_string_literal(literal) - end - - def visit_string_embexpr(node) - visit(node.statements) - end - - def visit_string_literal(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - visit(node.parts.first) - else - length = visit_string_parts(node) - builder.concatstrings(length) - end - end - - def visit_super(node) - builder.putself - visit(node.arguments) - builder.invokesuper( - nil, - argument_parts(node.arguments).length, - VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE | VM_CALL_SUPER, - nil - ) - end - - def visit_symbol_literal(node) - builder.putobject(node.accept(RubyVisitor.new)) - end - - def visit_symbols(node) - builder.duparray(node.accept(RubyVisitor.new)) - rescue RubyVisitor::CompilationError - node.elements.each do |element| - if element.parts.length == 1 && - element.parts.first.is_a?(TStringContent) - builder.putobject(element.parts.first.value.to_sym) - else - length = visit_string_parts(element) - builder.concatstrings(length) - builder.intern - end - end - - builder.newarray(node.elements.length) - end - - def visit_top_const_ref(node) - builder.opt_getconstant_path(constant_names(node)) - end - - def visit_tstring_content(node) - if frozen_string_literal - builder.putobject(node.accept(RubyVisitor.new)) - else - builder.putstring(node.accept(RubyVisitor.new)) - end - end - - def visit_unary(node) - method_id = - case node.operator - when "+", "-" - "#{node.operator}@" - else - node.operator - end - - visit_call( - CommandCall.new( - receiver: node.statement, - operator: nil, - message: Ident.new(value: method_id, location: Location.default), - arguments: nil, - block: nil, - location: 
Location.default - ) - ) - end - - def visit_undef(node) - node.symbols.each_with_index do |symbol, index| - builder.pop if index != 0 - builder.putspecialobject(VM_SPECIAL_OBJECT_VMCORE) - builder.putspecialobject(VM_SPECIAL_OBJECT_CBASE) - visit(symbol) - builder.send(:"core#undef_method", 2, VM_CALL_ARGS_SIMPLE) - end - end - - def visit_unless(node) - visit(node.predicate) - branchunless = builder.branchunless(-1) - node.consequent ? visit(node.consequent) : builder.putnil - - if last_statement? - builder.leave - branchunless[1] = builder.label - - visit(node.statements) - else - builder.pop - - if node.consequent - jump = builder.jump(-1) - branchunless[1] = builder.label - visit(node.consequent) - jump[1] = builder.label - else - branchunless[1] = builder.label - end - end - end - - def visit_until(node) - jumps = [] - - jumps << builder.jump(-1) - builder.putnil - builder.pop - jumps << builder.jump(-1) - - label = builder.label - visit(node.statements) - builder.pop - jumps.each { |jump| jump[1] = builder.label } - - visit(node.predicate) - builder.branchunless(label) - builder.putnil if last_statement? 
- end - - def visit_var_field(node) - case node.value - when CVar, IVar - name = node.value.value.to_sym - current_iseq.inline_storage_for(name) - when Ident - name = node.value.value.to_sym - - if (local_variable = current_iseq.local_variable(name)) - local_variable - else - current_iseq.local_table.plain(name) - current_iseq.local_variable(name) - end - end - end - - def visit_var_ref(node) - case node.value - when Const - builder.opt_getconstant_path(constant_names(node)) - when CVar - name = node.value.value.to_sym - builder.getclassvariable(name) - when GVar - builder.getglobal(node.value.value.to_sym) - when Ident - lookup = current_iseq.local_variable(node.value.value.to_sym) - - case lookup.local - when LocalTable::BlockLocal - builder.getblockparam(lookup.index, lookup.level) - when LocalTable::PlainLocal - builder.getlocal(lookup.index, lookup.level) - end - when IVar - name = node.value.value.to_sym - builder.getinstancevariable(name) - when Kw - case node.value.value - when "false" - builder.putobject(false) - when "nil" - builder.putnil - when "self" - builder.putself - when "true" - builder.putobject(true) - end - end - end - - def visit_vcall(node) - builder.putself - - flag = VM_CALL_FCALL | VM_CALL_VCALL | VM_CALL_ARGS_SIMPLE - builder.send(node.value.value.to_sym, 0, flag) - end - - def visit_when(node) - visit(node.statements) - end - - def visit_while(node) - jumps = [] - - jumps << builder.jump(-1) - builder.putnil - builder.pop - jumps << builder.jump(-1) - - label = builder.label - visit(node.statements) - builder.pop - jumps.each { |jump| jump[1] = builder.label } - - visit(node.predicate) - builder.branchif(label) - builder.putnil if last_statement? 
- end - - def visit_word(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - visit(node.parts.first) - else - length = visit_string_parts(node) - builder.concatstrings(length) - end - end - - def visit_words(node) - converted = nil - - if frozen_string_literal - begin - converted = node.accept(RubyVisitor.new) - rescue RubyVisitor::CompilationError - end - end - - if converted - builder.duparray(converted) - else - visit_all(node.elements) - builder.newarray(node.elements.length) - end - end - - def visit_xstring_literal(node) - builder.putself - length = visit_string_parts(node) - builder.concatstrings(node.parts.length) if length > 1 - builder.send(:`, 1, VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE) - end - - def visit_yield(node) - parts = argument_parts(node.arguments) - visit_all(parts) - builder.invokeblock(nil, parts.length, VM_CALL_ARGS_SIMPLE) - end - - def visit_zsuper(_node) - builder.putself - builder.invokesuper( - nil, - 0, - VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE | VM_CALL_SUPER | VM_CALL_ZSUPER, - nil - ) - end - - private - - # This is a helper that is used in places where arguments may be present - # or they may be wrapped in parentheses. It's meant to descend down the - # tree and return an array of argument nodes. - def argument_parts(node) - case node - when nil - [] - when Args - node.parts - when ArgParen - if node.arguments.is_a?(ArgsForward) - [node.arguments] - else - node.arguments.parts - end - when Paren - node.contents.parts - end - end - - # Constant names when they are being assigned or referenced come in as a - # tree, but it's more convenient to work with them as an array. This - # method converts them into that array. This is nice because it's the - # operand that goes to opt_getconstant_path in Ruby 3.2. 
- def constant_names(node) - current = node - names = [] - - while current.is_a?(ConstPathField) || current.is_a?(ConstPathRef) - names.unshift(current.constant.value.to_sym) - current = current.parent - end - - case current - when VarField, VarRef - names.unshift(current.value.value.to_sym) - when TopConstRef - names.unshift(current.constant.value.to_sym) - names.unshift(:"") - end - - names - end - - # For the most part when an OpAssign (operator assignment) node with a ||= - # operator is being compiled it's a matter of reading the target, checking - # if the value should be evaluated, evaluating it if so, and then writing - # the result back to the target. - # - # However, in certain kinds of assignments (X, ::X, X::Y, @@x, and $x) we - # first check if the value is defined using the defined instruction. I - # don't know why it is necessary, and suspect that it isn't. - def opassign_defined(node) - case node.target - when ConstPathField - visit(node.target.parent) - name = node.target.constant.value.to_sym - - builder.dup - builder.defined(DEFINED_CONST_FROM, name, true) - when TopConstField - name = node.target.constant.value.to_sym - - builder.putobject(Object) - builder.dup - builder.defined(DEFINED_CONST_FROM, name, true) - when VarField - name = node.target.value.value.to_sym - builder.putnil - - case node.target.value - when Const - builder.defined(DEFINED_CONST, name, true) - when CVar - builder.defined(DEFINED_CVAR, name, true) - when GVar - builder.defined(DEFINED_GVAR, name, true) - end - end - - branchunless = builder.branchunless(-1) - - case node.target - when ConstPathField, TopConstField - builder.dup - builder.putobject(true) - builder.getconstant(name) - when VarField - case node.target.value - when Const - builder.opt_getconstant_path(constant_names(node.target)) - when CVar - builder.getclassvariable(name) - when GVar - builder.getglobal(name) - end - end - - builder.dup - branchif = builder.branchif(-1) - builder.pop - - branchunless[1] = 
builder.label - visit(node.value) - - case node.target - when ConstPathField, TopConstField - builder.dupn(2) - builder.swap - builder.setconstant(name) - when VarField - builder.dup - - case node.target.value - when Const - builder.putspecialobject(VM_SPECIAL_OBJECT_CONST_BASE) - builder.setconstant(name) - when CVar - builder.setclassvariable(name) - when GVar - builder.setglobal(name) - end - end - - branchif[1] = builder.label - end - - # Whenever a value is interpolated into a string-like structure, these - # three instructions are pushed. - def push_interpolate - builder.dup - builder.objtostring(:to_s, 0, VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE) - builder.anytostring - end - - # There are a lot of nodes in the AST that act as contains of parts of - # strings. This includes things like string literals, regular expressions, - # heredocs, etc. This method will visit all the parts of a string within - # those containers. - def visit_string_parts(node) - length = 0 - - unless node.parts.first.is_a?(TStringContent) - builder.putobject("") - length += 1 - end - - node.parts.each do |part| - case part - when StringDVar - visit(part.variable) - push_interpolate - when StringEmbExpr - visit(part) - push_interpolate - when TStringContent - builder.putobject(part.accept(RubyVisitor.new)) - end - - length += 1 - end - - length - end - - # The current instruction sequence that we're compiling is always stored - # on the compiler. When we descend into a node that has its own - # instruction sequence, this method can be called to temporarily set the - # new value of the instruction sequence, yield, and then set it back. 
- def with_instruction_sequence(type, name, parent_iseq, node) - previous_iseq = current_iseq - previous_builder = builder - - begin - iseq = InstructionSequence.new(type, name, parent_iseq, node.location) - - @current_iseq = iseq - @builder = - Builder.new( - iseq, - frozen_string_literal: frozen_string_literal, - operands_unification: operands_unification, - specialized_instruction: specialized_instruction - ) - - yield - iseq - ensure - @current_iseq = previous_iseq - @builder = previous_builder - end - end - - # When we're compiling the last statement of a set of statements within a - # scope, the instructions sometimes change from pops to leaves. These - # kinds of peephole optimizations can reduce the overall number of - # instructions. Therefore, we keep track of whether we're compiling the - # last statement of a scope and allow visit methods to query that - # information. - def with_last_statement - previous = @last_statement - @last_statement = true - - begin - yield - ensure - @last_statement = previous - end - end - - def last_statement? - @last_statement - end - - # OpAssign nodes can have a number of different kinds of nodes as their - # "target" (i.e., the left-hand side of the assignment). When compiling - # these nodes we typically need to first fetch the current value of the - # variable, then perform some kind of action, then store the result back - # into the variable. This method handles that by first fetching the value, - # then yielding to the block, then storing the result. 
- def with_opassign(node) - case node.target - when ARefField - builder.putnil - visit(node.target.collection) - visit(node.target.index) - - builder.dupn(2) - builder.send(:[], 1, VM_CALL_ARGS_SIMPLE) - - yield - - builder.setn(3) - builder.send(:[]=, 2, VM_CALL_ARGS_SIMPLE) - builder.pop - when ConstPathField - name = node.target.constant.value.to_sym - - visit(node.target.parent) - builder.dup - builder.putobject(true) - builder.getconstant(name) - - yield - - if node.operator.value == "&&=" - builder.dupn(2) - else - builder.swap - builder.topn(1) - end - - builder.swap - builder.setconstant(name) - when TopConstField - name = node.target.constant.value.to_sym - - builder.putobject(Object) - builder.dup - builder.putobject(true) - builder.getconstant(name) - - yield - - if node.operator.value == "&&=" - builder.dupn(2) - else - builder.swap - builder.topn(1) - end - - builder.swap - builder.setconstant(name) - when VarField - case node.target.value - when Const - names = constant_names(node.target) - builder.opt_getconstant_path(names) - - yield - - builder.dup - builder.putspecialobject(VM_SPECIAL_OBJECT_CONST_BASE) - builder.setconstant(names.last) - when CVar - name = node.target.value.value.to_sym - builder.getclassvariable(name) - - yield - - builder.dup - builder.setclassvariable(name) - when GVar - name = node.target.value.value.to_sym - builder.getglobal(name) - - yield - - builder.dup - builder.setglobal(name) - when Ident - local_variable = visit(node.target) - builder.getlocal(local_variable.index, local_variable.level) - - yield - - builder.dup - builder.setlocal(local_variable.index, local_variable.level) - when IVar - name = node.target.value.value.to_sym - builder.getinstancevariable(name) - - yield - - builder.dup - builder.setinstancevariable(name) - end - end - end - end - end -end diff --git a/test/compiler_test.rb b/test/compiler_test.rb index cf0667bb..cdf2860e 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -2,17 +2,9 @@ 
return if !defined?(RubyVM::InstructionSequence) || RUBY_VERSION < "3.1" require_relative "test_helper" -require "fiddle" module SyntaxTree class CompilerTest < Minitest::Test - ISEQ_LOAD = - Fiddle::Function.new( - Fiddle::Handle::DEFAULT["rb_iseq_load"], - [Fiddle::TYPE_VOIDP] * 3, - Fiddle::TYPE_VOIDP - ) - CASES = [ # Various literals placed on the stack "true", @@ -457,7 +449,7 @@ def serialize_iseq(iseq) when Array insn.map do |operand| if operand.is_a?(Array) && - operand[0] == Visitor::Compiler::InstructionSequence::MAGIC + operand[0] == Compiler::InstructionSequence::MAGIC serialize_iseq(operand) else operand @@ -478,20 +470,13 @@ def assert_compiles(source, **options) assert_equal( serialize_iseq(RubyVM::InstructionSequence.compile(source, **options)), - serialize_iseq(program.accept(Visitor::Compiler.new(**options))) + serialize_iseq(program.accept(Compiler.new(**options))) ) end def assert_evaluates(expected, source, **options) program = SyntaxTree.parse(source) - compiled = program.accept(Visitor::Compiler.new(**options)).to_a - - # Temporary hack until we get these working. 
- compiled[4][:node_id] = 11 - compiled[4][:node_ids] = [1, 0, 3, 2, 6, 7, 9, -1] - - iseq = Fiddle.dlunwrap(ISEQ_LOAD.call(Fiddle.dlwrap(compiled), 0, nil)) - assert_equal expected, iseq.eval + assert_equal expected, program.accept(Compiler.new(**options)).eval end end end From 8b836c73b7cc2c9327a7782008a301653b2848dd Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 18 Nov 2022 20:20:45 -0500 Subject: [PATCH 044/104] Split YARV out into its own file --- lib/syntax_tree.rb | 1 + lib/syntax_tree/compiler.rb | 1062 ++++------------------------------- lib/syntax_tree/yarv.rb | 838 +++++++++++++++++++++++++++ test/compiler_test.rb | 2 +- 4 files changed, 954 insertions(+), 949 deletions(-) create mode 100644 lib/syntax_tree/yarv.rb diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index c62132e6..187ff74d 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -26,6 +26,7 @@ require_relative "syntax_tree/pattern" require_relative "syntax_tree/search" +require_relative "syntax_tree/yarv" require_relative "syntax_tree/compiler" # Syntax Tree is a suite of tools built on top of the internal CRuby parser. It diff --git a/lib/syntax_tree/compiler.rb b/lib/syntax_tree/compiler.rb index d9b7e787..c936c9c1 100644 --- a/lib/syntax_tree/compiler.rb +++ b/lib/syntax_tree/compiler.rb @@ -185,839 +185,6 @@ def visit_unsupported(_node) end end - # This object is used to track the size of the stack at any given time. It - # is effectively a mini symbolic interpreter. It's necessary because when - # instruction sequences get serialized they include a :stack_max field on - # them. This field is used to determine how much stack space to allocate - # for the instruction sequence. 
- class Stack - attr_reader :current_size, :maximum_size - - def initialize - @current_size = 0 - @maximum_size = 0 - end - - def change_by(value) - @current_size += value - @maximum_size = @current_size if @current_size > @maximum_size - end - end - - # This represents every local variable associated with an instruction - # sequence. There are two kinds of locals: plain locals that are what you - # expect, and block proxy locals, which represent local variables - # associated with blocks that were passed into the current instruction - # sequence. - class LocalTable - # A local representing a block passed into the current instruction - # sequence. - class BlockLocal - attr_reader :name - - def initialize(name) - @name = name - end - end - - # A regular local variable. - class PlainLocal - attr_reader :name - - def initialize(name) - @name = name - end - end - - # The result of looking up a local variable in the current local table. - class Lookup - attr_reader :local, :index, :level - - def initialize(local, index, level) - @local = local - @index = index - @level = level - end - end - - attr_reader :locals - - def initialize - @locals = [] - end - - def find(name, level) - index = locals.index { |local| local.name == name } - Lookup.new(locals[index], index, level) if index - end - - def has?(name) - locals.any? { |local| local.name == name } - end - - def names - locals.map(&:name) - end - - def size - locals.length - end - - # Add a BlockLocal to the local table. - def block(name) - locals << BlockLocal.new(name) unless has?(name) - end - - # Add a PlainLocal to the local table. - def plain(name) - locals << PlainLocal.new(name) unless has?(name) - end - - # This is the offset from the top of the stack where this local variable - # lives. - def offset(index) - size - (index - 3) - 1 - end - end - - # This class is meant to mirror RubyVM::InstructionSequence. It contains a - # list of instructions along with the metadata pertaining to them. 
It also - # functions as a builder for the instruction sequence. - class InstructionSequence - MAGIC = "YARVInstructionSequence/SimpleDataFormat" - - # This provides a handle to the rb_iseq_load function, which allows you to - # pass a serialized iseq to Ruby and have it return a - # RubyVM::InstructionSequence object. - ISEQ_LOAD = - Fiddle::Function.new( - Fiddle::Handle::DEFAULT["rb_iseq_load"], - [Fiddle::TYPE_VOIDP] * 3, - Fiddle::TYPE_VOIDP - ) - - # The type of the instruction sequence. - attr_reader :type - - # The name of the instruction sequence. - attr_reader :name - - # The parent instruction sequence, if there is one. - attr_reader :parent_iseq - - # The location of the root node of this instruction sequence. - attr_reader :location - - # This is the list of information about the arguments to this - # instruction sequence. - attr_accessor :argument_size - attr_reader :argument_options - - # The list of instructions for this instruction sequence. - attr_reader :insns - - # The table of local variables. - attr_reader :local_table - - # The hash of names of instance and class variables pointing to the - # index of their associated inline storage. - attr_reader :inline_storages - - # The index of the next inline storage that will be created. - attr_reader :storage_index - - # An object that will track the current size of the stack and the - # maximum size of the stack for this instruction sequence. 
- attr_reader :stack - - def initialize(type, name, parent_iseq, location) - @type = type - @name = name - @parent_iseq = parent_iseq - @location = location - - @argument_size = 0 - @argument_options = {} - - @local_table = LocalTable.new - @inline_storages = {} - @insns = [] - @storage_index = 0 - @stack = Stack.new - end - - def local_variable(name, level = 0) - if (lookup = local_table.find(name, level)) - lookup - elsif parent_iseq - parent_iseq.local_variable(name, level + 1) - end - end - - def push(insn) - insns << insn - insn - end - - def inline_storage - storage = storage_index - @storage_index += 1 - storage - end - - def inline_storage_for(name) - unless inline_storages.key?(name) - inline_storages[name] = inline_storage - end - - inline_storages[name] - end - - def length - insns.inject(0) do |sum, insn| - insn.is_a?(Array) ? sum + insn.length : sum - end - end - - def each_child - insns.each do |insn| - insn[1..].each do |operand| - yield operand if operand.is_a?(InstructionSequence) - end - end - end - - def eval - compiled = to_a - - # Temporary hack until we get these working. 
- compiled[4][:node_id] = 11 - compiled[4][:node_ids] = [1, 0, 3, 2, 6, 7, 9, -1] - - Fiddle.dlunwrap(ISEQ_LOAD.call(Fiddle.dlwrap(compiled), 0, nil)).eval - end - - def to_a - versions = RUBY_VERSION.split(".").map(&:to_i) - - [ - MAGIC, - versions[0], - versions[1], - 1, - { - arg_size: argument_size, - local_size: local_table.size, - stack_max: stack.maximum_size - }, - name, - "", - "", - location.start_line, - type, - local_table.names, - argument_options, - [], - insns.map { |insn| serialize(insn) } - ] - end - - private - - def serialize(insn) - case insn[0] - when :checkkeyword, :getblockparam, :getblockparamproxy, - :getlocal_WC_0, :getlocal_WC_1, :getlocal, :setlocal_WC_0, - :setlocal_WC_1, :setlocal - iseq = self - - case insn[0] - when :getlocal_WC_1, :setlocal_WC_1 - iseq = iseq.parent_iseq - when :getblockparam, :getblockparamproxy, :getlocal, :setlocal - insn[2].times { iseq = iseq.parent_iseq } - end - - # Here we need to map the local variable index to the offset - # from the top of the stack where it will be stored. - [insn[0], iseq.local_table.offset(insn[1]), *insn[2..]] - when :defineclass - [insn[0], insn[1], insn[2].to_a, insn[3]] - when :definemethod, :definesmethod - [insn[0], insn[1], insn[2].to_a] - when :send - # For any instructions that push instruction sequences onto the - # stack, we need to call #to_a on them as well. - [insn[0], insn[1], (insn[2].to_a if insn[2])] - when :once - [insn[0], insn[1].to_a, insn[2]] - else - insn - end - end - end - - # This class serves as a layer of indirection between the instruction - # sequence and the compiler. It allows us to provide different behavior - # for certain instructions depending on the Ruby version. For example, - # class variable reads and writes gained an inline cache in Ruby 3.0. So - # we place the logic for checking the Ruby version in this class. 
- class Builder - attr_reader :iseq, :stack - attr_reader :frozen_string_literal, - :operands_unification, - :specialized_instruction - - def initialize( - iseq, - frozen_string_literal: false, - operands_unification: true, - specialized_instruction: true - ) - @iseq = iseq - @stack = iseq.stack - - @frozen_string_literal = frozen_string_literal - @operands_unification = operands_unification - @specialized_instruction = specialized_instruction - end - - # This creates a new label at the current length of the instruction - # sequence. It is used as the operand for jump instructions. - def label - name = :"label_#{iseq.length}" - iseq.insns.last == name ? name : event(name) - end - - def event(name) - iseq.push(name) - name - end - - def adjuststack(number) - stack.change_by(-number) - iseq.push([:adjuststack, number]) - end - - def anytostring - stack.change_by(-2 + 1) - iseq.push([:anytostring]) - end - - def branchif(index) - stack.change_by(-1) - iseq.push([:branchif, index]) - end - - def branchnil(index) - stack.change_by(-1) - iseq.push([:branchnil, index]) - end - - def branchunless(index) - stack.change_by(-1) - iseq.push([:branchunless, index]) - end - - def checkkeyword(index, keyword_index) - stack.change_by(+1) - iseq.push([:checkkeyword, index, keyword_index]) - end - - def concatarray - stack.change_by(-2 + 1) - iseq.push([:concatarray]) - end - - def concatstrings(number) - stack.change_by(-number + 1) - iseq.push([:concatstrings, number]) - end - - def defined(type, name, message) - stack.change_by(-1 + 1) - iseq.push([:defined, type, name, message]) - end - - def defineclass(name, class_iseq, flags) - stack.change_by(-2 + 1) - iseq.push([:defineclass, name, class_iseq, flags]) - end - - def definemethod(name, method_iseq) - stack.change_by(0) - iseq.push([:definemethod, name, method_iseq]) - end - - def definesmethod(name, method_iseq) - stack.change_by(-1) - iseq.push([:definesmethod, name, method_iseq]) - end - - def dup - stack.change_by(-1 + 2) 
- iseq.push([:dup]) - end - - def duparray(object) - stack.change_by(+1) - iseq.push([:duparray, object]) - end - - def duphash(object) - stack.change_by(+1) - iseq.push([:duphash, object]) - end - - def dupn(number) - stack.change_by(+number) - iseq.push([:dupn, number]) - end - - def expandarray(length, flag) - stack.change_by(-1 + length) - iseq.push([:expandarray, length, flag]) - end - - def getblockparam(index, level) - stack.change_by(+1) - iseq.push([:getblockparam, index, level]) - end - - def getblockparamproxy(index, level) - stack.change_by(+1) - iseq.push([:getblockparamproxy, index, level]) - end - - def getclassvariable(name) - stack.change_by(+1) - - if RUBY_VERSION >= "3.0" - iseq.push([:getclassvariable, name, iseq.inline_storage_for(name)]) - else - iseq.push([:getclassvariable, name]) - end - end - - def getconstant(name) - stack.change_by(-2 + 1) - iseq.push([:getconstant, name]) - end - - def getglobal(name) - stack.change_by(+1) - iseq.push([:getglobal, name]) - end - - def getinstancevariable(name) - stack.change_by(+1) - - if RUBY_VERSION >= "3.2" - iseq.push([:getinstancevariable, name, iseq.inline_storage]) - else - inline_storage = iseq.inline_storage_for(name) - iseq.push([:getinstancevariable, name, inline_storage]) - end - end - - def getlocal(index, level) - stack.change_by(+1) - - if operands_unification - # Specialize the getlocal instruction based on the level of the - # local variable. If it's 0 or 1, then there's a specialized - # instruction that will look at the current scope or the parent - # scope, respectively, and requires fewer operands. 
- case level - when 0 - iseq.push([:getlocal_WC_0, index]) - when 1 - iseq.push([:getlocal_WC_1, index]) - else - iseq.push([:getlocal, index, level]) - end - else - iseq.push([:getlocal, index, level]) - end - end - - def getspecial(key, type) - stack.change_by(-0 + 1) - iseq.push([:getspecial, key, type]) - end - - def intern - stack.change_by(-1 + 1) - iseq.push([:intern]) - end - - def invokeblock(method_id, argc, flag) - stack.change_by(-argc + 1) - iseq.push([:invokeblock, call_data(method_id, argc, flag)]) - end - - def invokesuper(method_id, argc, flag, block_iseq) - stack.change_by(-(argc + 1) + 1) - - cdata = call_data(method_id, argc, flag) - iseq.push([:invokesuper, cdata, block_iseq]) - end - - def jump(index) - stack.change_by(0) - iseq.push([:jump, index]) - end - - def leave - stack.change_by(-1) - iseq.push([:leave]) - end - - def newarray(length) - stack.change_by(-length + 1) - iseq.push([:newarray, length]) - end - - def newhash(length) - stack.change_by(-length + 1) - iseq.push([:newhash, length]) - end - - def newrange(flag) - stack.change_by(-2 + 1) - iseq.push([:newrange, flag]) - end - - def nop - stack.change_by(0) - iseq.push([:nop]) - end - - def objtostring(method_id, argc, flag) - stack.change_by(-1 + 1) - iseq.push([:objtostring, call_data(method_id, argc, flag)]) - end - - def once(postexe_iseq, inline_storage) - stack.change_by(+1) - iseq.push([:once, postexe_iseq, inline_storage]) - end - - def opt_getconstant_path(names) - if RUBY_VERSION >= "3.2" - stack.change_by(+1) - iseq.push([:opt_getconstant_path, names]) - else - inline_storage = iseq.inline_storage - getinlinecache = opt_getinlinecache(-1, inline_storage) - - if names[0] == :"" - names.shift - pop - putobject(Object) - end - - names.each_with_index do |name, index| - putobject(index == 0) - getconstant(name) - end - - opt_setinlinecache(inline_storage) - getinlinecache[1] = label - end - end - - def opt_getinlinecache(offset, inline_storage) - stack.change_by(+1) - 
iseq.push([:opt_getinlinecache, offset, inline_storage]) - end - - def opt_newarray_max(length) - if specialized_instruction - stack.change_by(-length + 1) - iseq.push([:opt_newarray_max, length]) - else - newarray(length) - send(:max, 0, VM_CALL_ARGS_SIMPLE) - end - end - - def opt_newarray_min(length) - if specialized_instruction - stack.change_by(-length + 1) - iseq.push([:opt_newarray_min, length]) - else - newarray(length) - send(:min, 0, VM_CALL_ARGS_SIMPLE) - end - end - - def opt_setinlinecache(inline_storage) - stack.change_by(-1 + 1) - iseq.push([:opt_setinlinecache, inline_storage]) - end - - def opt_str_freeze(value) - if specialized_instruction - stack.change_by(+1) - iseq.push( - [ - :opt_str_freeze, - value, - call_data(:freeze, 0, VM_CALL_ARGS_SIMPLE) - ] - ) - else - putstring(value) - send(:freeze, 0, VM_CALL_ARGS_SIMPLE) - end - end - - def opt_str_uminus(value) - if specialized_instruction - stack.change_by(+1) - iseq.push( - [:opt_str_uminus, value, call_data(:-@, 0, VM_CALL_ARGS_SIMPLE)] - ) - else - putstring(value) - send(:-@, 0, VM_CALL_ARGS_SIMPLE) - end - end - - def pop - stack.change_by(-1) - iseq.push([:pop]) - end - - def putnil - stack.change_by(+1) - iseq.push([:putnil]) - end - - def putobject(object) - stack.change_by(+1) - - if operands_unification - # Specialize the putobject instruction based on the value of the - # object. If it's 0 or 1, then there's a specialized instruction - # that will push the object onto the stack and requires fewer - # operands. 
- if object.eql?(0) - iseq.push([:putobject_INT2FIX_0_]) - elsif object.eql?(1) - iseq.push([:putobject_INT2FIX_1_]) - else - iseq.push([:putobject, object]) - end - else - iseq.push([:putobject, object]) - end - end - - def putself - stack.change_by(+1) - iseq.push([:putself]) - end - - def putspecialobject(object) - stack.change_by(+1) - iseq.push([:putspecialobject, object]) - end - - def putstring(object) - stack.change_by(+1) - iseq.push([:putstring, object]) - end - - def send(method_id, argc, flag, block_iseq = nil) - stack.change_by(-(argc + 1) + 1) - cdata = call_data(method_id, argc, flag) - - if specialized_instruction - # Specialize the send instruction. If it doesn't have a block - # attached, then we will replace it with an opt_send_without_block - # and do further specializations based on the called method and the - # number of arguments. - - # stree-ignore - if !block_iseq && (flag & VM_CALL_ARGS_BLOCKARG) == 0 - case [method_id, argc] - when [:length, 0] then iseq.push([:opt_length, cdata]) - when [:size, 0] then iseq.push([:opt_size, cdata]) - when [:empty?, 0] then iseq.push([:opt_empty_p, cdata]) - when [:nil?, 0] then iseq.push([:opt_nil_p, cdata]) - when [:succ, 0] then iseq.push([:opt_succ, cdata]) - when [:!, 0] then iseq.push([:opt_not, cdata]) - when [:+, 1] then iseq.push([:opt_plus, cdata]) - when [:-, 1] then iseq.push([:opt_minus, cdata]) - when [:*, 1] then iseq.push([:opt_mult, cdata]) - when [:/, 1] then iseq.push([:opt_div, cdata]) - when [:%, 1] then iseq.push([:opt_mod, cdata]) - when [:==, 1] then iseq.push([:opt_eq, cdata]) - when [:=~, 1] then iseq.push([:opt_regexpmatch2, cdata]) - when [:<, 1] then iseq.push([:opt_lt, cdata]) - when [:<=, 1] then iseq.push([:opt_le, cdata]) - when [:>, 1] then iseq.push([:opt_gt, cdata]) - when [:>=, 1] then iseq.push([:opt_ge, cdata]) - when [:<<, 1] then iseq.push([:opt_ltlt, cdata]) - when [:[], 1] then iseq.push([:opt_aref, cdata]) - when [:&, 1] then iseq.push([:opt_and, cdata]) - when 
[:|, 1] then iseq.push([:opt_or, cdata]) - when [:[]=, 2] then iseq.push([:opt_aset, cdata]) - when [:!=, 1] - eql_data = call_data(:==, 1, VM_CALL_ARGS_SIMPLE) - iseq.push([:opt_neq, eql_data, cdata]) - else - iseq.push([:opt_send_without_block, cdata]) - end - else - iseq.push([:send, cdata, block_iseq]) - end - else - iseq.push([:send, cdata, block_iseq]) - end - end - - def setclassvariable(name) - stack.change_by(-1) - - if RUBY_VERSION >= "3.0" - iseq.push([:setclassvariable, name, iseq.inline_storage_for(name)]) - else - iseq.push([:setclassvariable, name]) - end - end - - def setconstant(name) - stack.change_by(-2) - iseq.push([:setconstant, name]) - end - - def setglobal(name) - stack.change_by(-1) - iseq.push([:setglobal, name]) - end - - def setinstancevariable(name) - stack.change_by(-1) - - if RUBY_VERSION >= "3.2" - iseq.push([:setinstancevariable, name, iseq.inline_storage]) - else - inline_storage = iseq.inline_storage_for(name) - iseq.push([:setinstancevariable, name, inline_storage]) - end - end - - def setlocal(index, level) - stack.change_by(-1) - - if operands_unification - # Specialize the setlocal instruction based on the level of the - # local variable. If it's 0 or 1, then there's a specialized - # instruction that will write to the current scope or the parent - # scope, respectively, and requires fewer operands. 
- case level - when 0 - iseq.push([:setlocal_WC_0, index]) - when 1 - iseq.push([:setlocal_WC_1, index]) - else - iseq.push([:setlocal, index, level]) - end - else - iseq.push([:setlocal, index, level]) - end - end - - def setn(number) - stack.change_by(-1 + 1) - iseq.push([:setn, number]) - end - - def splatarray(flag) - stack.change_by(-1 + 1) - iseq.push([:splatarray, flag]) - end - - def swap - stack.change_by(-2 + 2) - iseq.push([:swap]) - end - - def topn(number) - stack.change_by(+1) - iseq.push([:topn, number]) - end - - def toregexp(options, length) - stack.change_by(-length + 1) - iseq.push([:toregexp, options, length]) - end - - private - - # This creates a call data object that is used as the operand for the - # send, invokesuper, and objtostring instructions. - def call_data(method_id, argc, flag) - { mid: method_id, flag: flag, orig_argc: argc } - end - end - - # These constants correspond to the putspecialobject instruction. They are - # used to represent special objects that are pushed onto the stack. - VM_SPECIAL_OBJECT_VMCORE = 1 - VM_SPECIAL_OBJECT_CBASE = 2 - VM_SPECIAL_OBJECT_CONST_BASE = 3 - - # These constants correspond to the flag passed as part of the call data - # structure on the send instruction. They are used to represent various - # metadata about the callsite (e.g., were keyword arguments used?, was a - # block given?, etc.). - VM_CALL_ARGS_SPLAT = 1 << 0 - VM_CALL_ARGS_BLOCKARG = 1 << 1 - VM_CALL_FCALL = 1 << 2 - VM_CALL_VCALL = 1 << 3 - VM_CALL_ARGS_SIMPLE = 1 << 4 - VM_CALL_BLOCKISEQ = 1 << 5 - VM_CALL_KWARG = 1 << 6 - VM_CALL_KW_SPLAT = 1 << 7 - VM_CALL_TAILCALL = 1 << 8 - VM_CALL_SUPER = 1 << 9 - VM_CALL_ZSUPER = 1 << 10 - VM_CALL_OPT_SEND = 1 << 11 - VM_CALL_KW_SPLAT_MUT = 1 << 12 - - # These constants correspond to the value passed as part of the defined - # instruction. It's an enum defined in the CRuby codebase that tells that - # instruction what kind of defined check to perform. 
- DEFINED_NIL = 1 - DEFINED_IVAR = 2 - DEFINED_LVAR = 3 - DEFINED_GVAR = 4 - DEFINED_CVAR = 5 - DEFINED_CONST = 6 - DEFINED_METHOD = 7 - DEFINED_YIELD = 8 - DEFINED_ZSUPER = 9 - DEFINED_SELF = 10 - DEFINED_TRUE = 11 - DEFINED_FALSE = 12 - DEFINED_ASGN = 13 - DEFINED_EXPR = 14 - DEFINED_REF = 15 - DEFINED_FUNC = 16 - DEFINED_CONST_FROM = 17 - - # These constants correspond to the value passed in the flags as part of - # the defineclass instruction. - VM_DEFINECLASS_TYPE_CLASS = 0 - VM_DEFINECLASS_TYPE_SINGLETON_CLASS = 1 - VM_DEFINECLASS_TYPE_MODULE = 2 - VM_DEFINECLASS_FLAG_SCOPED = 8 - VM_DEFINECLASS_FLAG_HAS_SUPERCLASS = 16 - # These options mirror the compilation options that we currently support # that can be also passed to RubyVM::InstructionSequence.compile. attr_reader :frozen_string_literal, @@ -1074,8 +241,8 @@ def visit_END(node) builder.leave end - builder.putspecialobject(VM_SPECIAL_OBJECT_VMCORE) - builder.send(:"core#set_postexe", 0, VM_CALL_FCALL, postexe_iseq) + builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) + builder.send(:"core#set_postexe", 0, YARV::VM_CALL_FCALL, postexe_iseq) builder.leave end @@ -1084,17 +251,17 @@ def visit_END(node) end def visit_alias(node) - builder.putspecialobject(VM_SPECIAL_OBJECT_VMCORE) - builder.putspecialobject(VM_SPECIAL_OBJECT_CBASE) + builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) + builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_CBASE) visit(node.left) visit(node.right) - builder.send(:"core#set_method_alias", 3, VM_CALL_ARGS_SIMPLE) + builder.send(:"core#set_method_alias", 3, YARV::VM_CALL_ARGS_SIMPLE) end def visit_aref(node) visit(node.collection) visit(node.index) - builder.send(:[], 1, VM_CALL_ARGS_SIMPLE) + builder.send(:[], 1, YARV::VM_CALL_ARGS_SIMPLE) end def visit_arg_block(node) @@ -1150,7 +317,7 @@ def visit_assign(node) visit(node.target.index) visit(node.value) builder.setn(3) - builder.send(:[]=, 2, VM_CALL_ARGS_SIMPLE) + builder.send(:[]=, 2, YARV::VM_CALL_ARGS_SIMPLE) 
builder.pop when ConstPathField names = constant_names(node.target) @@ -1174,7 +341,7 @@ def visit_assign(node) visit(node.target) visit(node.value) builder.setn(2) - builder.send(:"#{node.target.name.value}=", 1, VM_CALL_ARGS_SIMPLE) + builder.send(:"#{node.target.name.value}=", 1, YARV::VM_CALL_ARGS_SIMPLE) builder.pop when TopConstField name = node.target.constant.value.to_sym @@ -1198,7 +365,7 @@ def visit_assign(node) case node.target.value when Const - builder.putspecialobject(VM_SPECIAL_OBJECT_CONST_BASE) + builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) builder.setconstant(node.target.value.value.to_sym) when CVar builder.setclassvariable(node.target.value.value.to_sym) @@ -1257,7 +424,7 @@ def visit_binary(node) else visit(node.left) visit(node.right) - builder.send(node.operator, 1, VM_CALL_ARGS_SIMPLE) + builder.send(node.operator, 1, YARV::VM_CALL_ARGS_SIMPLE) end end @@ -1357,12 +524,14 @@ def visit_call(node) end if node.receiver - if node.receiver.is_a?(VarRef) && - ( - lookup = - current_iseq.local_variable(node.receiver.value.value.to_sym) - ) && lookup.local.is_a?(LocalTable::BlockLocal) - builder.getblockparamproxy(lookup.index, lookup.level) + if node.receiver.is_a?(VarRef) + lookup = current_iseq.local_variable(node.receiver.value.value.to_sym) + + if lookup.local.is_a?(YARV::LocalTable::BlockLocal) + builder.getblockparamproxy(lookup.index, lookup.level) + else + visit(node.receiver) + end else visit(node.receiver) end @@ -1382,13 +551,13 @@ def visit_call(node) case arg_part when ArgBlock argc -= 1 - flag |= VM_CALL_ARGS_BLOCKARG + flag |= YARV::VM_CALL_ARGS_BLOCKARG visit(arg_part) when ArgStar - flag |= VM_CALL_ARGS_SPLAT + flag |= YARV::VM_CALL_ARGS_SPLAT visit(arg_part) when ArgsForward - flag |= VM_CALL_ARGS_SPLAT | VM_CALL_ARGS_BLOCKARG + flag |= YARV::VM_CALL_ARGS_SPLAT | YARV::VM_CALL_ARGS_BLOCKARG lookup = current_iseq.local_table.find(:*, 0) builder.getlocal(lookup.index, lookup.level) @@ -1397,7 +566,7 @@ def 
visit_call(node) lookup = current_iseq.local_table.find(:&, 0) builder.getblockparamproxy(lookup.index, lookup.level) when BareAssocHash - flag |= VM_CALL_KW_SPLAT + flag |= YARV::VM_CALL_KW_SPLAT visit(arg_part) else visit(arg_part) @@ -1405,8 +574,8 @@ def visit_call(node) end block_iseq = visit(node.block) if node.block - flag |= VM_CALL_ARGS_SIMPLE if block_iseq.nil? && flag == 0 - flag |= VM_CALL_FCALL if node.receiver.nil? + flag |= YARV::VM_CALL_ARGS_SIMPLE if block_iseq.nil? && flag == 0 + flag |= YARV::VM_CALL_FCALL if node.receiver.nil? builder.send(node.message.value.to_sym, argc, flag, block_iseq) branchnil[1] = builder.label if branchnil @@ -1433,7 +602,7 @@ def visit_case(node) clauses.map do |clause| visit(clause.arguments) builder.topn(1) - builder.send(:===, 1, VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE) + builder.send(:===, 1, YARV::VM_CALL_FCALL | YARV::VM_CALL_ARGS_SIMPLE) [clause, builder.branchif(:label_00)] end @@ -1466,21 +635,21 @@ def visit_class(node) builder.leave end - flags = VM_DEFINECLASS_TYPE_CLASS + flags = YARV::VM_DEFINECLASS_TYPE_CLASS case node.constant when ConstPathRef - flags |= VM_DEFINECLASS_FLAG_SCOPED + flags |= YARV::VM_DEFINECLASS_FLAG_SCOPED visit(node.constant.parent) when ConstRef - builder.putspecialobject(VM_SPECIAL_OBJECT_CONST_BASE) + builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) when TopConstRef - flags |= VM_DEFINECLASS_FLAG_SCOPED + flags |= YARV::VM_DEFINECLASS_FLAG_SCOPED builder.putobject(Object) end if node.superclass - flags |= VM_DEFINECLASS_FLAG_HAS_SUPERCLASS + flags |= YARV::VM_DEFINECLASS_FLAG_HAS_SUPERCLASS visit(node.superclass) else builder.putnil @@ -1569,18 +738,18 @@ def visit_defined(node) case value when Const builder.putnil - builder.defined(DEFINED_CONST, name, "constant") + builder.defined(YARV::DEFINED_CONST, name, "constant") when CVar builder.putnil - builder.defined(DEFINED_CVAR, name, "class variable") + builder.defined(YARV::DEFINED_CVAR, name, "class variable") when GVar 
builder.putnil - builder.defined(DEFINED_GVAR, name, "global-variable") + builder.defined(YARV::DEFINED_GVAR, name, "global-variable") when Ident builder.putobject("local-variable") when IVar builder.putnil - builder.defined(DEFINED_IVAR, name, "instance-variable") + builder.defined(YARV::DEFINED_IVAR, name, "instance-variable") when Kw case name when :false @@ -1597,13 +766,13 @@ def visit_defined(node) builder.putself name = node.value.value.value.to_sym - builder.defined(DEFINED_FUNC, name, "method") + builder.defined(YARV::DEFINED_FUNC, name, "method") when YieldNode builder.putnil - builder.defined(DEFINED_YIELD, false, "yield") + builder.defined(YARV::DEFINED_YIELD, false, "yield") when ZSuper builder.putnil - builder.defined(DEFINED_ZSUPER, false, "super") + builder.defined(YARV::DEFINED_ZSUPER, false, "super") else builder.putobject("expression") end @@ -1676,10 +845,12 @@ def visit_for(node) end def visit_hash(node) - builder.duphash(node.accept(RubyVisitor.new)) - rescue RubyVisitor::CompilationError - visit_all(node.assocs) - builder.newhash(node.assocs.length * 2) + if (compiled = RubyVisitor.compile(node)) + builder.duphash(compiled) + else + visit_all(node.assocs) + builder.newhash(node.assocs.length * 2) + end end def visit_heredoc(node) @@ -1766,8 +937,8 @@ def visit_lambda(node) builder.leave end - builder.putspecialobject(VM_SPECIAL_OBJECT_VMCORE) - builder.send(:lambda, 0, VM_CALL_FCALL, lambda_iseq) + builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) + builder.send(:lambda, 0, YARV::VM_CALL_FCALL, lambda_iseq) end def visit_lambda_var(node) @@ -1823,16 +994,16 @@ def visit_module(node) builder.leave end - flags = VM_DEFINECLASS_TYPE_MODULE + flags = YARV::VM_DEFINECLASS_TYPE_MODULE case node.constant when ConstPathRef - flags |= VM_DEFINECLASS_FLAG_SCOPED + flags |= YARV::VM_DEFINECLASS_FLAG_SCOPED visit(node.constant.parent) when ConstRef - builder.putspecialobject(VM_SPECIAL_OBJECT_CONST_BASE) + 
builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) when TopConstRef - flags |= VM_DEFINECLASS_FLAG_SCOPED + flags |= YARV::VM_DEFINECLASS_FLAG_SCOPED builder.putobject(Object) end @@ -1851,13 +1022,13 @@ def visit_mrhs(node) def visit_not(node) visit(node.statement) - builder.send(:!, 0, VM_CALL_ARGS_SIMPLE) + builder.send(:!, 0, YARV::VM_CALL_ARGS_SIMPLE) end def visit_opassign(node) - flag = VM_CALL_ARGS_SIMPLE + flag = YARV::VM_CALL_ARGS_SIMPLE if node.target.is_a?(ConstPathField) || node.target.is_a?(TopConstField) - flag |= VM_CALL_FCALL + flag |= YARV::VM_CALL_FCALL end case (operator = node.operator.value.chomp("=").to_sym) @@ -1977,18 +1148,16 @@ def visit_params(node) if value.nil? argument_options[:keyword] << name + elsif (compiled = RubyVisitor.compile(value)) + compiled = value.accept(RubyVisitor.new) + argument_options[:keyword] << [name, compiled] else - begin - compiled = value.accept(RubyVisitor.new) - argument_options[:keyword] << [name, compiled] - rescue RubyVisitor::CompilationError - argument_options[:keyword] << [name] - checkkeywords << builder.checkkeyword(-1, keyword_index) - branchif = builder.branchif(-1) - visit(value) - builder.setlocal(index, 0) - branchif[1] = builder.label - end + argument_options[:keyword] << [name] + checkkeywords << builder.checkkeyword(-1, keyword_index) + branchif = builder.branchif(-1) + visit(value) + builder.setlocal(index, 0) + branchif[1] = builder.label end end @@ -2075,11 +1244,13 @@ def visit_qwords(node) end def visit_range(node) - builder.putobject(node.accept(RubyVisitor.new)) - rescue RubyVisitor::CompilationError - visit(node.left) - visit(node.right) - builder.newrange(node.operator.value == ".." ? 0 : 1) + if (compiled = RubyVisitor.compile(node)) + builder.putobject(compiled) + else + visit(node.left) + visit(node.right) + builder.newrange(node.operator.value == ".." ? 
0 : 1) + end end def visit_rational(node) @@ -2087,11 +1258,13 @@ def visit_rational(node) end def visit_regexp_literal(node) - builder.putobject(node.accept(RubyVisitor.new)) - rescue RubyVisitor::CompilationError - flags = RubyVisitor.new.visit_regexp_literal_flags(node) - length = visit_string_parts(node) - builder.toregexp(flags, length) + if (compiled = RubyVisitor.compile(node)) + builder.putobject(compiled) + else + flags = RubyVisitor.new.visit_regexp_literal_flags(node) + length = visit_string_parts(node) + builder.toregexp(flags, length) + end end def visit_rest_param(node) @@ -2120,7 +1293,7 @@ def visit_sclass(node) builder.defineclass( :singletonclass, singleton_iseq, - VM_DEFINECLASS_TYPE_SINGLETON_CLASS + YARV::VM_DEFINECLASS_TYPE_SINGLETON_CLASS ) end @@ -2170,7 +1343,7 @@ def visit_super(node) builder.invokesuper( nil, argument_parts(node.arguments).length, - VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE | VM_CALL_SUPER, + YARV::VM_CALL_FCALL | YARV::VM_CALL_ARGS_SIMPLE | YARV::VM_CALL_SUPER, nil ) end @@ -2180,20 +1353,22 @@ def visit_symbol_literal(node) end def visit_symbols(node) - builder.duparray(node.accept(RubyVisitor.new)) - rescue RubyVisitor::CompilationError - node.elements.each do |element| - if element.parts.length == 1 && - element.parts.first.is_a?(TStringContent) - builder.putobject(element.parts.first.value.to_sym) - else - length = visit_string_parts(element) - builder.concatstrings(length) - builder.intern + if (compiled = RubyVisitor.compile(node)) + builder.duparray(compiled) + else + node.elements.each do |element| + if element.parts.length == 1 && + element.parts.first.is_a?(TStringContent) + builder.putobject(element.parts.first.value.to_sym) + else + length = visit_string_parts(element) + builder.concatstrings(length) + builder.intern + end end - end - builder.newarray(node.elements.length) + builder.newarray(node.elements.length) + end end def visit_top_const_ref(node) @@ -2232,10 +1407,10 @@ def visit_unary(node) def 
visit_undef(node) node.symbols.each_with_index do |symbol, index| builder.pop if index != 0 - builder.putspecialobject(VM_SPECIAL_OBJECT_VMCORE) - builder.putspecialobject(VM_SPECIAL_OBJECT_CBASE) + builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) + builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_CBASE) visit(symbol) - builder.send(:"core#undef_method", 2, VM_CALL_ARGS_SIMPLE) + builder.send(:"core#undef_method", 2, YARV::VM_CALL_ARGS_SIMPLE) end end @@ -2311,9 +1486,9 @@ def visit_var_ref(node) lookup = current_iseq.local_variable(node.value.value.to_sym) case lookup.local - when LocalTable::BlockLocal + when YARV::LocalTable::BlockLocal builder.getblockparam(lookup.index, lookup.level) - when LocalTable::PlainLocal + when YARV::LocalTable::PlainLocal builder.getlocal(lookup.index, lookup.level) end when IVar @@ -2336,7 +1511,7 @@ def visit_var_ref(node) def visit_vcall(node) builder.putself - flag = VM_CALL_FCALL | VM_CALL_VCALL | VM_CALL_ARGS_SIMPLE + flag = YARV::VM_CALL_FCALL | YARV::VM_CALL_VCALL | YARV::VM_CALL_ARGS_SIMPLE builder.send(node.value.value.to_sym, 0, flag) end @@ -2372,17 +1547,8 @@ def visit_word(node) end def visit_words(node) - converted = nil - - if frozen_string_literal - begin - converted = node.accept(RubyVisitor.new) - rescue RubyVisitor::CompilationError - end - end - - if converted - builder.duparray(converted) + if frozen_string_literal && (compiled = RubyVisitor.compile(node)) + builder.duparray(compiled) else visit_all(node.elements) builder.newarray(node.elements.length) @@ -2393,13 +1559,13 @@ def visit_xstring_literal(node) builder.putself length = visit_string_parts(node) builder.concatstrings(node.parts.length) if length > 1 - builder.send(:`, 1, VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE) + builder.send(:`, 1, YARV::VM_CALL_FCALL | YARV::VM_CALL_ARGS_SIMPLE) end def visit_yield(node) parts = argument_parts(node.arguments) visit_all(parts) - builder.invokeblock(nil, parts.length, VM_CALL_ARGS_SIMPLE) + 
builder.invokeblock(nil, parts.length, YARV::VM_CALL_ARGS_SIMPLE) end def visit_zsuper(_node) @@ -2407,7 +1573,7 @@ def visit_zsuper(_node) builder.invokesuper( nil, 0, - VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE | VM_CALL_SUPER | VM_CALL_ZSUPER, + YARV::VM_CALL_FCALL | YARV::VM_CALL_ARGS_SIMPLE | YARV::VM_CALL_SUPER | YARV::VM_CALL_ZSUPER, nil ) end @@ -2473,24 +1639,24 @@ def opassign_defined(node) name = node.target.constant.value.to_sym builder.dup - builder.defined(DEFINED_CONST_FROM, name, true) + builder.defined(YARV::DEFINED_CONST_FROM, name, true) when TopConstField name = node.target.constant.value.to_sym builder.putobject(Object) builder.dup - builder.defined(DEFINED_CONST_FROM, name, true) + builder.defined(YARV::DEFINED_CONST_FROM, name, true) when VarField name = node.target.value.value.to_sym builder.putnil case node.target.value when Const - builder.defined(DEFINED_CONST, name, true) + builder.defined(YARV::DEFINED_CONST, name, true) when CVar - builder.defined(DEFINED_CVAR, name, true) + builder.defined(YARV::DEFINED_CVAR, name, true) when GVar - builder.defined(DEFINED_GVAR, name, true) + builder.defined(YARV::DEFINED_GVAR, name, true) end end @@ -2529,7 +1695,7 @@ def opassign_defined(node) case node.target.value when Const - builder.putspecialobject(VM_SPECIAL_OBJECT_CONST_BASE) + builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) builder.setconstant(name) when CVar builder.setclassvariable(name) @@ -2545,7 +1711,7 @@ def opassign_defined(node) # three instructions are pushed. 
def push_interpolate builder.dup - builder.objtostring(:to_s, 0, VM_CALL_FCALL | VM_CALL_ARGS_SIMPLE) + builder.objtostring(:to_s, 0, YARV::VM_CALL_FCALL | YARV::VM_CALL_ARGS_SIMPLE) builder.anytostring end @@ -2588,11 +1754,11 @@ def with_instruction_sequence(type, name, parent_iseq, node) previous_builder = builder begin - iseq = InstructionSequence.new(type, name, parent_iseq, node.location) + iseq = YARV::InstructionSequence.new(type, name, parent_iseq, node.location) @current_iseq = iseq @builder = - Builder.new( + YARV::Builder.new( iseq, frozen_string_literal: frozen_string_literal, operands_unification: operands_unification, @@ -2642,12 +1808,12 @@ def with_opassign(node) visit(node.target.index) builder.dupn(2) - builder.send(:[], 1, VM_CALL_ARGS_SIMPLE) + builder.send(:[], 1, YARV::VM_CALL_ARGS_SIMPLE) yield builder.setn(3) - builder.send(:[]=, 2, VM_CALL_ARGS_SIMPLE) + builder.send(:[]=, 2, YARV::VM_CALL_ARGS_SIMPLE) builder.pop when ConstPathField name = node.target.constant.value.to_sym @@ -2696,7 +1862,7 @@ def with_opassign(node) yield builder.dup - builder.putspecialobject(VM_SPECIAL_OBJECT_CONST_BASE) + builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) builder.setconstant(names.last) when CVar name = node.target.value.value.to_sym diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb new file mode 100644 index 00000000..42faa66b --- /dev/null +++ b/lib/syntax_tree/yarv.rb @@ -0,0 +1,838 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # This object is used to track the size of the stack at any given time. It + # is effectively a mini symbolic interpreter. It's necessary because when + # instruction sequences get serialized they include a :stack_max field on + # them. This field is used to determine how much stack space to allocate + # for the instruction sequence. 
+ class Stack + attr_reader :current_size, :maximum_size + + def initialize + @current_size = 0 + @maximum_size = 0 + end + + def change_by(value) + @current_size += value + @maximum_size = @current_size if @current_size > @maximum_size + end + end + + # This represents every local variable associated with an instruction + # sequence. There are two kinds of locals: plain locals that are what you + # expect, and block proxy locals, which represent local variables + # associated with blocks that were passed into the current instruction + # sequence. + class LocalTable + # A local representing a block passed into the current instruction + # sequence. + class BlockLocal + attr_reader :name + + def initialize(name) + @name = name + end + end + + # A regular local variable. + class PlainLocal + attr_reader :name + + def initialize(name) + @name = name + end + end + + # The result of looking up a local variable in the current local table. + class Lookup + attr_reader :local, :index, :level + + def initialize(local, index, level) + @local = local + @index = index + @level = level + end + end + + attr_reader :locals + + def initialize + @locals = [] + end + + def find(name, level) + index = locals.index { |local| local.name == name } + Lookup.new(locals[index], index, level) if index + end + + def has?(name) + locals.any? { |local| local.name == name } + end + + def names + locals.map(&:name) + end + + def size + locals.length + end + + # Add a BlockLocal to the local table. + def block(name) + locals << BlockLocal.new(name) unless has?(name) + end + + # Add a PlainLocal to the local table. + def plain(name) + locals << PlainLocal.new(name) unless has?(name) + end + + # This is the offset from the top of the stack where this local variable + # lives. + def offset(index) + size - (index - 3) - 1 + end + end + + # This class is meant to mirror RubyVM::InstructionSequence. It contains a + # list of instructions along with the metadata pertaining to them. 
It also + # functions as a builder for the instruction sequence. + class InstructionSequence + MAGIC = "YARVInstructionSequence/SimpleDataFormat" + + # This provides a handle to the rb_iseq_load function, which allows you to + # pass a serialized iseq to Ruby and have it return a + # RubyVM::InstructionSequence object. + ISEQ_LOAD = + Fiddle::Function.new( + Fiddle::Handle::DEFAULT["rb_iseq_load"], + [Fiddle::TYPE_VOIDP] * 3, + Fiddle::TYPE_VOIDP + ) + + # The type of the instruction sequence. + attr_reader :type + + # The name of the instruction sequence. + attr_reader :name + + # The parent instruction sequence, if there is one. + attr_reader :parent_iseq + + # The location of the root node of this instruction sequence. + attr_reader :location + + # This is the list of information about the arguments to this + # instruction sequence. + attr_accessor :argument_size + attr_reader :argument_options + + # The list of instructions for this instruction sequence. + attr_reader :insns + + # The table of local variables. + attr_reader :local_table + + # The hash of names of instance and class variables pointing to the + # index of their associated inline storage. + attr_reader :inline_storages + + # The index of the next inline storage that will be created. + attr_reader :storage_index + + # An object that will track the current size of the stack and the + # maximum size of the stack for this instruction sequence. 
+ attr_reader :stack + + def initialize(type, name, parent_iseq, location) + @type = type + @name = name + @parent_iseq = parent_iseq + @location = location + + @argument_size = 0 + @argument_options = {} + + @local_table = LocalTable.new + @inline_storages = {} + @insns = [] + @storage_index = 0 + @stack = Stack.new + end + + def local_variable(name, level = 0) + if (lookup = local_table.find(name, level)) + lookup + elsif parent_iseq + parent_iseq.local_variable(name, level + 1) + end + end + + def push(insn) + insns << insn + insn + end + + def inline_storage + storage = storage_index + @storage_index += 1 + storage + end + + def inline_storage_for(name) + unless inline_storages.key?(name) + inline_storages[name] = inline_storage + end + + inline_storages[name] + end + + def length + insns.inject(0) do |sum, insn| + insn.is_a?(Array) ? sum + insn.length : sum + end + end + + def each_child + insns.each do |insn| + insn[1..].each do |operand| + yield operand if operand.is_a?(InstructionSequence) + end + end + end + + def eval + compiled = to_a + + # Temporary hack until we get these working. 
+ compiled[4][:node_id] = 11 + compiled[4][:node_ids] = [1, 0, 3, 2, 6, 7, 9, -1] + + Fiddle.dlunwrap(ISEQ_LOAD.call(Fiddle.dlwrap(compiled), 0, nil)).eval + end + + def to_a + versions = RUBY_VERSION.split(".").map(&:to_i) + + [ + MAGIC, + versions[0], + versions[1], + 1, + { + arg_size: argument_size, + local_size: local_table.size, + stack_max: stack.maximum_size + }, + name, + "", + "", + location.start_line, + type, + local_table.names, + argument_options, + [], + insns.map { |insn| serialize(insn) } + ] + end + + private + + def serialize(insn) + case insn[0] + when :checkkeyword, :getblockparam, :getblockparamproxy, + :getlocal_WC_0, :getlocal_WC_1, :getlocal, :setlocal_WC_0, + :setlocal_WC_1, :setlocal + iseq = self + + case insn[0] + when :getlocal_WC_1, :setlocal_WC_1 + iseq = iseq.parent_iseq + when :getblockparam, :getblockparamproxy, :getlocal, :setlocal + insn[2].times { iseq = iseq.parent_iseq } + end + + # Here we need to map the local variable index to the offset + # from the top of the stack where it will be stored. + [insn[0], iseq.local_table.offset(insn[1]), *insn[2..]] + when :defineclass + [insn[0], insn[1], insn[2].to_a, insn[3]] + when :definemethod, :definesmethod + [insn[0], insn[1], insn[2].to_a] + when :send + # For any instructions that push instruction sequences onto the + # stack, we need to call #to_a on them as well. + [insn[0], insn[1], (insn[2].to_a if insn[2])] + when :once + [insn[0], insn[1].to_a, insn[2]] + else + insn + end + end + end + + # This class serves as a layer of indirection between the instruction + # sequence and the compiler. It allows us to provide different behavior + # for certain instructions depending on the Ruby version. For example, + # class variable reads and writes gained an inline cache in Ruby 3.0. So + # we place the logic for checking the Ruby version in this class. 
+ class Builder + attr_reader :iseq, :stack + attr_reader :frozen_string_literal, + :operands_unification, + :specialized_instruction + + def initialize( + iseq, + frozen_string_literal: false, + operands_unification: true, + specialized_instruction: true + ) + @iseq = iseq + @stack = iseq.stack + + @frozen_string_literal = frozen_string_literal + @operands_unification = operands_unification + @specialized_instruction = specialized_instruction + end + + # This creates a new label at the current length of the instruction + # sequence. It is used as the operand for jump instructions. + def label + name = :"label_#{iseq.length}" + iseq.insns.last == name ? name : event(name) + end + + def event(name) + iseq.push(name) + name + end + + def adjuststack(number) + stack.change_by(-number) + iseq.push([:adjuststack, number]) + end + + def anytostring + stack.change_by(-2 + 1) + iseq.push([:anytostring]) + end + + def branchif(index) + stack.change_by(-1) + iseq.push([:branchif, index]) + end + + def branchnil(index) + stack.change_by(-1) + iseq.push([:branchnil, index]) + end + + def branchunless(index) + stack.change_by(-1) + iseq.push([:branchunless, index]) + end + + def checkkeyword(index, keyword_index) + stack.change_by(+1) + iseq.push([:checkkeyword, index, keyword_index]) + end + + def concatarray + stack.change_by(-2 + 1) + iseq.push([:concatarray]) + end + + def concatstrings(number) + stack.change_by(-number + 1) + iseq.push([:concatstrings, number]) + end + + def defined(type, name, message) + stack.change_by(-1 + 1) + iseq.push([:defined, type, name, message]) + end + + def defineclass(name, class_iseq, flags) + stack.change_by(-2 + 1) + iseq.push([:defineclass, name, class_iseq, flags]) + end + + def definemethod(name, method_iseq) + stack.change_by(0) + iseq.push([:definemethod, name, method_iseq]) + end + + def definesmethod(name, method_iseq) + stack.change_by(-1) + iseq.push([:definesmethod, name, method_iseq]) + end + + def dup + stack.change_by(-1 + 2) 
+ iseq.push([:dup]) + end + + def duparray(object) + stack.change_by(+1) + iseq.push([:duparray, object]) + end + + def duphash(object) + stack.change_by(+1) + iseq.push([:duphash, object]) + end + + def dupn(number) + stack.change_by(+number) + iseq.push([:dupn, number]) + end + + def expandarray(length, flag) + stack.change_by(-1 + length) + iseq.push([:expandarray, length, flag]) + end + + def getblockparam(index, level) + stack.change_by(+1) + iseq.push([:getblockparam, index, level]) + end + + def getblockparamproxy(index, level) + stack.change_by(+1) + iseq.push([:getblockparamproxy, index, level]) + end + + def getclassvariable(name) + stack.change_by(+1) + + if RUBY_VERSION >= "3.0" + iseq.push([:getclassvariable, name, iseq.inline_storage_for(name)]) + else + iseq.push([:getclassvariable, name]) + end + end + + def getconstant(name) + stack.change_by(-2 + 1) + iseq.push([:getconstant, name]) + end + + def getglobal(name) + stack.change_by(+1) + iseq.push([:getglobal, name]) + end + + def getinstancevariable(name) + stack.change_by(+1) + + if RUBY_VERSION >= "3.2" + iseq.push([:getinstancevariable, name, iseq.inline_storage]) + else + inline_storage = iseq.inline_storage_for(name) + iseq.push([:getinstancevariable, name, inline_storage]) + end + end + + def getlocal(index, level) + stack.change_by(+1) + + if operands_unification + # Specialize the getlocal instruction based on the level of the + # local variable. If it's 0 or 1, then there's a specialized + # instruction that will look at the current scope or the parent + # scope, respectively, and requires fewer operands. 
+ case level + when 0 + iseq.push([:getlocal_WC_0, index]) + when 1 + iseq.push([:getlocal_WC_1, index]) + else + iseq.push([:getlocal, index, level]) + end + else + iseq.push([:getlocal, index, level]) + end + end + + def getspecial(key, type) + stack.change_by(-0 + 1) + iseq.push([:getspecial, key, type]) + end + + def intern + stack.change_by(-1 + 1) + iseq.push([:intern]) + end + + def invokeblock(method_id, argc, flag) + stack.change_by(-argc + 1) + iseq.push([:invokeblock, call_data(method_id, argc, flag)]) + end + + def invokesuper(method_id, argc, flag, block_iseq) + stack.change_by(-(argc + 1) + 1) + + cdata = call_data(method_id, argc, flag) + iseq.push([:invokesuper, cdata, block_iseq]) + end + + def jump(index) + stack.change_by(0) + iseq.push([:jump, index]) + end + + def leave + stack.change_by(-1) + iseq.push([:leave]) + end + + def newarray(length) + stack.change_by(-length + 1) + iseq.push([:newarray, length]) + end + + def newhash(length) + stack.change_by(-length + 1) + iseq.push([:newhash, length]) + end + + def newrange(flag) + stack.change_by(-2 + 1) + iseq.push([:newrange, flag]) + end + + def nop + stack.change_by(0) + iseq.push([:nop]) + end + + def objtostring(method_id, argc, flag) + stack.change_by(-1 + 1) + iseq.push([:objtostring, call_data(method_id, argc, flag)]) + end + + def once(postexe_iseq, inline_storage) + stack.change_by(+1) + iseq.push([:once, postexe_iseq, inline_storage]) + end + + def opt_getconstant_path(names) + if RUBY_VERSION >= "3.2" + stack.change_by(+1) + iseq.push([:opt_getconstant_path, names]) + else + inline_storage = iseq.inline_storage + getinlinecache = opt_getinlinecache(-1, inline_storage) + + if names[0] == :"" + names.shift + pop + putobject(Object) + end + + names.each_with_index do |name, index| + putobject(index == 0) + getconstant(name) + end + + opt_setinlinecache(inline_storage) + getinlinecache[1] = label + end + end + + def opt_getinlinecache(offset, inline_storage) + stack.change_by(+1) + 
iseq.push([:opt_getinlinecache, offset, inline_storage]) + end + + def opt_newarray_max(length) + if specialized_instruction + stack.change_by(-length + 1) + iseq.push([:opt_newarray_max, length]) + else + newarray(length) + send(:max, 0, VM_CALL_ARGS_SIMPLE) + end + end + + def opt_newarray_min(length) + if specialized_instruction + stack.change_by(-length + 1) + iseq.push([:opt_newarray_min, length]) + else + newarray(length) + send(:min, 0, VM_CALL_ARGS_SIMPLE) + end + end + + def opt_setinlinecache(inline_storage) + stack.change_by(-1 + 1) + iseq.push([:opt_setinlinecache, inline_storage]) + end + + def opt_str_freeze(value) + if specialized_instruction + stack.change_by(+1) + iseq.push( + [ + :opt_str_freeze, + value, + call_data(:freeze, 0, VM_CALL_ARGS_SIMPLE) + ] + ) + else + putstring(value) + send(:freeze, 0, VM_CALL_ARGS_SIMPLE) + end + end + + def opt_str_uminus(value) + if specialized_instruction + stack.change_by(+1) + iseq.push( + [:opt_str_uminus, value, call_data(:-@, 0, VM_CALL_ARGS_SIMPLE)] + ) + else + putstring(value) + send(:-@, 0, VM_CALL_ARGS_SIMPLE) + end + end + + def pop + stack.change_by(-1) + iseq.push([:pop]) + end + + def putnil + stack.change_by(+1) + iseq.push([:putnil]) + end + + def putobject(object) + stack.change_by(+1) + + if operands_unification + # Specialize the putobject instruction based on the value of the + # object. If it's 0 or 1, then there's a specialized instruction + # that will push the object onto the stack and requires fewer + # operands. 
+ if object.eql?(0) + iseq.push([:putobject_INT2FIX_0_]) + elsif object.eql?(1) + iseq.push([:putobject_INT2FIX_1_]) + else + iseq.push([:putobject, object]) + end + else + iseq.push([:putobject, object]) + end + end + + def putself + stack.change_by(+1) + iseq.push([:putself]) + end + + def putspecialobject(object) + stack.change_by(+1) + iseq.push([:putspecialobject, object]) + end + + def putstring(object) + stack.change_by(+1) + iseq.push([:putstring, object]) + end + + def send(method_id, argc, flag, block_iseq = nil) + stack.change_by(-(argc + 1) + 1) + cdata = call_data(method_id, argc, flag) + + if specialized_instruction + # Specialize the send instruction. If it doesn't have a block + # attached, then we will replace it with an opt_send_without_block + # and do further specializations based on the called method and the + # number of arguments. + + # stree-ignore + if !block_iseq && (flag & VM_CALL_ARGS_BLOCKARG) == 0 + case [method_id, argc] + when [:length, 0] then iseq.push([:opt_length, cdata]) + when [:size, 0] then iseq.push([:opt_size, cdata]) + when [:empty?, 0] then iseq.push([:opt_empty_p, cdata]) + when [:nil?, 0] then iseq.push([:opt_nil_p, cdata]) + when [:succ, 0] then iseq.push([:opt_succ, cdata]) + when [:!, 0] then iseq.push([:opt_not, cdata]) + when [:+, 1] then iseq.push([:opt_plus, cdata]) + when [:-, 1] then iseq.push([:opt_minus, cdata]) + when [:*, 1] then iseq.push([:opt_mult, cdata]) + when [:/, 1] then iseq.push([:opt_div, cdata]) + when [:%, 1] then iseq.push([:opt_mod, cdata]) + when [:==, 1] then iseq.push([:opt_eq, cdata]) + when [:=~, 1] then iseq.push([:opt_regexpmatch2, cdata]) + when [:<, 1] then iseq.push([:opt_lt, cdata]) + when [:<=, 1] then iseq.push([:opt_le, cdata]) + when [:>, 1] then iseq.push([:opt_gt, cdata]) + when [:>=, 1] then iseq.push([:opt_ge, cdata]) + when [:<<, 1] then iseq.push([:opt_ltlt, cdata]) + when [:[], 1] then iseq.push([:opt_aref, cdata]) + when [:&, 1] then iseq.push([:opt_and, cdata]) + when 
[:|, 1] then iseq.push([:opt_or, cdata]) + when [:[]=, 2] then iseq.push([:opt_aset, cdata]) + when [:!=, 1] + eql_data = call_data(:==, 1, VM_CALL_ARGS_SIMPLE) + iseq.push([:opt_neq, eql_data, cdata]) + else + iseq.push([:opt_send_without_block, cdata]) + end + else + iseq.push([:send, cdata, block_iseq]) + end + else + iseq.push([:send, cdata, block_iseq]) + end + end + + def setclassvariable(name) + stack.change_by(-1) + + if RUBY_VERSION >= "3.0" + iseq.push([:setclassvariable, name, iseq.inline_storage_for(name)]) + else + iseq.push([:setclassvariable, name]) + end + end + + def setconstant(name) + stack.change_by(-2) + iseq.push([:setconstant, name]) + end + + def setglobal(name) + stack.change_by(-1) + iseq.push([:setglobal, name]) + end + + def setinstancevariable(name) + stack.change_by(-1) + + if RUBY_VERSION >= "3.2" + iseq.push([:setinstancevariable, name, iseq.inline_storage]) + else + inline_storage = iseq.inline_storage_for(name) + iseq.push([:setinstancevariable, name, inline_storage]) + end + end + + def setlocal(index, level) + stack.change_by(-1) + + if operands_unification + # Specialize the setlocal instruction based on the level of the + # local variable. If it's 0 or 1, then there's a specialized + # instruction that will write to the current scope or the parent + # scope, respectively, and requires fewer operands. 
+ case level + when 0 + iseq.push([:setlocal_WC_0, index]) + when 1 + iseq.push([:setlocal_WC_1, index]) + else + iseq.push([:setlocal, index, level]) + end + else + iseq.push([:setlocal, index, level]) + end + end + + def setn(number) + stack.change_by(-1 + 1) + iseq.push([:setn, number]) + end + + def splatarray(flag) + stack.change_by(-1 + 1) + iseq.push([:splatarray, flag]) + end + + def swap + stack.change_by(-2 + 2) + iseq.push([:swap]) + end + + def topn(number) + stack.change_by(+1) + iseq.push([:topn, number]) + end + + def toregexp(options, length) + stack.change_by(-length + 1) + iseq.push([:toregexp, options, length]) + end + + private + + # This creates a call data object that is used as the operand for the + # send, invokesuper, and objtostring instructions. + def call_data(method_id, argc, flag) + { mid: method_id, flag: flag, orig_argc: argc } + end + end + + # These constants correspond to the putspecialobject instruction. They are + # used to represent special objects that are pushed onto the stack. + VM_SPECIAL_OBJECT_VMCORE = 1 + VM_SPECIAL_OBJECT_CBASE = 2 + VM_SPECIAL_OBJECT_CONST_BASE = 3 + + # These constants correspond to the flag passed as part of the call data + # structure on the send instruction. They are used to represent various + # metadata about the callsite (e.g., were keyword arguments used?, was a + # block given?, etc.). + VM_CALL_ARGS_SPLAT = 1 << 0 + VM_CALL_ARGS_BLOCKARG = 1 << 1 + VM_CALL_FCALL = 1 << 2 + VM_CALL_VCALL = 1 << 3 + VM_CALL_ARGS_SIMPLE = 1 << 4 + VM_CALL_BLOCKISEQ = 1 << 5 + VM_CALL_KWARG = 1 << 6 + VM_CALL_KW_SPLAT = 1 << 7 + VM_CALL_TAILCALL = 1 << 8 + VM_CALL_SUPER = 1 << 9 + VM_CALL_ZSUPER = 1 << 10 + VM_CALL_OPT_SEND = 1 << 11 + VM_CALL_KW_SPLAT_MUT = 1 << 12 + + # These constants correspond to the value passed as part of the defined + # instruction. It's an enum defined in the CRuby codebase that tells that + # instruction what kind of defined check to perform. 
+ DEFINED_NIL = 1 + DEFINED_IVAR = 2 + DEFINED_LVAR = 3 + DEFINED_GVAR = 4 + DEFINED_CVAR = 5 + DEFINED_CONST = 6 + DEFINED_METHOD = 7 + DEFINED_YIELD = 8 + DEFINED_ZSUPER = 9 + DEFINED_SELF = 10 + DEFINED_TRUE = 11 + DEFINED_FALSE = 12 + DEFINED_ASGN = 13 + DEFINED_EXPR = 14 + DEFINED_REF = 15 + DEFINED_FUNC = 16 + DEFINED_CONST_FROM = 17 + + # These constants correspond to the value passed in the flags as part of + # the defineclass instruction. + VM_DEFINECLASS_TYPE_CLASS = 0 + VM_DEFINECLASS_TYPE_SINGLETON_CLASS = 1 + VM_DEFINECLASS_TYPE_MODULE = 2 + VM_DEFINECLASS_FLAG_SCOPED = 8 + VM_DEFINECLASS_FLAG_HAS_SUPERCLASS = 16 + end +end diff --git a/test/compiler_test.rb b/test/compiler_test.rb index cdf2860e..3b8c0ea2 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -449,7 +449,7 @@ def serialize_iseq(iseq) when Array insn.map do |operand| if operand.is_a?(Array) && - operand[0] == Compiler::InstructionSequence::MAGIC + operand[0] == YARV::InstructionSequence::MAGIC serialize_iseq(operand) else operand From 6c6b88b1f4eeb5f43164d6eb81c5c8272dbd4315 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 18 Nov 2022 20:30:07 -0500 Subject: [PATCH 045/104] Start the disassembler --- lib/syntax_tree/yarv.rb | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index 42faa66b..e3780a0c 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -274,6 +274,43 @@ def serialize(insn) end end + # This class is responsible for taking a compiled instruction sequence and + # walking through it to generate equivalent Ruby code. 
+ class Disassembler + attr_reader :iseq + + def initialize(iseq) + @iseq = iseq + end + + def to_ruby + stack = [] + + iseq.insns.each do |insn| + case insn[0] + when :leave + stack << ReturnNode.new(arguments: Args.new(parts: [stack.pop], location: Location.default), location: Location.default) + when :opt_plus + left, right = stack.pop(2) + stack << Binary.new(left: left, operator: :+, right: right, location: Location.default) + when :putobject + case insn[1] + when Integer + stack << Int.new(value: insn[1].inspect, location: Location.default) + else + raise "Unknown object type: #{insn[1].class.name}" + end + when :putobject_INT2FIX_1_ + stack << Int.new(value: "1", location: Location.default) + else + raise "Unknown instruction #{insn[0]}" + end + end + + Statements.new(nil, body: stack, location: Location.default) + end + end + # This class serves as a layer of indirection between the instruction # sequence and the compiler. It allows us to provide different behavior # for certain instructions depending on the Ruby version. 
For example, From c9db96bc925c10d80e530a3238ce50980aa57f3f Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 18 Nov 2022 20:32:52 -0500 Subject: [PATCH 046/104] opt_mult, Float, and Rational --- lib/syntax_tree/yarv.rb | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index e3780a0c..cbb91f1e 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -290,13 +290,20 @@ def to_ruby case insn[0] when :leave stack << ReturnNode.new(arguments: Args.new(parts: [stack.pop], location: Location.default), location: Location.default) + when :opt_mult + left, right = stack.pop(2) + stack << Binary.new(left: left, operator: :*, right: right, location: Location.default) when :opt_plus left, right = stack.pop(2) stack << Binary.new(left: left, operator: :+, right: right, location: Location.default) when :putobject case insn[1] + when Float + stack << FloatLiteral.new(value: insn[1].inspect, location: Location.default) when Integer stack << Int.new(value: insn[1].inspect, location: Location.default) + when Rational + stack << RationalLiteral.new(value: insn[1].inspect, location: Location.default) else raise "Unknown object type: #{insn[1].class.name}" end From 8ad799ad2dfb73ec90b8a8def55b7c088fc45bed Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 18 Nov 2022 20:37:27 -0500 Subject: [PATCH 047/104] Local variables and assignments --- lib/syntax_tree/yarv.rb | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index cbb91f1e..7290d87f 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -288,6 +288,9 @@ def to_ruby iseq.insns.each do |insn| case insn[0] + when :getlocal_WC_0 + value = iseq.local_table.locals[insn[1]].name.to_s + stack << VarRef.new(value: Ident.new(value: value, location: Location.default), location: Location.default) when :leave stack << ReturnNode.new(arguments: Args.new(parts: [stack.pop], location: 
Location.default), location: Location.default) when :opt_mult @@ -309,6 +312,9 @@ def to_ruby end when :putobject_INT2FIX_1_ stack << Int.new(value: "1", location: Location.default) + when :setlocal_WC_0 + target = VarField.new(value: Ident.new(value: iseq.local_table.locals[insn[1]].name.to_s, location: Location.default), location: Location.default) + stack << Assign.new(target: target, value: stack.pop, location: Location.default) else raise "Unknown instruction #{insn[0]}" end From 0047065d4227b141e0d9d17542696b5adb75e12b Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Sat, 19 Nov 2022 14:48:55 -0500 Subject: [PATCH 048/104] Inline builder into ISeq --- lib/syntax_tree/compiler.rb | 912 +++++++++++++++++------------------- lib/syntax_tree/yarv.rb | 485 ++++++++++--------- 2 files changed, 687 insertions(+), 710 deletions(-) diff --git a/lib/syntax_tree/compiler.rb b/lib/syntax_tree/compiler.rb index c936c9c1..424a9cf5 100644 --- a/lib/syntax_tree/compiler.rb +++ b/lib/syntax_tree/compiler.rb @@ -192,11 +192,7 @@ def visit_unsupported(_node) :specialized_instruction # The current instruction sequence that is being compiled. - attr_reader :current_iseq - - # This is the current builder that is being used to construct the current - # instruction sequence. - attr_reader :builder + attr_reader :iseq # A boolean to track if we're currently compiling the last statement # within a set of statements. 
This information is necessary to determine @@ -212,8 +208,7 @@ def initialize( @operands_unification = operands_unification @specialized_instruction = specialized_instruction - @current_iseq = nil - @builder = nil + @iseq = nil @last_statement = false end @@ -223,45 +218,45 @@ def visit_BEGIN(node) def visit_CHAR(node) if frozen_string_literal - builder.putobject(node.value[1..]) + iseq.putobject(node.value[1..]) else - builder.putstring(node.value[1..]) + iseq.putstring(node.value[1..]) end end def visit_END(node) - name = "block in #{current_iseq.name}" + name = "block in #{iseq.name}" once_iseq = - with_instruction_sequence(:block, name, current_iseq, node) do + with_instruction_sequence(:block, name, node) do postexe_iseq = - with_instruction_sequence(:block, name, current_iseq, node) do + with_instruction_sequence(:block, name, node) do *statements, last_statement = node.statements.body visit_all(statements) with_last_statement { visit(last_statement) } - builder.leave + iseq.leave end - builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) - builder.send(:"core#set_postexe", 0, YARV::VM_CALL_FCALL, postexe_iseq) - builder.leave + iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) + iseq.send(:"core#set_postexe", 0, YARV::VM_CALL_FCALL, postexe_iseq) + iseq.leave end - builder.once(once_iseq, current_iseq.inline_storage) - builder.pop + iseq.once(once_iseq, iseq.inline_storage) + iseq.pop end def visit_alias(node) - builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) - builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_CBASE) + iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) + iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_CBASE) visit(node.left) visit(node.right) - builder.send(:"core#set_method_alias", 3, YARV::VM_CALL_ARGS_SIMPLE) + iseq.send(:"core#set_method_alias", 3, YARV::VM_CALL_ARGS_SIMPLE) end def visit_aref(node) visit(node.collection) visit(node.index) - builder.send(:[], 1, YARV::VM_CALL_ARGS_SIMPLE) + iseq.send(:[], 1, 
YARV::VM_CALL_ARGS_SIMPLE) end def visit_arg_block(node) @@ -274,7 +269,7 @@ def visit_arg_paren(node) def visit_arg_star(node) visit(node.value) - builder.splatarray(false) + iseq.splatarray(false) end def visit_args(node) @@ -283,99 +278,97 @@ def visit_args(node) def visit_array(node) if (compiled = RubyVisitor.compile(node)) - builder.duparray(compiled) + iseq.duparray(compiled) else length = 0 node.contents.parts.each do |part| if part.is_a?(ArgStar) if length > 0 - builder.newarray(length) + iseq.newarray(length) length = 0 end visit(part.value) - builder.concatarray + iseq.concatarray else visit(part) length += 1 end end - builder.newarray(length) if length > 0 - if length > 0 && length != node.contents.parts.length - builder.concatarray - end + iseq.newarray(length) if length > 0 + iseq.concatarray if length > 0 && length != node.contents.parts.length end end def visit_assign(node) case node.target when ARefField - builder.putnil + iseq.putnil visit(node.target.collection) visit(node.target.index) visit(node.value) - builder.setn(3) - builder.send(:[]=, 2, YARV::VM_CALL_ARGS_SIMPLE) - builder.pop + iseq.setn(3) + iseq.send(:[]=, 2, YARV::VM_CALL_ARGS_SIMPLE) + iseq.pop when ConstPathField names = constant_names(node.target) name = names.pop if RUBY_VERSION >= "3.2" - builder.opt_getconstant_path(names) + iseq.opt_getconstant_path(names) visit(node.value) - builder.swap - builder.topn(1) - builder.swap - builder.setconstant(name) + iseq.swap + iseq.topn(1) + iseq.swap + iseq.setconstant(name) else visit(node.value) - builder.dup if last_statement? - builder.opt_getconstant_path(names) - builder.setconstant(name) + iseq.dup if last_statement? 
+ iseq.opt_getconstant_path(names) + iseq.setconstant(name) end when Field - builder.putnil + iseq.putnil visit(node.target) visit(node.value) - builder.setn(2) - builder.send(:"#{node.target.name.value}=", 1, YARV::VM_CALL_ARGS_SIMPLE) - builder.pop + iseq.setn(2) + iseq.send(:"#{node.target.name.value}=", 1, YARV::VM_CALL_ARGS_SIMPLE) + iseq.pop when TopConstField name = node.target.constant.value.to_sym if RUBY_VERSION >= "3.2" - builder.putobject(Object) + iseq.putobject(Object) visit(node.value) - builder.swap - builder.topn(1) - builder.swap - builder.setconstant(name) + iseq.swap + iseq.topn(1) + iseq.swap + iseq.setconstant(name) else visit(node.value) - builder.dup if last_statement? - builder.putobject(Object) - builder.setconstant(name) + iseq.dup if last_statement? + iseq.putobject(Object) + iseq.setconstant(name) end when VarField visit(node.value) - builder.dup if last_statement? + iseq.dup if last_statement? case node.target.value when Const - builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) - builder.setconstant(node.target.value.value.to_sym) + iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) + iseq.setconstant(node.target.value.value.to_sym) when CVar - builder.setclassvariable(node.target.value.value.to_sym) + iseq.setclassvariable(node.target.value.value.to_sym) when GVar - builder.setglobal(node.target.value.value.to_sym) + iseq.setglobal(node.target.value.value.to_sym) when Ident local_variable = visit(node.target) - builder.setlocal(local_variable.index, local_variable.level) + iseq.setlocal(local_variable.index, local_variable.level) when IVar - builder.setinstancevariable(node.target.value.value.to_sym) + iseq.setinstancevariable(node.target.value.value.to_sym) end end end @@ -390,12 +383,12 @@ def visit_assoc_splat(node) end def visit_backref(node) - builder.getspecial(1, 2 * node.value[1..].to_i) + iseq.getspecial(1, 2 * node.value[1..].to_i) end def visit_bare_assoc_hash(node) if (compiled = 
RubyVisitor.compile(node)) - builder.duphash(compiled) + iseq.duphash(compiled) else visit_all(node.assocs) end @@ -405,41 +398,36 @@ def visit_binary(node) case node.operator when :"&&" visit(node.left) - builder.dup + iseq.dup - branchunless = builder.branchunless(-1) - builder.pop + branchunless = iseq.branchunless(-1) + iseq.pop visit(node.right) - branchunless[1] = builder.label + branchunless[1] = iseq.label when :"||" visit(node.left) - builder.dup + iseq.dup - branchif = builder.branchif(-1) - builder.pop + branchif = iseq.branchif(-1) + iseq.pop visit(node.right) - branchif[1] = builder.label + branchif[1] = iseq.label else visit(node.left) visit(node.right) - builder.send(node.operator, 1, YARV::VM_CALL_ARGS_SIMPLE) + iseq.send(node.operator, 1, YARV::VM_CALL_ARGS_SIMPLE) end end def visit_block(node) - with_instruction_sequence( - :block, - "block in #{current_iseq.name}", - current_iseq, - node - ) do - builder.event(:RUBY_EVENT_B_CALL) + with_instruction_sequence(:block, "block in #{iseq.name}", node) do + iseq.event(:RUBY_EVENT_B_CALL) visit(node.block_var) visit(node.bodystmt) - builder.event(:RUBY_EVENT_B_RETURN) - builder.leave + iseq.event(:RUBY_EVENT_B_RETURN) + iseq.leave end end @@ -447,22 +435,20 @@ def visit_block_var(node) params = node.params if params.requireds.length == 1 && params.optionals.empty? && - !params.rest && params.posts.empty? && params.keywords.empty? && - !params.keyword_rest && !params.block - current_iseq.argument_options[:ambiguous_param0] = true + !params.rest && params.posts.empty? && params.keywords.empty? 
&& + !params.keyword_rest && !params.block + iseq.argument_options[:ambiguous_param0] = true end visit(node.params) - node.locals.each do |local| - current_iseq.local_table.plain(local.value.to_sym) - end + node.locals.each { |local| iseq.local_table.plain(local.value.to_sym) } end def visit_blockarg(node) - current_iseq.argument_options[:block_start] = current_iseq.argument_size - current_iseq.local_table.block(node.name.value.to_sym) - current_iseq.argument_size += 1 + iseq.argument_options[:block_start] = iseq.argument_size + iseq.local_table.block(node.name.value.to_sym) + iseq.argument_size += 1 end def visit_bodystmt(node) @@ -497,15 +483,15 @@ def visit_call(node) parts = node.receiver.contents&.parts || [] if parts.none? { |part| part.is_a?(ArgStar) } && - RubyVisitor.compile(node.receiver).nil? + RubyVisitor.compile(node.receiver).nil? case node.message.value when "max" visit(node.receiver.contents) - builder.opt_newarray_max(parts.length) + iseq.opt_newarray_max(parts.length) return when "min" visit(node.receiver.contents) - builder.opt_newarray_min(parts.length) + iseq.opt_newarray_min(parts.length) return end end @@ -513,10 +499,10 @@ def visit_call(node) if RubyVisitor.compile(node.receiver).nil? 
case node.message.value when "-@" - builder.opt_str_uminus(node.receiver.parts.first.value) + iseq.opt_str_uminus(node.receiver.parts.first.value) return when "freeze" - builder.opt_str_freeze(node.receiver.parts.first.value) + iseq.opt_str_freeze(node.receiver.parts.first.value) return end end @@ -525,10 +511,10 @@ def visit_call(node) if node.receiver if node.receiver.is_a?(VarRef) - lookup = current_iseq.local_variable(node.receiver.value.value.to_sym) + lookup = iseq.local_variable(node.receiver.value.value.to_sym) if lookup.local.is_a?(YARV::LocalTable::BlockLocal) - builder.getblockparamproxy(lookup.index, lookup.level) + iseq.getblockparamproxy(lookup.index, lookup.level) else visit(node.receiver) end @@ -536,13 +522,13 @@ def visit_call(node) visit(node.receiver) end else - builder.putself + iseq.putself end branchnil = if node.operator&.value == "&." - builder.dup - builder.branchnil(-1) + iseq.dup + iseq.branchnil(-1) end flag = 0 @@ -559,12 +545,12 @@ def visit_call(node) when ArgsForward flag |= YARV::VM_CALL_ARGS_SPLAT | YARV::VM_CALL_ARGS_BLOCKARG - lookup = current_iseq.local_table.find(:*, 0) - builder.getlocal(lookup.index, lookup.level) - builder.splatarray(arg_parts.length != 1) + lookup = iseq.local_table.find(:*, 0) + iseq.getlocal(lookup.index, lookup.level) + iseq.splatarray(arg_parts.length != 1) - lookup = current_iseq.local_table.find(:&, 0) - builder.getblockparamproxy(lookup.index, lookup.level) + lookup = iseq.local_table.find(:&, 0) + iseq.getblockparamproxy(lookup.index, lookup.level) when BareAssocHash flag |= YARV::VM_CALL_KW_SPLAT visit(arg_part) @@ -577,8 +563,8 @@ def visit_call(node) flag |= YARV::VM_CALL_ARGS_SIMPLE if block_iseq.nil? && flag == 0 flag |= YARV::VM_CALL_FCALL if node.receiver.nil? 
- builder.send(node.message.value.to_sym, argc, flag, block_iseq) - branchnil[1] = builder.label if branchnil + iseq.send(node.message.value.to_sym, argc, flag, block_iseq) + branchnil[1] = iseq.label if branchnil end def visit_case(node) @@ -586,7 +572,6 @@ def visit_case(node) clauses = [] else_clause = nil - current = node.consequent while current @@ -601,21 +586,19 @@ def visit_case(node) branches = clauses.map do |clause| visit(clause.arguments) - builder.topn(1) - builder.send(:===, 1, YARV::VM_CALL_FCALL | YARV::VM_CALL_ARGS_SIMPLE) - [clause, builder.branchif(:label_00)] + iseq.topn(1) + iseq.send(:===, 1, YARV::VM_CALL_FCALL | YARV::VM_CALL_ARGS_SIMPLE) + [clause, iseq.branchif(:label_00)] end - builder.pop - - else_clause ? visit(else_clause) : builder.putnil - - builder.leave + iseq.pop + else_clause ? visit(else_clause) : iseq.putnil + iseq.leave branches.each_with_index do |(clause, branchif), index| - builder.leave if index != 0 - branchif[1] = builder.label - builder.pop + iseq.leave if index != 0 + branchif[1] = iseq.label + iseq.pop visit(clause) end end @@ -623,16 +606,11 @@ def visit_case(node) def visit_class(node) name = node.constant.constant.value.to_sym class_iseq = - with_instruction_sequence( - :class, - "", - current_iseq, - node - ) do - builder.event(:RUBY_EVENT_CLASS) + with_instruction_sequence(:class, "", node) do + iseq.event(:RUBY_EVENT_CLASS) visit(node.bodystmt) - builder.event(:RUBY_EVENT_END) - builder.leave + iseq.event(:RUBY_EVENT_END) + iseq.leave end flags = YARV::VM_DEFINECLASS_TYPE_CLASS @@ -642,20 +620,20 @@ def visit_class(node) flags |= YARV::VM_DEFINECLASS_FLAG_SCOPED visit(node.constant.parent) when ConstRef - builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) + iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) when TopConstRef flags |= YARV::VM_DEFINECLASS_FLAG_SCOPED - builder.putobject(Object) + iseq.putobject(Object) end if node.superclass flags |= YARV::VM_DEFINECLASS_FLAG_HAS_SUPERCLASS 
visit(node.superclass) else - builder.putnil + iseq.putnil end - builder.defineclass(name, class_iseq, flags) + iseq.defineclass(name, class_iseq, flags) end def visit_command(node) @@ -690,34 +668,29 @@ def visit_const_path_field(node) def visit_const_path_ref(node) names = constant_names(node) - builder.opt_getconstant_path(names) + iseq.opt_getconstant_path(names) end def visit_def(node) method_iseq = - with_instruction_sequence( - :method, - node.name.value, - current_iseq, - node - ) do + with_instruction_sequence(:method, node.name.value, node) do visit(node.params) if node.params - builder.event(:RUBY_EVENT_CALL) + iseq.event(:RUBY_EVENT_CALL) visit(node.bodystmt) - builder.event(:RUBY_EVENT_RETURN) - builder.leave + iseq.event(:RUBY_EVENT_RETURN) + iseq.leave end name = node.name.value.to_sym if node.target visit(node.target) - builder.definesmethod(name, method_iseq) + iseq.definesmethod(name, method_iseq) else - builder.definemethod(name, method_iseq) + iseq.definemethod(name, method_iseq) end - builder.putobject(name) + iseq.putobject(name) end def visit_defined(node) @@ -726,67 +699,67 @@ def visit_defined(node) # If we're assigning to a local variable, then we need to make sure # that we put it into the local table. 
if node.value.target.is_a?(VarField) && - node.value.target.value.is_a?(Ident) - current_iseq.local_table.plain(node.value.target.value.value.to_sym) + node.value.target.value.is_a?(Ident) + iseq.local_table.plain(node.value.target.value.value.to_sym) end - builder.putobject("assignment") + iseq.putobject("assignment") when VarRef value = node.value.value name = value.value.to_sym case value when Const - builder.putnil - builder.defined(YARV::DEFINED_CONST, name, "constant") + iseq.putnil + iseq.defined(YARV::DEFINED_CONST, name, "constant") when CVar - builder.putnil - builder.defined(YARV::DEFINED_CVAR, name, "class variable") + iseq.putnil + iseq.defined(YARV::DEFINED_CVAR, name, "class variable") when GVar - builder.putnil - builder.defined(YARV::DEFINED_GVAR, name, "global-variable") + iseq.putnil + iseq.defined(YARV::DEFINED_GVAR, name, "global-variable") when Ident - builder.putobject("local-variable") + iseq.putobject("local-variable") when IVar - builder.putnil - builder.defined(YARV::DEFINED_IVAR, name, "instance-variable") + iseq.putnil + iseq.defined(YARV::DEFINED_IVAR, name, "instance-variable") when Kw case name when :false - builder.putobject("false") + iseq.putobject("false") when :nil - builder.putobject("nil") + iseq.putobject("nil") when :self - builder.putobject("self") + iseq.putobject("self") when :true - builder.putobject("true") + iseq.putobject("true") end end when VCall - builder.putself + iseq.putself name = node.value.value.value.to_sym - builder.defined(YARV::DEFINED_FUNC, name, "method") + iseq.defined(YARV::DEFINED_FUNC, name, "method") when YieldNode - builder.putnil - builder.defined(YARV::DEFINED_YIELD, false, "yield") + iseq.putnil + iseq.defined(YARV::DEFINED_YIELD, false, "yield") when ZSuper - builder.putnil - builder.defined(YARV::DEFINED_ZSUPER, false, "super") + iseq.putnil + iseq.defined(YARV::DEFINED_ZSUPER, false, "super") else - builder.putobject("expression") + iseq.putobject("expression") end end def 
visit_dyna_symbol(node) if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - builder.putobject(node.parts.first.value.to_sym) + iseq.putobject(node.parts.first.value.to_sym) end end def visit_else(node) visit(node.statements) - builder.pop unless last_statement? + iseq.pop unless last_statement? end def visit_elsif(node) @@ -805,51 +778,50 @@ def visit_field(node) end def visit_float(node) - builder.putobject(node.accept(RubyVisitor.new)) + iseq.putobject(node.accept(RubyVisitor.new)) end def visit_for(node) visit(node.collection) name = node.index.value.value.to_sym - current_iseq.local_table.plain(name) + iseq.local_table.plain(name) block_iseq = with_instruction_sequence( :block, - "block in #{current_iseq.name}", - current_iseq, + "block in #{iseq.name}", node.statements ) do - current_iseq.argument_options[:lead_num] ||= 0 - current_iseq.argument_options[:lead_num] += 1 - current_iseq.argument_options[:ambiguous_param0] = true + iseq.argument_options[:lead_num] ||= 0 + iseq.argument_options[:lead_num] += 1 + iseq.argument_options[:ambiguous_param0] = true - current_iseq.argument_size += 1 - current_iseq.local_table.plain(2) + iseq.argument_size += 1 + iseq.local_table.plain(2) - builder.getlocal(0, 0) + iseq.getlocal(0, 0) - local_variable = current_iseq.local_variable(name) - builder.setlocal(local_variable.index, local_variable.level) + local_variable = iseq.local_variable(name) + iseq.setlocal(local_variable.index, local_variable.level) - builder.event(:RUBY_EVENT_B_CALL) - builder.nop + iseq.event(:RUBY_EVENT_B_CALL) + iseq.nop visit(node.statements) - builder.event(:RUBY_EVENT_B_RETURN) - builder.leave + iseq.event(:RUBY_EVENT_B_RETURN) + iseq.leave end - builder.send(:each, 0, 0, block_iseq) + iseq.send(:each, 0, 0, block_iseq) end def visit_hash(node) if (compiled = RubyVisitor.compile(node)) - builder.duphash(compiled) + iseq.duphash(compiled) else visit_all(node.assocs) - builder.newhash(node.assocs.length * 2) + 
iseq.newhash(node.assocs.length * 2) end end @@ -860,30 +832,30 @@ def visit_heredoc(node) visit(node.parts.first) else length = visit_string_parts(node) - builder.concatstrings(length) + iseq.concatstrings(length) end end def visit_if(node) visit(node.predicate) - branchunless = builder.branchunless(-1) + branchunless = iseq.branchunless(-1) visit(node.statements) if last_statement? - builder.leave - branchunless[1] = builder.label + iseq.leave + branchunless[1] = iseq.label - node.consequent ? visit(node.consequent) : builder.putnil + node.consequent ? visit(node.consequent) : iseq.putnil else - builder.pop + iseq.pop if node.consequent - jump = builder.jump(-1) - branchunless[1] = builder.label + jump = iseq.jump(-1) + branchunless[1] = iseq.label visit(node.consequent) - jump[1] = builder.label + jump[1] = iseq.label else - branchunless[1] = builder.label + branchunless[1] = iseq.label end end end @@ -905,40 +877,35 @@ def visit_if_op(node) end def visit_imaginary(node) - builder.putobject(node.accept(RubyVisitor.new)) + iseq.putobject(node.accept(RubyVisitor.new)) end def visit_int(node) - builder.putobject(node.accept(RubyVisitor.new)) + iseq.putobject(node.accept(RubyVisitor.new)) end def visit_kwrest_param(node) - current_iseq.argument_options[:kwrest] = current_iseq.argument_size - current_iseq.argument_size += 1 - current_iseq.local_table.plain(node.name.value.to_sym) + iseq.argument_options[:kwrest] = iseq.argument_size + iseq.argument_size += 1 + iseq.local_table.plain(node.name.value.to_sym) end def visit_label(node) - builder.putobject(node.accept(RubyVisitor.new)) + iseq.putobject(node.accept(RubyVisitor.new)) end def visit_lambda(node) lambda_iseq = - with_instruction_sequence( - :block, - "block in #{current_iseq.name}", - current_iseq, - node - ) do - builder.event(:RUBY_EVENT_B_CALL) + with_instruction_sequence(:block, "block in #{iseq.name}", node) do + iseq.event(:RUBY_EVENT_B_CALL) visit(node.params) visit(node.statements) - 
builder.event(:RUBY_EVENT_B_RETURN) - builder.leave + iseq.event(:RUBY_EVENT_B_RETURN) + iseq.leave end - builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) - builder.send(:lambda, 0, YARV::VM_CALL_FCALL, lambda_iseq) + iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) + iseq.send(:lambda, 0, YARV::VM_CALL_FCALL, lambda_iseq) end def visit_lambda_var(node) @@ -947,7 +914,7 @@ def visit_lambda_var(node) def visit_massign(node) visit(node.value) - builder.dup + iseq.dup visit(node.target) end @@ -966,7 +933,6 @@ def visit_method_add_block(node) def visit_mlhs(node) lookups = [] - node.parts.each do |part| case part when VarField @@ -974,24 +940,18 @@ def visit_mlhs(node) end end - builder.expandarray(lookups.length, 0) - - lookups.each { |lookup| builder.setlocal(lookup.index, lookup.level) } + iseq.expandarray(lookups.length, 0) + lookups.each { |lookup| iseq.setlocal(lookup.index, lookup.level) } end def visit_module(node) name = node.constant.constant.value.to_sym module_iseq = - with_instruction_sequence( - :class, - "", - current_iseq, - node - ) do - builder.event(:RUBY_EVENT_CLASS) + with_instruction_sequence(:class, "", node) do + iseq.event(:RUBY_EVENT_CLASS) visit(node.bodystmt) - builder.event(:RUBY_EVENT_END) - builder.leave + iseq.event(:RUBY_EVENT_END) + iseq.leave end flags = YARV::VM_DEFINECLASS_TYPE_MODULE @@ -1001,28 +961,28 @@ def visit_module(node) flags |= YARV::VM_DEFINECLASS_FLAG_SCOPED visit(node.constant.parent) when ConstRef - builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) + iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) when TopConstRef flags |= YARV::VM_DEFINECLASS_FLAG_SCOPED - builder.putobject(Object) + iseq.putobject(Object) end - builder.putnil - builder.defineclass(name, module_iseq, flags) + iseq.putnil + iseq.defineclass(name, module_iseq, flags) end def visit_mrhs(node) if (compiled = RubyVisitor.compile(node)) - builder.duparray(compiled) + iseq.duparray(compiled) else visit_all(node.parts) - 
builder.newarray(node.parts.length) + iseq.newarray(node.parts.length) end end def visit_not(node) visit(node.statement) - builder.send(:!, 0, YARV::VM_CALL_ARGS_SIMPLE) + iseq.send(:!, 0, YARV::VM_CALL_ARGS_SIMPLE) end def visit_opassign(node) @@ -1036,31 +996,30 @@ def visit_opassign(node) branchunless = nil with_opassign(node) do - builder.dup - branchunless = builder.branchunless(-1) - builder.pop + iseq.dup + branchunless = iseq.branchunless(-1) + iseq.pop visit(node.value) end case node.target when ARefField - builder.leave - branchunless[1] = builder.label - builder.setn(3) - builder.adjuststack(3) + iseq.leave + branchunless[1] = iseq.label + iseq.setn(3) + iseq.adjuststack(3) when ConstPathField, TopConstField - branchunless[1] = builder.label - builder.swap - builder.pop + branchunless[1] = iseq.label + iseq.swap + iseq.pop else - branchunless[1] = builder.label + branchunless[1] = iseq.label end when :"||" - if node.target.is_a?(ConstPathField) || - node.target.is_a?(TopConstField) + if node.target.is_a?(ConstPathField) || node.target.is_a?(TopConstField) opassign_defined(node) - builder.swap - builder.pop + iseq.swap + iseq.pop elsif node.target.is_a?(VarField) && [Const, CVar, GVar].include?(node.target.value.class) opassign_defined(node) @@ -1068,67 +1027,65 @@ def visit_opassign(node) branchif = nil with_opassign(node) do - builder.dup - branchif = builder.branchif(-1) - builder.pop + iseq.dup + branchif = iseq.branchif(-1) + iseq.pop visit(node.value) end if node.target.is_a?(ARefField) - builder.leave - branchif[1] = builder.label - builder.setn(3) - builder.adjuststack(3) + iseq.leave + branchif[1] = iseq.label + iseq.setn(3) + iseq.adjuststack(3) else - branchif[1] = builder.label + branchif[1] = iseq.label end end else with_opassign(node) do visit(node.value) - builder.send(operator, 1, flag) + iseq.send(operator, 1, flag) end end end def visit_params(node) - argument_options = current_iseq.argument_options + argument_options = 
iseq.argument_options if node.requireds.any? argument_options[:lead_num] = 0 node.requireds.each do |required| - current_iseq.local_table.plain(required.value.to_sym) - current_iseq.argument_size += 1 + iseq.local_table.plain(required.value.to_sym) + iseq.argument_size += 1 argument_options[:lead_num] += 1 end end node.optionals.each do |(optional, value)| - index = current_iseq.local_table.size + index = iseq.local_table.size name = optional.value.to_sym - current_iseq.local_table.plain(name) - current_iseq.argument_size += 1 + iseq.local_table.plain(name) + iseq.argument_size += 1 - unless argument_options.key?(:opt) - argument_options[:opt] = [builder.label] - end + argument_options[:opt] = [iseq.label] unless argument_options.key?(:opt) visit(value) - builder.setlocal(index, 0) - current_iseq.argument_options[:opt] << builder.label + iseq.setlocal(index, 0) + iseq.argument_options[:opt] << iseq.label end visit(node.rest) if node.rest if node.posts.any? - argument_options[:post_start] = current_iseq.argument_size + argument_options[:post_start] = iseq.argument_size argument_options[:post_num] = 0 node.posts.each do |post| - current_iseq.local_table.plain(post.value.to_sym) - current_iseq.argument_size += 1 + iseq.local_table.plain(post.value.to_sym) + iseq.argument_size += 1 argument_options[:post_num] += 1 end end @@ -1140,10 +1097,10 @@ def visit_params(node) node.keywords.each_with_index do |(keyword, value), keyword_index| name = keyword.value.chomp(":").to_sym - index = current_iseq.local_table.size + index = iseq.local_table.size - current_iseq.local_table.plain(name) - current_iseq.argument_size += 1 + iseq.local_table.plain(name) + iseq.argument_size += 1 argument_options[:kwbits] += 1 if value.nil? 
@@ -1153,34 +1110,30 @@ def visit_params(node) argument_options[:keyword] << [name, compiled] else argument_options[:keyword] << [name] - checkkeywords << builder.checkkeyword(-1, keyword_index) - branchif = builder.branchif(-1) + checkkeywords << iseq.checkkeyword(-1, keyword_index) + branchif = iseq.branchif(-1) visit(value) - builder.setlocal(index, 0) - branchif[1] = builder.label + iseq.setlocal(index, 0) + branchif[1] = iseq.label end end name = node.keyword_rest ? 3 : 2 - current_iseq.argument_size += 1 - current_iseq.local_table.plain(name) + iseq.argument_size += 1 + iseq.local_table.plain(name) - lookup = current_iseq.local_table.find(name, 0) + lookup = iseq.local_table.find(name, 0) checkkeywords.each { |checkkeyword| checkkeyword[1] = lookup.index } end if node.keyword_rest.is_a?(ArgsForward) - current_iseq.local_table.plain(:*) - current_iseq.local_table.plain(:&) + iseq.local_table.plain(:*) + iseq.local_table.plain(:&) - current_iseq.argument_options[ - :rest_start - ] = current_iseq.argument_size - current_iseq.argument_options[ - :block_start - ] = current_iseq.argument_size + 1 + iseq.argument_options[:rest_start] = iseq.argument_size + iseq.argument_options[:block_start] = iseq.argument_size + 1 - current_iseq.argument_size += 2 + iseq.argument_size += 2 elsif node.keyword_rest visit(node.keyword_rest) end @@ -1215,82 +1168,77 @@ def visit_program(node) end end - with_instruction_sequence(:top, "", nil, node) do + with_instruction_sequence(:top, "", node) do visit_all(preexes) if statements.empty? 
- builder.putnil + iseq.putnil else *statements, last_statement = statements visit_all(statements) with_last_statement { visit(last_statement) } end - builder.leave + iseq.leave end end def visit_qsymbols(node) - builder.duparray(node.accept(RubyVisitor.new)) + iseq.duparray(node.accept(RubyVisitor.new)) end def visit_qwords(node) if frozen_string_literal - builder.duparray(node.accept(RubyVisitor.new)) + iseq.duparray(node.accept(RubyVisitor.new)) else visit_all(node.elements) - builder.newarray(node.elements.length) + iseq.newarray(node.elements.length) end end def visit_range(node) if (compiled = RubyVisitor.compile(node)) - builder.putobject(compiled) + iseq.putobject(compiled) else visit(node.left) visit(node.right) - builder.newrange(node.operator.value == ".." ? 0 : 1) + iseq.newrange(node.operator.value == ".." ? 0 : 1) end end def visit_rational(node) - builder.putobject(node.accept(RubyVisitor.new)) + iseq.putobject(node.accept(RubyVisitor.new)) end def visit_regexp_literal(node) if (compiled = RubyVisitor.compile(node)) - builder.putobject(compiled) + iseq.putobject(compiled) else flags = RubyVisitor.new.visit_regexp_literal_flags(node) length = visit_string_parts(node) - builder.toregexp(flags, length) + iseq.toregexp(flags, length) end end def visit_rest_param(node) - current_iseq.local_table.plain(node.name.value.to_sym) - current_iseq.argument_options[:rest_start] = current_iseq.argument_size - current_iseq.argument_size += 1 + iseq.local_table.plain(node.name.value.to_sym) + iseq.argument_options[:rest_start] = iseq.argument_size + iseq.argument_size += 1 end def visit_sclass(node) visit(node.target) - builder.putnil + iseq.putnil singleton_iseq = - with_instruction_sequence( - :class, - "singleton class", - current_iseq, - node - ) do - builder.event(:RUBY_EVENT_CLASS) + with_instruction_sequence(:class, "singleton class", node) do + iseq.event(:RUBY_EVENT_CLASS) visit(node.bodystmt) - builder.event(:RUBY_EVENT_END) - builder.leave + 
iseq.event(:RUBY_EVENT_END) + iseq.leave end - builder.defineclass( + iseq.defineclass( :singletonclass, singleton_iseq, YARV::VM_DEFINECLASS_TYPE_SINGLETON_CLASS @@ -1308,20 +1256,19 @@ def visit_statements(node) end end - statements.empty? ? builder.putnil : visit_all(statements) + statements.empty? ? iseq.putnil : visit_all(statements) end def visit_string_concat(node) value = node.left.parts.first.value + node.right.parts.first.value - content = TStringContent.new(value: value, location: node.location) - literal = + visit_string_literal( StringLiteral.new( - parts: [content], + parts: [TStringContent.new(value: value, location: node.location)], quote: node.left.quote, location: node.location ) - visit_string_literal(literal) + ) end def visit_string_embexpr(node) @@ -1333,14 +1280,14 @@ def visit_string_literal(node) visit(node.parts.first) else length = visit_string_parts(node) - builder.concatstrings(length) + iseq.concatstrings(length) end end def visit_super(node) - builder.putself + iseq.putself visit(node.arguments) - builder.invokesuper( + iseq.invokesuper( nil, argument_parts(node.arguments).length, YARV::VM_CALL_FCALL | YARV::VM_CALL_ARGS_SIMPLE | YARV::VM_CALL_SUPER, @@ -1349,37 +1296,37 @@ def visit_super(node) end def visit_symbol_literal(node) - builder.putobject(node.accept(RubyVisitor.new)) + iseq.putobject(node.accept(RubyVisitor.new)) end def visit_symbols(node) if (compiled = RubyVisitor.compile(node)) - builder.duparray(compiled) + iseq.duparray(compiled) else node.elements.each do |element| if element.parts.length == 1 && - element.parts.first.is_a?(TStringContent) - builder.putobject(element.parts.first.value.to_sym) + element.parts.first.is_a?(TStringContent) + iseq.putobject(element.parts.first.value.to_sym) else length = visit_string_parts(element) - builder.concatstrings(length) - builder.intern + iseq.concatstrings(length) + iseq.intern end end - builder.newarray(node.elements.length) + iseq.newarray(node.elements.length) end end def 
visit_top_const_ref(node) - builder.opt_getconstant_path(constant_names(node)) + iseq.opt_getconstant_path(constant_names(node)) end def visit_tstring_content(node) if frozen_string_literal - builder.putobject(node.accept(RubyVisitor.new)) + iseq.putobject(node.accept(RubyVisitor.new)) else - builder.putstring(node.accept(RubyVisitor.new)) + iseq.putstring(node.accept(RubyVisitor.new)) end end @@ -1406,34 +1353,34 @@ def visit_unary(node) def visit_undef(node) node.symbols.each_with_index do |symbol, index| - builder.pop if index != 0 - builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) - builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_CBASE) + iseq.pop if index != 0 + iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) + iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_CBASE) visit(symbol) - builder.send(:"core#undef_method", 2, YARV::VM_CALL_ARGS_SIMPLE) + iseq.send(:"core#undef_method", 2, YARV::VM_CALL_ARGS_SIMPLE) end end def visit_unless(node) visit(node.predicate) - branchunless = builder.branchunless(-1) - node.consequent ? visit(node.consequent) : builder.putnil + branchunless = iseq.branchunless(-1) + node.consequent ? visit(node.consequent) : iseq.putnil if last_statement? 
- builder.leave - branchunless[1] = builder.label + iseq.leave + branchunless[1] = iseq.label visit(node.statements) else - builder.pop + iseq.pop if node.consequent - jump = builder.jump(-1) - branchunless[1] = builder.label + jump = iseq.jump(-1) + branchunless[1] = iseq.label visit(node.consequent) - jump[1] = builder.label + jump[1] = iseq.label else - branchunless[1] = builder.label + branchunless[1] = iseq.label end end end @@ -1441,34 +1388,34 @@ def visit_unless(node) def visit_until(node) jumps = [] - jumps << builder.jump(-1) - builder.putnil - builder.pop - jumps << builder.jump(-1) + jumps << iseq.jump(-1) + iseq.putnil + iseq.pop + jumps << iseq.jump(-1) - label = builder.label + label = iseq.label visit(node.statements) - builder.pop - jumps.each { |jump| jump[1] = builder.label } + iseq.pop + jumps.each { |jump| jump[1] = iseq.label } visit(node.predicate) - builder.branchunless(label) - builder.putnil if last_statement? + iseq.branchunless(label) + iseq.putnil if last_statement? 
end def visit_var_field(node) case node.value when CVar, IVar name = node.value.value.to_sym - current_iseq.inline_storage_for(name) + iseq.inline_storage_for(name) when Ident name = node.value.value.to_sym - if (local_variable = current_iseq.local_variable(name)) + if (local_variable = iseq.local_variable(name)) local_variable else - current_iseq.local_table.plain(name) - current_iseq.local_variable(name) + iseq.local_table.plain(name) + iseq.local_variable(name) end end end @@ -1476,43 +1423,44 @@ def visit_var_field(node) def visit_var_ref(node) case node.value when Const - builder.opt_getconstant_path(constant_names(node)) + iseq.opt_getconstant_path(constant_names(node)) when CVar name = node.value.value.to_sym - builder.getclassvariable(name) + iseq.getclassvariable(name) when GVar - builder.getglobal(node.value.value.to_sym) + iseq.getglobal(node.value.value.to_sym) when Ident - lookup = current_iseq.local_variable(node.value.value.to_sym) + lookup = iseq.local_variable(node.value.value.to_sym) case lookup.local when YARV::LocalTable::BlockLocal - builder.getblockparam(lookup.index, lookup.level) + iseq.getblockparam(lookup.index, lookup.level) when YARV::LocalTable::PlainLocal - builder.getlocal(lookup.index, lookup.level) + iseq.getlocal(lookup.index, lookup.level) end when IVar name = node.value.value.to_sym - builder.getinstancevariable(name) + iseq.getinstancevariable(name) when Kw case node.value.value when "false" - builder.putobject(false) + iseq.putobject(false) when "nil" - builder.putnil + iseq.putnil when "self" - builder.putself + iseq.putself when "true" - builder.putobject(true) + iseq.putobject(true) end end end def visit_vcall(node) - builder.putself + iseq.putself - flag = YARV::VM_CALL_FCALL | YARV::VM_CALL_VCALL | YARV::VM_CALL_ARGS_SIMPLE - builder.send(node.value.value.to_sym, 0, flag) + flag = + YARV::VM_CALL_FCALL | YARV::VM_CALL_VCALL | YARV::VM_CALL_ARGS_SIMPLE + iseq.send(node.value.value.to_sym, 0, flag) end def visit_when(node) 
@@ -1522,19 +1470,19 @@ def visit_when(node) def visit_while(node) jumps = [] - jumps << builder.jump(-1) - builder.putnil - builder.pop - jumps << builder.jump(-1) + jumps << iseq.jump(-1) + iseq.putnil + iseq.pop + jumps << iseq.jump(-1) - label = builder.label + label = iseq.label visit(node.statements) - builder.pop - jumps.each { |jump| jump[1] = builder.label } + iseq.pop + jumps.each { |jump| jump[1] = iseq.label } visit(node.predicate) - builder.branchif(label) - builder.putnil if last_statement? + iseq.branchif(label) + iseq.putnil if last_statement? end def visit_word(node) @@ -1542,38 +1490,39 @@ def visit_word(node) visit(node.parts.first) else length = visit_string_parts(node) - builder.concatstrings(length) + iseq.concatstrings(length) end end def visit_words(node) if frozen_string_literal && (compiled = RubyVisitor.compile(node)) - builder.duparray(compiled) + iseq.duparray(compiled) else visit_all(node.elements) - builder.newarray(node.elements.length) + iseq.newarray(node.elements.length) end end def visit_xstring_literal(node) - builder.putself + iseq.putself length = visit_string_parts(node) - builder.concatstrings(node.parts.length) if length > 1 - builder.send(:`, 1, YARV::VM_CALL_FCALL | YARV::VM_CALL_ARGS_SIMPLE) + iseq.concatstrings(node.parts.length) if length > 1 + iseq.send(:`, 1, YARV::VM_CALL_FCALL | YARV::VM_CALL_ARGS_SIMPLE) end def visit_yield(node) parts = argument_parts(node.arguments) visit_all(parts) - builder.invokeblock(nil, parts.length, YARV::VM_CALL_ARGS_SIMPLE) + iseq.invokeblock(nil, parts.length, YARV::VM_CALL_ARGS_SIMPLE) end def visit_zsuper(_node) - builder.putself - builder.invokesuper( + iseq.putself + iseq.invokesuper( nil, 0, - YARV::VM_CALL_FCALL | YARV::VM_CALL_ARGS_SIMPLE | YARV::VM_CALL_SUPER | YARV::VM_CALL_ZSUPER, + YARV::VM_CALL_FCALL | YARV::VM_CALL_ARGS_SIMPLE | YARV::VM_CALL_SUPER | + YARV::VM_CALL_ZSUPER, nil ) end @@ -1638,81 +1587,85 @@ def opassign_defined(node) visit(node.target.parent) name = 
node.target.constant.value.to_sym - builder.dup - builder.defined(YARV::DEFINED_CONST_FROM, name, true) + iseq.dup + iseq.defined(YARV::DEFINED_CONST_FROM, name, true) when TopConstField name = node.target.constant.value.to_sym - builder.putobject(Object) - builder.dup - builder.defined(YARV::DEFINED_CONST_FROM, name, true) + iseq.putobject(Object) + iseq.dup + iseq.defined(YARV::DEFINED_CONST_FROM, name, true) when VarField name = node.target.value.value.to_sym - builder.putnil + iseq.putnil case node.target.value when Const - builder.defined(YARV::DEFINED_CONST, name, true) + iseq.defined(YARV::DEFINED_CONST, name, true) when CVar - builder.defined(YARV::DEFINED_CVAR, name, true) + iseq.defined(YARV::DEFINED_CVAR, name, true) when GVar - builder.defined(YARV::DEFINED_GVAR, name, true) + iseq.defined(YARV::DEFINED_GVAR, name, true) end end - branchunless = builder.branchunless(-1) + branchunless = iseq.branchunless(-1) case node.target when ConstPathField, TopConstField - builder.dup - builder.putobject(true) - builder.getconstant(name) + iseq.dup + iseq.putobject(true) + iseq.getconstant(name) when VarField case node.target.value when Const - builder.opt_getconstant_path(constant_names(node.target)) + iseq.opt_getconstant_path(constant_names(node.target)) when CVar - builder.getclassvariable(name) + iseq.getclassvariable(name) when GVar - builder.getglobal(name) + iseq.getglobal(name) end end - builder.dup - branchif = builder.branchif(-1) - builder.pop + iseq.dup + branchif = iseq.branchif(-1) + iseq.pop - branchunless[1] = builder.label + branchunless[1] = iseq.label visit(node.value) case node.target when ConstPathField, TopConstField - builder.dupn(2) - builder.swap - builder.setconstant(name) + iseq.dupn(2) + iseq.swap + iseq.setconstant(name) when VarField - builder.dup + iseq.dup case node.target.value when Const - builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) - builder.setconstant(name) + 
iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) + iseq.setconstant(name) when CVar - builder.setclassvariable(name) + iseq.setclassvariable(name) when GVar - builder.setglobal(name) + iseq.setglobal(name) end end - branchif[1] = builder.label + branchif[1] = iseq.label end # Whenever a value is interpolated into a string-like structure, these # three instructions are pushed. def push_interpolate - builder.dup - builder.objtostring(:to_s, 0, YARV::VM_CALL_FCALL | YARV::VM_CALL_ARGS_SIMPLE) - builder.anytostring + iseq.dup + iseq.objtostring( + :to_s, + 0, + YARV::VM_CALL_FCALL | YARV::VM_CALL_ARGS_SIMPLE + ) + iseq.anytostring end # There are a lot of nodes in the AST that act as contains of parts of @@ -1723,7 +1676,7 @@ def visit_string_parts(node) length = 0 unless node.parts.first.is_a?(TStringContent) - builder.putobject("") + iseq.putobject("") length += 1 end @@ -1736,7 +1689,7 @@ def visit_string_parts(node) visit(part) push_interpolate when TStringContent - builder.putobject(part.accept(RubyVisitor.new)) + iseq.putobject(part.accept(RubyVisitor.new)) end length += 1 @@ -1749,27 +1702,26 @@ def visit_string_parts(node) # on the compiler. When we descend into a node that has its own # instruction sequence, this method can be called to temporarily set the # new value of the instruction sequence, yield, and then set it back. 
- def with_instruction_sequence(type, name, parent_iseq, node) - previous_iseq = current_iseq - previous_builder = builder + def with_instruction_sequence(type, name, node) + parent_iseq = iseq begin - iseq = YARV::InstructionSequence.new(type, name, parent_iseq, node.location) - - @current_iseq = iseq - @builder = - YARV::Builder.new( - iseq, + iseq = + YARV::InstructionSequence.new( + type, + name, + parent_iseq, + node.location, frozen_string_literal: frozen_string_literal, operands_unification: operands_unification, specialized_instruction: specialized_instruction ) + @iseq = iseq yield iseq ensure - @current_iseq = previous_iseq - @builder = previous_builder + @iseq = parent_iseq end end @@ -1803,99 +1755,99 @@ def last_statement? def with_opassign(node) case node.target when ARefField - builder.putnil + iseq.putnil visit(node.target.collection) visit(node.target.index) - builder.dupn(2) - builder.send(:[], 1, YARV::VM_CALL_ARGS_SIMPLE) + iseq.dupn(2) + iseq.send(:[], 1, YARV::VM_CALL_ARGS_SIMPLE) yield - builder.setn(3) - builder.send(:[]=, 2, YARV::VM_CALL_ARGS_SIMPLE) - builder.pop + iseq.setn(3) + iseq.send(:[]=, 2, YARV::VM_CALL_ARGS_SIMPLE) + iseq.pop when ConstPathField name = node.target.constant.value.to_sym visit(node.target.parent) - builder.dup - builder.putobject(true) - builder.getconstant(name) + iseq.dup + iseq.putobject(true) + iseq.getconstant(name) yield if node.operator.value == "&&=" - builder.dupn(2) + iseq.dupn(2) else - builder.swap - builder.topn(1) + iseq.swap + iseq.topn(1) end - builder.swap - builder.setconstant(name) + iseq.swap + iseq.setconstant(name) when TopConstField name = node.target.constant.value.to_sym - builder.putobject(Object) - builder.dup - builder.putobject(true) - builder.getconstant(name) + iseq.putobject(Object) + iseq.dup + iseq.putobject(true) + iseq.getconstant(name) yield if node.operator.value == "&&=" - builder.dupn(2) + iseq.dupn(2) else - builder.swap - builder.topn(1) + iseq.swap + iseq.topn(1) end - 
builder.swap - builder.setconstant(name) + iseq.swap + iseq.setconstant(name) when VarField case node.target.value when Const names = constant_names(node.target) - builder.opt_getconstant_path(names) + iseq.opt_getconstant_path(names) yield - builder.dup - builder.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) - builder.setconstant(names.last) + iseq.dup + iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) + iseq.setconstant(names.last) when CVar name = node.target.value.value.to_sym - builder.getclassvariable(name) + iseq.getclassvariable(name) yield - builder.dup - builder.setclassvariable(name) + iseq.dup + iseq.setclassvariable(name) when GVar name = node.target.value.value.to_sym - builder.getglobal(name) + iseq.getglobal(name) yield - builder.dup - builder.setglobal(name) + iseq.dup + iseq.setglobal(name) when Ident local_variable = visit(node.target) - builder.getlocal(local_variable.index, local_variable.level) + iseq.getlocal(local_variable.index, local_variable.level) yield - builder.dup - builder.setlocal(local_variable.index, local_variable.level) + iseq.dup + iseq.setlocal(local_variable.index, local_variable.level) when IVar name = node.target.value.value.to_sym - builder.getinstancevariable(name) + iseq.getinstancevariable(name) yield - builder.dup - builder.setinstancevariable(name) + iseq.dup + iseq.setinstancevariable(name) end end end diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index 7290d87f..b6c3468c 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -147,7 +147,20 @@ class InstructionSequence # maximum size of the stack for this instruction sequence. attr_reader :stack - def initialize(type, name, parent_iseq, location) + # These are various compilation options provided. 
+ attr_reader :frozen_string_literal, + :operands_unification, + :specialized_instruction + + def initialize( + type, + name, + parent_iseq, + location, + frozen_string_literal: false, + operands_unification: true, + specialized_instruction: true + ) @type = type @name = name @parent_iseq = parent_iseq @@ -161,8 +174,16 @@ def initialize(type, name, parent_iseq, location) @insns = [] @storage_index = 0 @stack = Stack.new + + @frozen_string_literal = frozen_string_literal + @operands_unification = operands_unification + @specialized_instruction = specialized_instruction end + ########################################################################## + # Query methods + ########################################################################## + def local_variable(name, level = 0) if (lookup = local_table.find(name, level)) lookup @@ -171,11 +192,6 @@ def local_variable(name, level = 0) end end - def push(insn) - insns << insn - insn - end - def inline_storage storage = storage_index @storage_index += 1 @@ -183,9 +199,7 @@ def inline_storage end def inline_storage_for(name) - unless inline_storages.key?(name) - inline_storages[name] = inline_storage - end + inline_storages[name] = inline_storage unless inline_storages.key?(name) inline_storages[name] end @@ -239,251 +253,149 @@ def to_a ] end - private - - def serialize(insn) - case insn[0] - when :checkkeyword, :getblockparam, :getblockparamproxy, - :getlocal_WC_0, :getlocal_WC_1, :getlocal, :setlocal_WC_0, - :setlocal_WC_1, :setlocal - iseq = self - - case insn[0] - when :getlocal_WC_1, :setlocal_WC_1 - iseq = iseq.parent_iseq - when :getblockparam, :getblockparamproxy, :getlocal, :setlocal - insn[2].times { iseq = iseq.parent_iseq } - end - - # Here we need to map the local variable index to the offset - # from the top of the stack where it will be stored. 
- [insn[0], iseq.local_table.offset(insn[1]), *insn[2..]] - when :defineclass - [insn[0], insn[1], insn[2].to_a, insn[3]] - when :definemethod, :definesmethod - [insn[0], insn[1], insn[2].to_a] - when :send - # For any instructions that push instruction sequences onto the - # stack, we need to call #to_a on them as well. - [insn[0], insn[1], (insn[2].to_a if insn[2])] - when :once - [insn[0], insn[1].to_a, insn[2]] - else - insn - end - end - end - - # This class is responsible for taking a compiled instruction sequence and - # walking through it to generate equivalent Ruby code. - class Disassembler - attr_reader :iseq - - def initialize(iseq) - @iseq = iseq - end - - def to_ruby - stack = [] - - iseq.insns.each do |insn| - case insn[0] - when :getlocal_WC_0 - value = iseq.local_table.locals[insn[1]].name.to_s - stack << VarRef.new(value: Ident.new(value: value, location: Location.default), location: Location.default) - when :leave - stack << ReturnNode.new(arguments: Args.new(parts: [stack.pop], location: Location.default), location: Location.default) - when :opt_mult - left, right = stack.pop(2) - stack << Binary.new(left: left, operator: :*, right: right, location: Location.default) - when :opt_plus - left, right = stack.pop(2) - stack << Binary.new(left: left, operator: :+, right: right, location: Location.default) - when :putobject - case insn[1] - when Float - stack << FloatLiteral.new(value: insn[1].inspect, location: Location.default) - when Integer - stack << Int.new(value: insn[1].inspect, location: Location.default) - when Rational - stack << RationalLiteral.new(value: insn[1].inspect, location: Location.default) - else - raise "Unknown object type: #{insn[1].class.name}" - end - when :putobject_INT2FIX_1_ - stack << Int.new(value: "1", location: Location.default) - when :setlocal_WC_0 - target = VarField.new(value: Ident.new(value: iseq.local_table.locals[insn[1]].name.to_s, location: Location.default), location: Location.default) - stack << 
Assign.new(target: target, value: stack.pop, location: Location.default) - else - raise "Unknown instruction #{insn[0]}" - end - end - - Statements.new(nil, body: stack, location: Location.default) - end - end - - # This class serves as a layer of indirection between the instruction - # sequence and the compiler. It allows us to provide different behavior - # for certain instructions depending on the Ruby version. For example, - # class variable reads and writes gained an inline cache in Ruby 3.0. So - # we place the logic for checking the Ruby version in this class. - class Builder - attr_reader :iseq, :stack - attr_reader :frozen_string_literal, - :operands_unification, - :specialized_instruction + ########################################################################## + # Instruction push methods + ########################################################################## - def initialize( - iseq, - frozen_string_literal: false, - operands_unification: true, - specialized_instruction: true - ) - @iseq = iseq - @stack = iseq.stack - - @frozen_string_literal = frozen_string_literal - @operands_unification = operands_unification - @specialized_instruction = specialized_instruction + def push(insn) + insns << insn + insn end # This creates a new label at the current length of the instruction # sequence. It is used as the operand for jump instructions. def label - name = :"label_#{iseq.length}" - iseq.insns.last == name ? name : event(name) + name = :"label_#{length}" + insns.last == name ? 
name : event(name) end def event(name) - iseq.push(name) - name + push(name) end def adjuststack(number) stack.change_by(-number) - iseq.push([:adjuststack, number]) + push([:adjuststack, number]) end def anytostring stack.change_by(-2 + 1) - iseq.push([:anytostring]) + push([:anytostring]) end def branchif(index) stack.change_by(-1) - iseq.push([:branchif, index]) + push([:branchif, index]) end def branchnil(index) stack.change_by(-1) - iseq.push([:branchnil, index]) + push([:branchnil, index]) end def branchunless(index) stack.change_by(-1) - iseq.push([:branchunless, index]) + push([:branchunless, index]) end def checkkeyword(index, keyword_index) stack.change_by(+1) - iseq.push([:checkkeyword, index, keyword_index]) + push([:checkkeyword, index, keyword_index]) end def concatarray stack.change_by(-2 + 1) - iseq.push([:concatarray]) + push([:concatarray]) end def concatstrings(number) stack.change_by(-number + 1) - iseq.push([:concatstrings, number]) + push([:concatstrings, number]) end def defined(type, name, message) stack.change_by(-1 + 1) - iseq.push([:defined, type, name, message]) + push([:defined, type, name, message]) end def defineclass(name, class_iseq, flags) stack.change_by(-2 + 1) - iseq.push([:defineclass, name, class_iseq, flags]) + push([:defineclass, name, class_iseq, flags]) end def definemethod(name, method_iseq) stack.change_by(0) - iseq.push([:definemethod, name, method_iseq]) + push([:definemethod, name, method_iseq]) end def definesmethod(name, method_iseq) stack.change_by(-1) - iseq.push([:definesmethod, name, method_iseq]) + push([:definesmethod, name, method_iseq]) end def dup stack.change_by(-1 + 2) - iseq.push([:dup]) + push([:dup]) end def duparray(object) stack.change_by(+1) - iseq.push([:duparray, object]) + push([:duparray, object]) end def duphash(object) stack.change_by(+1) - iseq.push([:duphash, object]) + push([:duphash, object]) end def dupn(number) stack.change_by(+number) - iseq.push([:dupn, number]) + push([:dupn, number]) 
end def expandarray(length, flag) stack.change_by(-1 + length) - iseq.push([:expandarray, length, flag]) + push([:expandarray, length, flag]) end def getblockparam(index, level) stack.change_by(+1) - iseq.push([:getblockparam, index, level]) + push([:getblockparam, index, level]) end def getblockparamproxy(index, level) stack.change_by(+1) - iseq.push([:getblockparamproxy, index, level]) + push([:getblockparamproxy, index, level]) end def getclassvariable(name) stack.change_by(+1) if RUBY_VERSION >= "3.0" - iseq.push([:getclassvariable, name, iseq.inline_storage_for(name)]) + push([:getclassvariable, name, inline_storage_for(name)]) else - iseq.push([:getclassvariable, name]) + push([:getclassvariable, name]) end end def getconstant(name) stack.change_by(-2 + 1) - iseq.push([:getconstant, name]) + push([:getconstant, name]) end def getglobal(name) stack.change_by(+1) - iseq.push([:getglobal, name]) + push([:getglobal, name]) end def getinstancevariable(name) stack.change_by(+1) if RUBY_VERSION >= "3.2" - iseq.push([:getinstancevariable, name, iseq.inline_storage]) + push([:getinstancevariable, name, inline_storage]) else - inline_storage = iseq.inline_storage_for(name) - iseq.push([:getinstancevariable, name, inline_storage]) + inline_storage = inline_storage_for(name) + push([:getinstancevariable, name, inline_storage]) end end @@ -497,86 +409,86 @@ def getlocal(index, level) # scope, respectively, and requires fewer operands. 
case level when 0 - iseq.push([:getlocal_WC_0, index]) + push([:getlocal_WC_0, index]) when 1 - iseq.push([:getlocal_WC_1, index]) + push([:getlocal_WC_1, index]) else - iseq.push([:getlocal, index, level]) + push([:getlocal, index, level]) end else - iseq.push([:getlocal, index, level]) + push([:getlocal, index, level]) end end def getspecial(key, type) stack.change_by(-0 + 1) - iseq.push([:getspecial, key, type]) + push([:getspecial, key, type]) end def intern stack.change_by(-1 + 1) - iseq.push([:intern]) + push([:intern]) end def invokeblock(method_id, argc, flag) stack.change_by(-argc + 1) - iseq.push([:invokeblock, call_data(method_id, argc, flag)]) + push([:invokeblock, call_data(method_id, argc, flag)]) end def invokesuper(method_id, argc, flag, block_iseq) stack.change_by(-(argc + 1) + 1) cdata = call_data(method_id, argc, flag) - iseq.push([:invokesuper, cdata, block_iseq]) + push([:invokesuper, cdata, block_iseq]) end def jump(index) stack.change_by(0) - iseq.push([:jump, index]) + push([:jump, index]) end def leave stack.change_by(-1) - iseq.push([:leave]) + push([:leave]) end def newarray(length) stack.change_by(-length + 1) - iseq.push([:newarray, length]) + push([:newarray, length]) end def newhash(length) stack.change_by(-length + 1) - iseq.push([:newhash, length]) + push([:newhash, length]) end def newrange(flag) stack.change_by(-2 + 1) - iseq.push([:newrange, flag]) + push([:newrange, flag]) end def nop stack.change_by(0) - iseq.push([:nop]) + push([:nop]) end def objtostring(method_id, argc, flag) stack.change_by(-1 + 1) - iseq.push([:objtostring, call_data(method_id, argc, flag)]) + push([:objtostring, call_data(method_id, argc, flag)]) end def once(postexe_iseq, inline_storage) stack.change_by(+1) - iseq.push([:once, postexe_iseq, inline_storage]) + push([:once, postexe_iseq, inline_storage]) end def opt_getconstant_path(names) if RUBY_VERSION >= "3.2" stack.change_by(+1) - iseq.push([:opt_getconstant_path, names]) + 
push([:opt_getconstant_path, names]) else - inline_storage = iseq.inline_storage - getinlinecache = opt_getinlinecache(-1, inline_storage) + const_inline_storage = inline_storage + getinlinecache = opt_getinlinecache(-1, const_inline_storage) if names[0] == :"" names.shift @@ -589,20 +501,20 @@ def opt_getconstant_path(names) getconstant(name) end - opt_setinlinecache(inline_storage) + opt_setinlinecache(const_inline_storage) getinlinecache[1] = label end end def opt_getinlinecache(offset, inline_storage) stack.change_by(+1) - iseq.push([:opt_getinlinecache, offset, inline_storage]) + push([:opt_getinlinecache, offset, inline_storage]) end def opt_newarray_max(length) if specialized_instruction stack.change_by(-length + 1) - iseq.push([:opt_newarray_max, length]) + push([:opt_newarray_max, length]) else newarray(length) send(:max, 0, VM_CALL_ARGS_SIMPLE) @@ -612,7 +524,7 @@ def opt_newarray_max(length) def opt_newarray_min(length) if specialized_instruction stack.change_by(-length + 1) - iseq.push([:opt_newarray_min, length]) + push([:opt_newarray_min, length]) else newarray(length) send(:min, 0, VM_CALL_ARGS_SIMPLE) @@ -621,18 +533,14 @@ def opt_newarray_min(length) def opt_setinlinecache(inline_storage) stack.change_by(-1 + 1) - iseq.push([:opt_setinlinecache, inline_storage]) + push([:opt_setinlinecache, inline_storage]) end def opt_str_freeze(value) if specialized_instruction stack.change_by(+1) - iseq.push( - [ - :opt_str_freeze, - value, - call_data(:freeze, 0, VM_CALL_ARGS_SIMPLE) - ] + push( + [:opt_str_freeze, value, call_data(:freeze, 0, VM_CALL_ARGS_SIMPLE)] ) else putstring(value) @@ -643,9 +551,7 @@ def opt_str_freeze(value) def opt_str_uminus(value) if specialized_instruction stack.change_by(+1) - iseq.push( - [:opt_str_uminus, value, call_data(:-@, 0, VM_CALL_ARGS_SIMPLE)] - ) + push([:opt_str_uminus, value, call_data(:-@, 0, VM_CALL_ARGS_SIMPLE)]) else putstring(value) send(:-@, 0, VM_CALL_ARGS_SIMPLE) @@ -654,12 +560,12 @@ def opt_str_uminus(value) 
def pop stack.change_by(-1) - iseq.push([:pop]) + push([:pop]) end def putnil stack.change_by(+1) - iseq.push([:putnil]) + push([:putnil]) end def putobject(object) @@ -671,30 +577,30 @@ def putobject(object) # that will push the object onto the stack and requires fewer # operands. if object.eql?(0) - iseq.push([:putobject_INT2FIX_0_]) + push([:putobject_INT2FIX_0_]) elsif object.eql?(1) - iseq.push([:putobject_INT2FIX_1_]) + push([:putobject_INT2FIX_1_]) else - iseq.push([:putobject, object]) + push([:putobject, object]) end else - iseq.push([:putobject, object]) + push([:putobject, object]) end end def putself stack.change_by(+1) - iseq.push([:putself]) + push([:putself]) end def putspecialobject(object) stack.change_by(+1) - iseq.push([:putspecialobject, object]) + push([:putspecialobject, object]) end def putstring(object) stack.change_by(+1) - iseq.push([:putstring, object]) + push([:putstring, object]) end def send(method_id, argc, flag, block_iseq = nil) @@ -710,39 +616,39 @@ def send(method_id, argc, flag, block_iseq = nil) # stree-ignore if !block_iseq && (flag & VM_CALL_ARGS_BLOCKARG) == 0 case [method_id, argc] - when [:length, 0] then iseq.push([:opt_length, cdata]) - when [:size, 0] then iseq.push([:opt_size, cdata]) - when [:empty?, 0] then iseq.push([:opt_empty_p, cdata]) - when [:nil?, 0] then iseq.push([:opt_nil_p, cdata]) - when [:succ, 0] then iseq.push([:opt_succ, cdata]) - when [:!, 0] then iseq.push([:opt_not, cdata]) - when [:+, 1] then iseq.push([:opt_plus, cdata]) - when [:-, 1] then iseq.push([:opt_minus, cdata]) - when [:*, 1] then iseq.push([:opt_mult, cdata]) - when [:/, 1] then iseq.push([:opt_div, cdata]) - when [:%, 1] then iseq.push([:opt_mod, cdata]) - when [:==, 1] then iseq.push([:opt_eq, cdata]) - when [:=~, 1] then iseq.push([:opt_regexpmatch2, cdata]) - when [:<, 1] then iseq.push([:opt_lt, cdata]) - when [:<=, 1] then iseq.push([:opt_le, cdata]) - when [:>, 1] then iseq.push([:opt_gt, cdata]) - when [:>=, 1] then 
iseq.push([:opt_ge, cdata]) - when [:<<, 1] then iseq.push([:opt_ltlt, cdata]) - when [:[], 1] then iseq.push([:opt_aref, cdata]) - when [:&, 1] then iseq.push([:opt_and, cdata]) - when [:|, 1] then iseq.push([:opt_or, cdata]) - when [:[]=, 2] then iseq.push([:opt_aset, cdata]) + when [:length, 0] then push([:opt_length, cdata]) + when [:size, 0] then push([:opt_size, cdata]) + when [:empty?, 0] then push([:opt_empty_p, cdata]) + when [:nil?, 0] then push([:opt_nil_p, cdata]) + when [:succ, 0] then push([:opt_succ, cdata]) + when [:!, 0] then push([:opt_not, cdata]) + when [:+, 1] then push([:opt_plus, cdata]) + when [:-, 1] then push([:opt_minus, cdata]) + when [:*, 1] then push([:opt_mult, cdata]) + when [:/, 1] then push([:opt_div, cdata]) + when [:%, 1] then push([:opt_mod, cdata]) + when [:==, 1] then push([:opt_eq, cdata]) + when [:=~, 1] then push([:opt_regexpmatch2, cdata]) + when [:<, 1] then push([:opt_lt, cdata]) + when [:<=, 1] then push([:opt_le, cdata]) + when [:>, 1] then push([:opt_gt, cdata]) + when [:>=, 1] then push([:opt_ge, cdata]) + when [:<<, 1] then push([:opt_ltlt, cdata]) + when [:[], 1] then push([:opt_aref, cdata]) + when [:&, 1] then push([:opt_and, cdata]) + when [:|, 1] then push([:opt_or, cdata]) + when [:[]=, 2] then push([:opt_aset, cdata]) when [:!=, 1] eql_data = call_data(:==, 1, VM_CALL_ARGS_SIMPLE) - iseq.push([:opt_neq, eql_data, cdata]) + push([:opt_neq, eql_data, cdata]) else - iseq.push([:opt_send_without_block, cdata]) + push([:opt_send_without_block, cdata]) end else - iseq.push([:send, cdata, block_iseq]) + push([:send, cdata, block_iseq]) end else - iseq.push([:send, cdata, block_iseq]) + push([:send, cdata, block_iseq]) end end @@ -750,30 +656,29 @@ def setclassvariable(name) stack.change_by(-1) if RUBY_VERSION >= "3.0" - iseq.push([:setclassvariable, name, iseq.inline_storage_for(name)]) + push([:setclassvariable, name, inline_storage_for(name)]) else - iseq.push([:setclassvariable, name]) + push([:setclassvariable, 
name]) end end def setconstant(name) stack.change_by(-2) - iseq.push([:setconstant, name]) + push([:setconstant, name]) end def setglobal(name) stack.change_by(-1) - iseq.push([:setglobal, name]) + push([:setglobal, name]) end def setinstancevariable(name) stack.change_by(-1) if RUBY_VERSION >= "3.2" - iseq.push([:setinstancevariable, name, iseq.inline_storage]) + push([:setinstancevariable, name, inline_storage]) else - inline_storage = iseq.inline_storage_for(name) - iseq.push([:setinstancevariable, name, inline_storage]) + push([:setinstancevariable, name, inline_storage_for(name)]) end end @@ -787,40 +692,40 @@ def setlocal(index, level) # scope, respectively, and requires fewer operands. case level when 0 - iseq.push([:setlocal_WC_0, index]) + push([:setlocal_WC_0, index]) when 1 - iseq.push([:setlocal_WC_1, index]) + push([:setlocal_WC_1, index]) else - iseq.push([:setlocal, index, level]) + push([:setlocal, index, level]) end else - iseq.push([:setlocal, index, level]) + push([:setlocal, index, level]) end end def setn(number) stack.change_by(-1 + 1) - iseq.push([:setn, number]) + push([:setn, number]) end def splatarray(flag) stack.change_by(-1 + 1) - iseq.push([:splatarray, flag]) + push([:splatarray, flag]) end def swap stack.change_by(-2 + 2) - iseq.push([:swap]) + push([:swap]) end def topn(number) stack.change_by(+1) - iseq.push([:topn, number]) + push([:topn, number]) end def toregexp(options, length) stack.change_by(-length + 1) - iseq.push([:toregexp, options, length]) + push([:toregexp, options, length]) end private @@ -830,6 +735,126 @@ def toregexp(options, length) def call_data(method_id, argc, flag) { mid: method_id, flag: flag, orig_argc: argc } end + + def serialize(insn) + case insn[0] + when :checkkeyword, :getblockparam, :getblockparamproxy, :getlocal_WC_0, + :getlocal_WC_1, :getlocal, :setlocal_WC_0, :setlocal_WC_1, + :setlocal + iseq = self + + case insn[0] + when :getlocal_WC_1, :setlocal_WC_1 + iseq = iseq.parent_iseq + when 
:getblockparam, :getblockparamproxy, :getlocal, :setlocal + insn[2].times { iseq = iseq.parent_iseq } + end + + # Here we need to map the local variable index to the offset + # from the top of the stack where it will be stored. + [insn[0], iseq.local_table.offset(insn[1]), *insn[2..]] + when :defineclass + [insn[0], insn[1], insn[2].to_a, insn[3]] + when :definemethod, :definesmethod + [insn[0], insn[1], insn[2].to_a] + when :send + # For any instructions that push instruction sequences onto the + # stack, we need to call #to_a on them as well. + [insn[0], insn[1], (insn[2].to_a if insn[2])] + when :once + [insn[0], insn[1].to_a, insn[2]] + else + insn + end + end + end + + # This class is responsible for taking a compiled instruction sequence and + # walking through it to generate equivalent Ruby code. + class Disassembler + attr_reader :iseq + + def initialize(iseq) + @iseq = iseq + end + + def to_ruby + stack = [] + + iseq.insns.each do |insn| + case insn[0] + when :getlocal_WC_0 + value = iseq.local_table.locals[insn[1]].name.to_s + stack << VarRef.new( + value: Ident.new(value: value, location: Location.default), + location: Location.default + ) + when :leave + stack << ReturnNode.new( + arguments: + Args.new(parts: [stack.pop], location: Location.default), + location: Location.default + ) + when :opt_mult + left, right = stack.pop(2) + stack << Binary.new( + left: left, + operator: :*, + right: right, + location: Location.default + ) + when :opt_plus + left, right = stack.pop(2) + stack << Binary.new( + left: left, + operator: :+, + right: right, + location: Location.default + ) + when :putobject + case insn[1] + when Float + stack << FloatLiteral.new( + value: insn[1].inspect, + location: Location.default + ) + when Integer + stack << Int.new( + value: insn[1].inspect, + location: Location.default + ) + when Rational + stack << RationalLiteral.new( + value: insn[1].inspect, + location: Location.default + ) + else + raise "Unknown object type: 
#{insn[1].class.name}" + end + when :putobject_INT2FIX_1_ + stack << Int.new(value: "1", location: Location.default) + when :setlocal_WC_0 + target = + VarField.new( + value: + Ident.new( + value: iseq.local_table.locals[insn[1]].name.to_s, + location: Location.default + ), + location: Location.default + ) + stack << Assign.new( + target: target, + value: stack.pop, + location: Location.default + ) + else + raise "Unknown instruction #{insn[0]}" + end + end + + Statements.new(nil, body: stack, location: Location.default) + end end # These constants correspond to the putspecialobject instruction. They are From 154e75f9fe4f831237206fff080b03ad22d59d32 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 21 Nov 2022 10:02:31 -0500 Subject: [PATCH 049/104] Put child iseq methods on iseq --- lib/syntax_tree/compiler.rb | 57 ++++++++++++++++++------------------- lib/syntax_tree/yarv.rb | 46 ++++++++++++++++++++++++------ test/compiler_test.rb | 3 ++ 3 files changed, 69 insertions(+), 37 deletions(-) diff --git a/lib/syntax_tree/compiler.rb b/lib/syntax_tree/compiler.rb index 424a9cf5..926661cc 100644 --- a/lib/syntax_tree/compiler.rb +++ b/lib/syntax_tree/compiler.rb @@ -225,14 +225,17 @@ def visit_CHAR(node) end def visit_END(node) - name = "block in #{iseq.name}" once_iseq = - with_instruction_sequence(:block, name, node) do + with_child_iseq(iseq.block_child_iseq(node.location)) do postexe_iseq = - with_instruction_sequence(:block, name, node) do + with_child_iseq(iseq.block_child_iseq(node.location)) do + iseq.event(:RUBY_EVENT_B_CALL) + *statements, last_statement = node.statements.body visit_all(statements) with_last_statement { visit(last_statement) } + + iseq.event(:RUBY_EVENT_B_RETURN) iseq.leave end @@ -422,7 +425,7 @@ def visit_binary(node) end def visit_block(node) - with_instruction_sequence(:block, "block in #{iseq.name}", node) do + with_child_iseq(iseq.block_child_iseq(node.location)) do iseq.event(:RUBY_EVENT_B_CALL) visit(node.block_var) 
visit(node.bodystmt) @@ -606,7 +609,7 @@ def visit_case(node) def visit_class(node) name = node.constant.constant.value.to_sym class_iseq = - with_instruction_sequence(:class, "", node) do + with_child_iseq(iseq.class_child_iseq(name, node.location)) do iseq.event(:RUBY_EVENT_CLASS) visit(node.bodystmt) iseq.event(:RUBY_EVENT_END) @@ -673,7 +676,7 @@ def visit_const_path_ref(node) def visit_def(node) method_iseq = - with_instruction_sequence(:method, node.name.value, node) do + with_child_iseq(iseq.method_child_iseq(node.name.value, node.location)) do visit(node.params) if node.params iseq.event(:RUBY_EVENT_CALL) visit(node.bodystmt) @@ -788,11 +791,7 @@ def visit_for(node) iseq.local_table.plain(name) block_iseq = - with_instruction_sequence( - :block, - "block in #{iseq.name}", - node.statements - ) do + with_child_iseq(iseq.block_child_iseq(node.statements.location)) do iseq.argument_options[:lead_num] ||= 0 iseq.argument_options[:lead_num] += 1 iseq.argument_options[:ambiguous_param0] = true @@ -896,7 +895,7 @@ def visit_label(node) def visit_lambda(node) lambda_iseq = - with_instruction_sequence(:block, "block in #{iseq.name}", node) do + with_child_iseq(iseq.block_child_iseq(node.location)) do iseq.event(:RUBY_EVENT_B_CALL) visit(node.params) visit(node.statements) @@ -947,7 +946,7 @@ def visit_mlhs(node) def visit_module(node) name = node.constant.constant.value.to_sym module_iseq = - with_instruction_sequence(:class, "", node) do + with_child_iseq(iseq.module_child_iseq(name, node.location)) do iseq.event(:RUBY_EVENT_CLASS) visit(node.bodystmt) iseq.event(:RUBY_EVENT_END) @@ -1168,7 +1167,18 @@ def visit_program(node) end end - with_instruction_sequence(:top, "", node) do + top_iseq = + YARV::InstructionSequence.new( + :top, + "", + nil, + node.location, + frozen_string_literal: frozen_string_literal, + operands_unification: operands_unification, + specialized_instruction: specialized_instruction + ) + + with_child_iseq(top_iseq) do visit_all(preexes) if 
statements.empty? @@ -1231,7 +1241,7 @@ def visit_sclass(node) iseq.putnil singleton_iseq = - with_instruction_sequence(:class, "singleton class", node) do + with_child_iseq(iseq.singleton_class_child_iseq(node.location)) do iseq.event(:RUBY_EVENT_CLASS) visit(node.bodystmt) iseq.event(:RUBY_EVENT_END) @@ -1702,24 +1712,13 @@ def visit_string_parts(node) # on the compiler. When we descend into a node that has its own # instruction sequence, this method can be called to temporarily set the # new value of the instruction sequence, yield, and then set it back. - def with_instruction_sequence(type, name, node) + def with_child_iseq(child_iseq) parent_iseq = iseq begin - iseq = - YARV::InstructionSequence.new( - type, - name, - parent_iseq, - node.location, - frozen_string_literal: frozen_string_literal, - operands_unification: operands_unification, - specialized_instruction: specialized_instruction - ) - - @iseq = iseq + @iseq = child_iseq yield - iseq + child_iseq ensure @iseq = parent_iseq end diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index b6c3468c..12d1dba2 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -210,14 +210,6 @@ def length end end - def each_child - insns.each do |insn| - insn[1..].each do |operand| - yield operand if operand.is_a?(InstructionSequence) - end - end - end - def eval compiled = to_a @@ -253,6 +245,44 @@ def to_a ] end + ########################################################################## + # Child instruction sequence methods + ########################################################################## + + def child_iseq(type, name, location) + InstructionSequence.new( + type, + name, + self, + location, + frozen_string_literal: frozen_string_literal, + operands_unification: operands_unification, + specialized_instruction: specialized_instruction + ) + end + + def block_child_iseq(location) + current = self + current = current.parent_iseq while current.type == :block + child_iseq(:block, "block 
in #{current.name}", location) + end + + def class_child_iseq(name, location) + child_iseq(:class, "", location) + end + + def method_child_iseq(name, location) + child_iseq(:method, name, location) + end + + def module_child_iseq(name, location) + child_iseq(:class, "", location) + end + + def singleton_class_child_iseq(location) + child_iseq(:class, "singleton class", location) + end + ########################################################################## # Instruction push methods ########################################################################## diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 3b8c0ea2..27bf993d 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -6,6 +6,9 @@ module SyntaxTree class CompilerTest < Minitest::Test CASES = [ + # Hooks + "BEGIN { a = 1 }", + "a = 1; END { a = 1 }; a", # Various literals placed on the stack "true", "false", From df9f6220c009126f0a5b02c4a618ec54548d6e43 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 21 Nov 2022 10:19:30 -0500 Subject: [PATCH 050/104] Test out disassembler --- lib/syntax_tree/yarv.rb | 171 ++++++++++++++++++++++++++++------------ test/yarv_test.rb | 46 +++++++++++ 2 files changed, 166 insertions(+), 51 deletions(-) create mode 100644 test/yarv_test.rb diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index 12d1dba2..93f2ac06 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -802,6 +802,65 @@ def serialize(insn) # This class is responsible for taking a compiled instruction sequence and # walking through it to generate equivalent Ruby code. 
class Disassembler + module DSL + def Args(parts) + Args.new(parts: parts, location: Location.default) + end + + def ArgParen(arguments) + ArgParen.new(arguments: arguments, location: Location.default) + end + + def Assign(target, value) + Assign.new(target: target, value: value, location: Location.default) + end + + def Binary(left, operator, right) + Binary.new(left: left, operator: operator, right: right, location: Location.default) + end + + def CallNode(receiver, operator, message, arguments) + CallNode.new(receiver: receiver, operator: operator, message: message, arguments: arguments, location: Location.default) + end + + def FloatLiteral(value) + FloatLiteral.new(value: value, location: Location.default) + end + + def Ident(value) + Ident.new(value: value, location: Location.default) + end + + def Int(value) + Int.new(value: value, location: Location.default) + end + + def Period(value) + Period.new(value: value, location: Location.default) + end + + def Program(statements) + Program.new(statements: statements, location: Location.default) + end + + def ReturnNode(arguments) + ReturnNode.new(arguments: arguments, location: Location.default) + end + + def Statements(body) + Statements.new(nil, body: body, location: Location.default) + end + + def VarField(value) + VarField.new(value: value, location: Location.default) + end + + def VarRef(value) + VarRef.new(value: value, location: Location.default) + end + end + + include DSL attr_reader :iseq def initialize(iseq) @@ -812,78 +871,88 @@ def to_ruby stack = [] iseq.insns.each do |insn| + # skip line numbers and events + next unless insn.is_a?(Array) + case insn[0] when :getlocal_WC_0 - value = iseq.local_table.locals[insn[1]].name.to_s - stack << VarRef.new( - value: Ident.new(value: value, location: Location.default), - location: Location.default - ) + stack << VarRef(Ident(local_name(insn[1], 0))) when :leave - stack << ReturnNode.new( - arguments: - Args.new(parts: [stack.pop], location: Location.default), - 
location: Location.default - ) + stack << ReturnNode(Args([stack.pop])) + when :opt_and + left, right = stack.pop(2) + stack << Binary(left, :&, right) + when :opt_div + left, right = stack.pop(2) + stack << Binary(left, :/, right) + when :opt_eq + left, right = stack.pop(2) + stack << Binary(left, :==, right) + when :opt_ge + left, right = stack.pop(2) + stack << Binary(left, :>=, right) + when :opt_gt + left, right = stack.pop(2) + stack << Binary(left, :>, right) + when :opt_le + left, right = stack.pop(2) + stack << Binary(left, :<=, right) + when :opt_lt + left, right = stack.pop(2) + stack << Binary(left, :<, right) + when :opt_ltlt + left, right = stack.pop(2) + stack << Binary(left, :<<, right) + when :opt_minus + left, right = stack.pop(2) + stack << Binary(left, :-, right) + when :opt_mod + left, right = stack.pop(2) + stack << Binary(left, :%, right) when :opt_mult left, right = stack.pop(2) - stack << Binary.new( - left: left, - operator: :*, - right: right, - location: Location.default - ) + stack << Binary(left, :*, right) + when :opt_neq + left, right = stack.pop(2) + stack << Binary(left, :"!=", right) + when :opt_or + left, right = stack.pop(2) + stack << Binary(left, :|, right) when :opt_plus left, right = stack.pop(2) - stack << Binary.new( - left: left, - operator: :+, - right: right, - location: Location.default - ) + stack << Binary(left, :+, right) + when :opt_send_without_block + receiver, *arguments = stack.pop(insn[1][:orig_argc] + 1) + stack << CallNode(receiver, Period("."), Ident(insn[1][:mid]), ArgParen(Args(arguments))) when :putobject case insn[1] when Float - stack << FloatLiteral.new( - value: insn[1].inspect, - location: Location.default - ) + stack << FloatLiteral(insn[1].inspect) when Integer - stack << Int.new( - value: insn[1].inspect, - location: Location.default - ) - when Rational - stack << RationalLiteral.new( - value: insn[1].inspect, - location: Location.default - ) + stack << Int(insn[1].inspect) else raise "Unknown 
object type: #{insn[1].class.name}" end + when :putobject_INT2FIX_0_ + stack << Int("0") when :putobject_INT2FIX_1_ - stack << Int.new(value: "1", location: Location.default) + stack << Int("1") when :setlocal_WC_0 - target = - VarField.new( - value: - Ident.new( - value: iseq.local_table.locals[insn[1]].name.to_s, - location: Location.default - ), - location: Location.default - ) - stack << Assign.new( - target: target, - value: stack.pop, - location: Location.default - ) + stack << Assign(VarField(Ident(local_name(insn[1], 0))), stack.pop) else raise "Unknown instruction #{insn[0]}" end end - Statements.new(nil, body: stack, location: Location.default) + Program(Statements(stack)) + end + + private + + def local_name(index, level) + current = iseq + level.times { current = current.parent_iseq } + current.local_table.locals[index].name.to_s end end diff --git a/test/yarv_test.rb b/test/yarv_test.rb new file mode 100644 index 00000000..57371ba3 --- /dev/null +++ b/test/yarv_test.rb @@ -0,0 +1,46 @@ +# frozen_string_literal: true + +return if !defined?(RubyVM::InstructionSequence) || RUBY_VERSION < "3.1" +require_relative "test_helper" + +module SyntaxTree + class YARVTest < Minitest::Test + CASES = { + "0" => "return 0\n", + "1" => "return 1\n", + "2" => "return 2\n", + "1.0" => "return 1.0\n", + "1 + 2" => "return 1 + 2\n", + "1 - 2" => "return 1 - 2\n", + "1 * 2" => "return 1 * 2\n", + "1 / 2" => "return 1 / 2\n", + "1 % 2" => "return 1 % 2\n", + "1 < 2" => "return 1 < 2\n", + "1 <= 2" => "return 1 <= 2\n", + "1 > 2" => "return 1 > 2\n", + "1 >= 2" => "return 1 >= 2\n", + "1 == 2" => "return 1 == 2\n", + "1 != 2" => "return 1 != 2\n", + "1 & 2" => "return 1 & 2\n", + "1 | 2" => "return 1 | 2\n", + "1 << 2" => "return 1 << 2\n", + "1 >> 2" => "return 1.>>(2)\n", + "1 ** 2" => "return 1.**(2)\n", + "a = 1; a" => "a = 1\nreturn a\n", + }.freeze + + CASES.each do |source, expected| + define_method("test_disassemble_#{source}") do + assert_disassembles(expected, 
source) + end + end + + private + + def assert_disassembles(expected, source) + iseq = SyntaxTree.parse(source).accept(Compiler.new) + actual = Formatter.format(source, YARV::Disassembler.new(iseq).to_ruby) + assert_equal expected, actual + end + end +end From 6c6b4376b88b27d911c577ab8c90de9c9cc47f95 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 21 Nov 2022 12:23:22 -0500 Subject: [PATCH 051/104] Add BF compiler --- lib/syntax_tree.rb | 3 + lib/syntax_tree/dsl.rb | 129 ++++++++ lib/syntax_tree/yarv.rb | 157 --------- lib/syntax_tree/yarv/bf.rb | 466 +++++++++++++++++++++++++++ lib/syntax_tree/yarv/disassembler.rb | 209 ++++++++++++ 5 files changed, 807 insertions(+), 157 deletions(-) create mode 100644 lib/syntax_tree/dsl.rb create mode 100644 lib/syntax_tree/yarv/bf.rb create mode 100644 lib/syntax_tree/yarv/disassembler.rb diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index 187ff74d..2cbfa2e4 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -26,8 +26,11 @@ require_relative "syntax_tree/pattern" require_relative "syntax_tree/search" +require_relative "syntax_tree/dsl" require_relative "syntax_tree/yarv" require_relative "syntax_tree/compiler" +require_relative "syntax_tree/yarv/bf" +require_relative "syntax_tree/yarv/disassembler" # Syntax Tree is a suite of tools built on top of the internal CRuby parser. 
It # provides the ability to generate a syntax tree from source, as well as the diff --git a/lib/syntax_tree/dsl.rb b/lib/syntax_tree/dsl.rb new file mode 100644 index 00000000..05911ee3 --- /dev/null +++ b/lib/syntax_tree/dsl.rb @@ -0,0 +1,129 @@ +# frozen_string_literal: true + +module SyntaxTree + module DSL + def ARef(collection, index) + ARef.new(collection: collection, index: index, location: Location.default) + end + + def ARefField(collection, index) + ARefField.new(collection: collection, index: index, location: Location.default) + end + + def Args(parts) + Args.new(parts: parts, location: Location.default) + end + + def ArgParen(arguments) + ArgParen.new(arguments: arguments, location: Location.default) + end + + def Assign(target, value) + Assign.new(target: target, value: value, location: Location.default) + end + + def Assoc(key, value) + Assoc.new(key: key, value: value, location: Location.default) + end + + def Binary(left, operator, right) + Binary.new(left: left, operator: operator, right: right, location: Location.default) + end + + def BlockNode(opening, block_var, bodystmt) + BlockNode.new(opening: opening, block_var: block_var, bodystmt: bodystmt, location: Location.default) + end + + def BodyStmt(statements, rescue_clause, else_keyword, else_clause, ensure_clause) + BodyStmt.new(statements: statements, rescue_clause: rescue_clause, else_keyword: else_keyword, else_clause: else_clause, ensure_clause: ensure_clause, location: Location.default) + end + + def CallNode(receiver, operator, message, arguments) + CallNode.new(receiver: receiver, operator: operator, message: message, arguments: arguments, location: Location.default) + end + + def Case(keyword, value, consequent) + Case.new(keyword: keyword, value: value, consequent: consequent, location: Location.default) + end + + def FloatLiteral(value) + FloatLiteral.new(value: value, location: Location.default) + end + + def GVar(value) + GVar.new(value: value, location: Location.default) + end + + 
def HashLiteral(lbrace, assocs) + HashLiteral.new(lbrace: lbrace, assocs: assocs, location: Location.default) + end + + def Ident(value) + Ident.new(value: value, location: Location.default) + end + + def IfNode(predicate, statements, consequent) + IfNode.new(predicate: predicate, statements: statements, consequent: consequent, location: Location.default) + end + + def Int(value) + Int.new(value: value, location: Location.default) + end + + def Kw(value) + Kw.new(value: value, location: Location.default) + end + + def LBrace(value) + LBrace.new(value: value, location: Location.default) + end + + def MethodAddBlock(call, block) + MethodAddBlock.new(call: call, block: block, location: Location.default) + end + + def Next(arguments) + Next.new(arguments: arguments, location: Location.default) + end + + def Op(value) + Op.new(value: value, location: Location.default) + end + + def OpAssign(target, operator, value) + OpAssign.new(target: target, operator: operator, value: value, location: Location.default) + end + + def Period(value) + Period.new(value: value, location: Location.default) + end + + def Program(statements) + Program.new(statements: statements, location: Location.default) + end + + def ReturnNode(arguments) + ReturnNode.new(arguments: arguments, location: Location.default) + end + + def Statements(body) + Statements.new(nil, body: body, location: Location.default) + end + + def SymbolLiteral(value) + SymbolLiteral.new(value: value, location: Location.default) + end + + def VarField(value) + VarField.new(value: value, location: Location.default) + end + + def VarRef(value) + VarRef.new(value: value, location: Location.default) + end + + def When(arguments, statements, consequent) + When.new(arguments: arguments, statements: statements, consequent: consequent, location: Location.default) + end + end +end diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index 93f2ac06..2224792a 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb 
@@ -799,163 +799,6 @@ def serialize(insn) end end - # This class is responsible for taking a compiled instruction sequence and - # walking through it to generate equivalent Ruby code. - class Disassembler - module DSL - def Args(parts) - Args.new(parts: parts, location: Location.default) - end - - def ArgParen(arguments) - ArgParen.new(arguments: arguments, location: Location.default) - end - - def Assign(target, value) - Assign.new(target: target, value: value, location: Location.default) - end - - def Binary(left, operator, right) - Binary.new(left: left, operator: operator, right: right, location: Location.default) - end - - def CallNode(receiver, operator, message, arguments) - CallNode.new(receiver: receiver, operator: operator, message: message, arguments: arguments, location: Location.default) - end - - def FloatLiteral(value) - FloatLiteral.new(value: value, location: Location.default) - end - - def Ident(value) - Ident.new(value: value, location: Location.default) - end - - def Int(value) - Int.new(value: value, location: Location.default) - end - - def Period(value) - Period.new(value: value, location: Location.default) - end - - def Program(statements) - Program.new(statements: statements, location: Location.default) - end - - def ReturnNode(arguments) - ReturnNode.new(arguments: arguments, location: Location.default) - end - - def Statements(body) - Statements.new(nil, body: body, location: Location.default) - end - - def VarField(value) - VarField.new(value: value, location: Location.default) - end - - def VarRef(value) - VarRef.new(value: value, location: Location.default) - end - end - - include DSL - attr_reader :iseq - - def initialize(iseq) - @iseq = iseq - end - - def to_ruby - stack = [] - - iseq.insns.each do |insn| - # skip line numbers and events - next unless insn.is_a?(Array) - - case insn[0] - when :getlocal_WC_0 - stack << VarRef(Ident(local_name(insn[1], 0))) - when :leave - stack << ReturnNode(Args([stack.pop])) - when :opt_and - left, 
right = stack.pop(2) - stack << Binary(left, :&, right) - when :opt_div - left, right = stack.pop(2) - stack << Binary(left, :/, right) - when :opt_eq - left, right = stack.pop(2) - stack << Binary(left, :==, right) - when :opt_ge - left, right = stack.pop(2) - stack << Binary(left, :>=, right) - when :opt_gt - left, right = stack.pop(2) - stack << Binary(left, :>, right) - when :opt_le - left, right = stack.pop(2) - stack << Binary(left, :<=, right) - when :opt_lt - left, right = stack.pop(2) - stack << Binary(left, :<, right) - when :opt_ltlt - left, right = stack.pop(2) - stack << Binary(left, :<<, right) - when :opt_minus - left, right = stack.pop(2) - stack << Binary(left, :-, right) - when :opt_mod - left, right = stack.pop(2) - stack << Binary(left, :%, right) - when :opt_mult - left, right = stack.pop(2) - stack << Binary(left, :*, right) - when :opt_neq - left, right = stack.pop(2) - stack << Binary(left, :"!=", right) - when :opt_or - left, right = stack.pop(2) - stack << Binary(left, :|, right) - when :opt_plus - left, right = stack.pop(2) - stack << Binary(left, :+, right) - when :opt_send_without_block - receiver, *arguments = stack.pop(insn[1][:orig_argc] + 1) - stack << CallNode(receiver, Period("."), Ident(insn[1][:mid]), ArgParen(Args(arguments))) - when :putobject - case insn[1] - when Float - stack << FloatLiteral(insn[1].inspect) - when Integer - stack << Int(insn[1].inspect) - else - raise "Unknown object type: #{insn[1].class.name}" - end - when :putobject_INT2FIX_0_ - stack << Int("0") - when :putobject_INT2FIX_1_ - stack << Int("1") - when :setlocal_WC_0 - stack << Assign(VarField(Ident(local_name(insn[1], 0))), stack.pop) - else - raise "Unknown instruction #{insn[0]}" - end - end - - Program(Statements(stack)) - end - - private - - def local_name(index, level) - current = iseq - level.times { current = current.parent_iseq } - current.local_table.locals[index].name.to_s - end - end - # These constants correspond to the putspecialobject 
instruction. They are # used to represent special objects that are pushed onto the stack. VM_SPECIAL_OBJECT_VMCORE = 1 diff --git a/lib/syntax_tree/yarv/bf.rb b/lib/syntax_tree/yarv/bf.rb new file mode 100644 index 00000000..b826ebf2 --- /dev/null +++ b/lib/syntax_tree/yarv/bf.rb @@ -0,0 +1,466 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # Parses the given source code into a syntax tree, compiles that syntax tree + # into YARV bytecode. + class Bf + class Node + def format(q) + Format.new(q).visit(self) + end + + def pretty_print(q) + PrettyPrint.new(q).visit(self) + end + end + + # The root node of the syntax tree. + class Root < Node + attr_reader :nodes, :location + + def initialize(nodes:, location:) + @nodes = nodes + @location = location + end + + def accept(visitor) + visitor.visit_root(self) + end + + def child_nodes + nodes + end + + alias deconstruct child_nodes + + def deconstruct_keys(keys) + { nodes: nodes, location: location } + end + end + + # [ ... 
] + class Loop < Node + attr_reader :nodes, :location + + def initialize(nodes:, location:) + @nodes = nodes + @location = location + end + + def accept(visitor) + visitor.visit_loop(self) + end + + def child_nodes + nodes + end + + alias deconstruct child_nodes + + def deconstruct_keys(keys) + { nodes: nodes, location: location } + end + end + + # + + class Increment < Node + attr_reader :location + + def initialize(location:) + @location = location + end + + def accept(visitor) + visitor.visit_increment(self) + end + + def child_nodes + [] + end + + alias deconstruct child_nodes + + def deconstruct_keys(keys) + { value: "+", location: location } + end + end + + # - + class Decrement < Node + attr_reader :location + + def initialize(location:) + @location = location + end + + def accept(visitor) + visitor.visit_decrement(self) + end + + def child_nodes + [] + end + + alias deconstruct child_nodes + + def deconstruct_keys(keys) + { value: "-", location: location } + end + end + + # > + class ShiftRight < Node + attr_reader :location + + def initialize(location:) + @location = location + end + + def accept(visitor) + visitor.visit_shift_right(self) + end + + def child_nodes + [] + end + + alias deconstruct child_nodes + + def deconstruct_keys(keys) + { value: ">", location: location } + end + end + + # < + class ShiftLeft < Node + attr_reader :location + + def initialize(location:) + @location = location + end + + def accept(visitor) + visitor.visit_shift_left(self) + end + + def child_nodes + [] + end + + alias deconstruct child_nodes + + def deconstruct_keys(keys) + { value: "<", location: location } + end + end + + # , + class Input < Node + attr_reader :location + + def initialize(location:) + @location = location + end + + def accept(visitor) + visitor.visit_input(self) + end + + def child_nodes + [] + end + + alias deconstruct child_nodes + + def deconstruct_keys(keys) + { value: ",", location: location } + end + end + + # . 
+ class Output < Node + attr_reader :location + + def initialize(location:) + @location = location + end + + def accept(visitor) + visitor.visit_output(self) + end + + def child_nodes + [] + end + + alias deconstruct child_nodes + + def deconstruct_keys(keys) + { value: ".", location: location } + end + end + + # Allows visiting the syntax tree recursively. + class Visitor + def visit(node) + node.accept(self) + end + + def visit_all(nodes) + nodes.map { |node| visit(node) } + end + + def visit_child_nodes(node) + visit_all(node.child_nodes) + end + + # Visit a Root node. + alias visit_root visit_child_nodes + + # Visit a Loop node. + alias visit_loop visit_child_nodes + + # Visit an Increment node. + alias visit_increment visit_child_nodes + + # Visit a Decrement node. + alias visit_decrement visit_child_nodes + + # Visit a ShiftRight node. + alias visit_shift_right visit_child_nodes + + # Visit a ShiftLeft node. + alias visit_shift_left visit_child_nodes + + # Visit an Input node. + alias visit_input visit_child_nodes + + # Visit an Output node. + alias visit_output visit_child_nodes + end + + # Compiles the syntax tree into YARV bytecode. + class Compiler < Visitor + attr_reader :iseq + + def initialize + @iseq = InstructionSequence.new(:top, "", nil, Location.default) + end + + def visit_decrement(node) + change_by(-1) + end + + def visit_increment(node) + change_by(1) + end + + def visit_input(node) + iseq.getglobal(:$tape) + iseq.getglobal(:$cursor) + iseq.getglobal(:$stdin) + iseq.send(:getc, 0, VM_CALL_ARGS_SIMPLE) + iseq.send(:ord, 0, VM_CALL_ARGS_SIMPLE) + iseq.send(:[]=, 2, VM_CALL_ARGS_SIMPLE) + end + + def visit_loop(node) + start_label = iseq.label + + # First, we're going to compare the value at the current cursor to 0. + # If it's 0, then we'll jump past the loop. Otherwise we'll execute + # the loop. 
+ iseq.getglobal(:$tape) + iseq.getglobal(:$cursor) + iseq.send(:[], 1, VM_CALL_ARGS_SIMPLE) + iseq.putobject(0) + iseq.send(:==, 1, VM_CALL_ARGS_SIMPLE) + branchunless = iseq.branchunless(-1) + + # Otherwise, here we'll execute the loop. + visit_nodes(node.nodes) + + # Now that we've visited all of the child nodes, we need to jump back + # to the start of the loop. + iseq.jump(start_label) + + # Now that we have all of the instructions in place, we can patch the + # branchunless to point to the next instruction for skipping the loop. + branchunless[1] = iseq.label + end + + def visit_output(node) + iseq.getglobal(:$stdout) + iseq.getglobal(:$tape) + iseq.getglobal(:$cursor) + iseq.send(:[], 1, VM_CALL_ARGS_SIMPLE) + iseq.send(:chr, 0, VM_CALL_ARGS_SIMPLE) + iseq.send(:putc, 1, VM_CALL_ARGS_SIMPLE) + end + + def visit_root(node) + iseq.duphash({ 0 => 0 }) + iseq.setglobal(:$tape) + iseq.getglobal(:$tape) + iseq.putobject(0) + iseq.send(:default=, 1, VM_CALL_ARGS_SIMPLE) + + iseq.putobject(0) + iseq.setglobal(:$cursor) + + visit_nodes(node.nodes) + + iseq.putself + iseq.send(:exit, 0, VM_CALL_ARGS_SIMPLE) + iseq + end + + def visit_shift_left(node) + shift_by(-1) + end + + def visit_shift_right(node) + shift_by(1) + end + + private + + def change_by(value) + iseq.getglobal(:$tape) + iseq.getglobal(:$cursor) + iseq.getglobal(:$tape) + iseq.getglobal(:$cursor) + iseq.send(:[], 1, VM_CALL_ARGS_SIMPLE) + + if value < 0 + iseq.putobject(-value) + iseq.send(:-, 1, VM_CALL_ARGS_SIMPLE) + else + iseq.putobject(value) + iseq.send(:+, 1, VM_CALL_ARGS_SIMPLE) + end + + iseq.send(:[]=, 2, VM_CALL_ARGS_SIMPLE) + end + + def shift_by(value) + iseq.getglobal(:$cursor) + + if value < 0 + iseq.putobject(-value) + iseq.send(:-, 1, VM_CALL_ARGS_SIMPLE) + else + iseq.putobject(value) + iseq.send(:+, 1, VM_CALL_ARGS_SIMPLE) + end + + iseq.setglobal(:$cursor) + end + + def visit_nodes(nodes) + nodes + .chunk do |child| + case child + when Increment, Decrement + :change + when ShiftLeft, 
ShiftRight + :shift + else + :default + end + end + .each do |type, children| + case type + when :change + value = 0 + children.each { |child| value += child.is_a?(Increment) ? 1 : -1 } + change_by(value) + when :shift + value = 0 + children.each { |child| value += child.is_a?(ShiftRight) ? 1 : -1 } + shift_by(value) + else + visit_all(children) + end + end + end + end + + class Error < StandardError + end + + attr_reader :source + + def initialize(source) + @source = source + end + + def compile + Root.new(nodes: parse_segment(source, 0), location: 0...source.length).accept(Compiler.new) + end + + private + + def parse_segment(segment, offset) + index = 0 + nodes = [] + + while index < segment.length + location = offset + index + + case segment[index] + when "+" + nodes << Increment.new(location: location...(location + 1)) + index += 1 + when "-" + nodes << Decrement.new(location: location...(location + 1)) + index += 1 + when ">" + nodes << ShiftRight.new(location: location...(location + 1)) + index += 1 + when "<" + nodes << ShiftLeft.new(location: location...(location + 1)) + index += 1 + when "." 
+ nodes << Output.new(location: location...(location + 1)) + index += 1 + when "," + nodes << Input.new(location: location...(location + 1)) + index += 1 + when "[" + matched = 1 + end_index = index + 1 + + while matched != 0 && end_index < segment.length + case segment[end_index] + when "[" + matched += 1 + when "]" + matched -= 1 + end + + end_index += 1 + end + + raise Error, "Unmatched start loop" if matched != 0 + + content = segment[(index + 1)...(end_index - 1)] + nodes << Loop.new( + nodes: parse_segment(content, offset + index + 1), + location: location...(offset + end_index) + ) + + index = end_index + when "]" + raise Error, "Unmatched end loop" + else + index += 1 + end + end + + nodes + end + end + end +end diff --git a/lib/syntax_tree/yarv/disassembler.rb b/lib/syntax_tree/yarv/disassembler.rb new file mode 100644 index 00000000..51d6fc08 --- /dev/null +++ b/lib/syntax_tree/yarv/disassembler.rb @@ -0,0 +1,209 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # This class is responsible for taking a compiled instruction sequence and + # walking through it to generate equivalent Ruby code. 
+ class Disassembler + include DSL + attr_reader :iseq, :label_name, :label_field, :label_ref + + def initialize(iseq) + @iseq = iseq + + @label_name = "__disasm_label" + @label_field = VarField(Ident(label_name)) + @label_ref = VarRef(Ident(label_name)) + end + + def to_ruby + Program(Statements(disassemble(iseq.insns))) + end + + private + + def node_for(value) + case value + when Integer + Int(value.to_s) + when Symbol + SymbolLiteral(Ident(value.to_s)) + end + end + + def disassemble(insns) + label = :label_0 + clauses = {} + clause = [] + + insns.each do |insn| + if insn.is_a?(Symbol) && insn.start_with?("label_") + clause << Assign(label_field, node_for(insn)) unless clause.last.is_a?(Next) + clauses[label] = clause + clause = [] + label = insn + next + end + + case insn[0] + when :branchunless + clause << IfNode(clause.pop, Statements([Assign(label_field, node_for(insn[1])), Next(Args([]))]), nil) + when :dup + clause << clause.last + when :duphash + assocs = insn[1].map { |key, value| Assoc(node_for(key), node_for(value)) } + clause << HashLiteral(LBrace("{"), assocs) + when :getglobal + clause << VarRef(GVar(insn[1].to_s)) + when :getlocal_WC_0 + clause << VarRef(Ident(local_name(insn[1], 0))) + when :jump + clause << Assign(label_field, node_for(insn[1])) + clause << Next(Args([])) + when :leave + clause << ReturnNode(Args([clause.pop])) + when :opt_and + left, right = clause.pop(2) + clause << Binary(left, :&, right) + when :opt_aref + collection, arg = clause.pop(2) + clause << ARef(collection, Args([arg])) + when :opt_aset + collection, arg, value = clause.pop(3) + + if value.is_a?(Binary) && value.left.is_a?(ARef) && collection === value.left.collection && arg === value.left.index.parts[0] + clause << OpAssign(ARefField(collection, Args([arg])), Op("#{value.operator}="), value.right) + else + clause << Assign(ARefField(collection, Args([arg])), value) + end + when :opt_div + left, right = clause.pop(2) + clause << Binary(left, :/, right) + when 
:opt_eq + left, right = clause.pop(2) + clause << Binary(left, :==, right) + when :opt_ge + left, right = clause.pop(2) + clause << Binary(left, :>=, right) + when :opt_gt + left, right = clause.pop(2) + clause << Binary(left, :>, right) + when :opt_le + left, right = clause.pop(2) + clause << Binary(left, :<=, right) + when :opt_lt + left, right = clause.pop(2) + clause << Binary(left, :<, right) + when :opt_ltlt + left, right = clause.pop(2) + clause << Binary(left, :<<, right) + when :opt_minus + left, right = clause.pop(2) + clause << Binary(left, :-, right) + when :opt_mod + left, right = clause.pop(2) + clause << Binary(left, :%, right) + when :opt_mult + left, right = clause.pop(2) + clause << Binary(left, :*, right) + when :opt_neq + left, right = clause.pop(2) + clause << Binary(left, :"!=", right) + when :opt_or + left, right = clause.pop(2) + clause << Binary(left, :|, right) + when :opt_plus + left, right = clause.pop(2) + clause << Binary(left, :+, right) + when :opt_send_without_block + if insn[1][:orig_argc] == 0 + clause << CallNode(clause.pop, Period("."), Ident(insn[1][:mid]), nil) + elsif insn[1][:orig_argc] == 1 && insn[1][:mid].end_with?("=") + receiver, argument = clause.pop(2) + clause << Assign(CallNode(receiver, Period("."), Ident(insn[1][:mid][0..-2]), nil), argument) + else + receiver, *arguments = clause.pop(insn[1][:orig_argc] + 1) + clause << CallNode(receiver, Period("."), Ident(insn[1][:mid]), ArgParen(Args(arguments))) + end + when :putobject + case insn[1] + when Float + clause << FloatLiteral(insn[1].inspect) + when Integer + clause << Int(insn[1].inspect) + else + raise "Unknown object type: #{insn[1].class.name}" + end + when :putobject_INT2FIX_0_ + clause << Int("0") + when :putobject_INT2FIX_1_ + clause << Int("1") + when :putself + clause << VarRef(Kw("self")) + when :setglobal + target = GVar(insn[1].to_s) + value = clause.pop + + if value.is_a?(Binary) && VarRef(target) === value.left + clause << OpAssign(VarField(target), 
Op("#{value.operator}="), value.right) + else + clause << Assign(VarField(target), value) + end + when :setlocal_WC_0 + target = Ident(local_name(insn[1], 0)) + value = clause.pop + + if value.is_a?(Binary) && VarRef(target) === value.left + clause << OpAssign(VarField(target), Op("#{value.operator}="), value.right) + else + clause << Assign(VarField(target), value) + end + else + raise "Unknown instruction #{insn[0]}" + end + end + + # If there's only one clause, then we don't need a case statement, and + # we can just disassemble the first clause. + clauses[label] = clause + return clauses.values.first if clauses.size == 1 + + # Here we're going to build up a big case statement that will handle all + # of the different labels. + current = nil + clauses.reverse_each do |label, clause| + current = When(Args([node_for(label)]), Statements(clause), current) + end + switch = Case(Kw("case"), label_ref, current) + + # Here we're going to make sure that any locals that were established in + # the label_0 block are initialized so that scoping rules work + # correctly. + stack = [] + locals = [label_name] + + clauses[:label_0].each do |node| + if node.is_a?(Assign) && node.target.is_a?(VarField) && node.target.value.is_a?(Ident) + value = node.target.value.value + next if locals.include?(value) + + stack << Assign(node.target, VarRef(Kw("nil"))) + locals << value + end + end + + # Finally, we'll set up the initial label and loop the entire case + # statement. 
+ stack << Assign(label_field, node_for(:label_0)) + stack << MethodAddBlock(CallNode(nil, nil, Ident("loop"), Args([])), BlockNode(Kw("do"), nil, BodyStmt(Statements([switch]), nil, nil, nil, nil))) + stack + end + + def local_name(index, level) + current = iseq + level.times { current = current.parent_iseq } + current.local_table.locals[index].name.to_s + end + end + end +end From a1236fd6c4e4a22292e2a1d52facb95ecdc7a208 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 21 Nov 2022 13:25:14 -0500 Subject: [PATCH 052/104] Default to VM_CALL_ARGS_SIMPLE --- lib/syntax_tree/compiler.rb | 20 ++++++------- lib/syntax_tree/dsl.rb | 4 +++ lib/syntax_tree/yarv.rb | 23 +++++++-------- lib/syntax_tree/yarv/bf.rb | 33 +++++++++++----------- lib/syntax_tree/yarv/disassembler.rb | 40 +++++++++++++++++--------- test/yarv_test.rb | 42 ++++++++++++++-------------- 6 files changed, 88 insertions(+), 74 deletions(-) diff --git a/lib/syntax_tree/compiler.rb b/lib/syntax_tree/compiler.rb index 926661cc..32b5f089 100644 --- a/lib/syntax_tree/compiler.rb +++ b/lib/syntax_tree/compiler.rb @@ -253,13 +253,13 @@ def visit_alias(node) iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_CBASE) visit(node.left) visit(node.right) - iseq.send(:"core#set_method_alias", 3, YARV::VM_CALL_ARGS_SIMPLE) + iseq.send(:"core#set_method_alias", 3) end def visit_aref(node) visit(node.collection) visit(node.index) - iseq.send(:[], 1, YARV::VM_CALL_ARGS_SIMPLE) + iseq.send(:[], 1) end def visit_arg_block(node) @@ -313,7 +313,7 @@ def visit_assign(node) visit(node.target.index) visit(node.value) iseq.setn(3) - iseq.send(:[]=, 2, YARV::VM_CALL_ARGS_SIMPLE) + iseq.send(:[]=, 2) iseq.pop when ConstPathField names = constant_names(node.target) @@ -337,7 +337,7 @@ def visit_assign(node) visit(node.target) visit(node.value) iseq.setn(2) - iseq.send(:"#{node.target.name.value}=", 1, YARV::VM_CALL_ARGS_SIMPLE) + iseq.send(:"#{node.target.name.value}=", 1) iseq.pop when TopConstField name = 
node.target.constant.value.to_sym @@ -420,7 +420,7 @@ def visit_binary(node) else visit(node.left) visit(node.right) - iseq.send(node.operator, 1, YARV::VM_CALL_ARGS_SIMPLE) + iseq.send(node.operator, 1) end end @@ -981,7 +981,7 @@ def visit_mrhs(node) def visit_not(node) visit(node.statement) - iseq.send(:!, 0, YARV::VM_CALL_ARGS_SIMPLE) + iseq.send(:!, 0) end def visit_opassign(node) @@ -1367,7 +1367,7 @@ def visit_undef(node) iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_CBASE) visit(symbol) - iseq.send(:"core#undef_method", 2, YARV::VM_CALL_ARGS_SIMPLE) + iseq.send(:"core#undef_method", 2) end end @@ -1523,7 +1523,7 @@ def visit_xstring_literal(node) def visit_yield(node) parts = argument_parts(node.arguments) visit_all(parts) - iseq.invokeblock(nil, parts.length, YARV::VM_CALL_ARGS_SIMPLE) + iseq.invokeblock(nil, parts.length) end def visit_zsuper(_node) @@ -1759,12 +1759,12 @@ def with_opassign(node) visit(node.target.index) iseq.dupn(2) - iseq.send(:[], 1, YARV::VM_CALL_ARGS_SIMPLE) + iseq.send(:[], 1) yield iseq.setn(3) - iseq.send(:[]=, 2, YARV::VM_CALL_ARGS_SIMPLE) + iseq.send(:[]=, 2) iseq.pop when ConstPathField name = node.target.constant.value.to_sym diff --git a/lib/syntax_tree/dsl.rb b/lib/syntax_tree/dsl.rb index 05911ee3..1d1324df 100644 --- a/lib/syntax_tree/dsl.rb +++ b/lib/syntax_tree/dsl.rb @@ -38,6 +38,10 @@ def BodyStmt(statements, rescue_clause, else_keyword, else_clause, ensure_clause BodyStmt.new(statements: statements, rescue_clause: rescue_clause, else_keyword: else_keyword, else_clause: else_clause, ensure_clause: ensure_clause, location: Location.default) end + def Break(arguments) + Break.new(arguments: arguments, location: Location.default) + end + def CallNode(receiver, operator, message, arguments) CallNode.new(receiver: receiver, operator: operator, message: message, arguments: arguments, location: Location.default) end diff --git a/lib/syntax_tree/yarv.rb 
b/lib/syntax_tree/yarv.rb index 2224792a..822844fb 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -460,7 +460,7 @@ def intern push([:intern]) end - def invokeblock(method_id, argc, flag) + def invokeblock(method_id, argc, flag = VM_CALL_ARGS_SIMPLE) stack.change_by(-argc + 1) push([:invokeblock, call_data(method_id, argc, flag)]) end @@ -547,7 +547,7 @@ def opt_newarray_max(length) push([:opt_newarray_max, length]) else newarray(length) - send(:max, 0, VM_CALL_ARGS_SIMPLE) + send(:max, 0) end end @@ -557,7 +557,7 @@ def opt_newarray_min(length) push([:opt_newarray_min, length]) else newarray(length) - send(:min, 0, VM_CALL_ARGS_SIMPLE) + send(:min, 0) end end @@ -569,22 +569,20 @@ def opt_setinlinecache(inline_storage) def opt_str_freeze(value) if specialized_instruction stack.change_by(+1) - push( - [:opt_str_freeze, value, call_data(:freeze, 0, VM_CALL_ARGS_SIMPLE)] - ) + push([:opt_str_freeze, value, call_data(:freeze, 0)]) else putstring(value) - send(:freeze, 0, VM_CALL_ARGS_SIMPLE) + send(:freeze, 0) end end def opt_str_uminus(value) if specialized_instruction stack.change_by(+1) - push([:opt_str_uminus, value, call_data(:-@, 0, VM_CALL_ARGS_SIMPLE)]) + push([:opt_str_uminus, value, call_data(:-@, 0)]) else putstring(value) - send(:-@, 0, VM_CALL_ARGS_SIMPLE) + send(:-@, 0) end end @@ -633,7 +631,7 @@ def putstring(object) push([:putstring, object]) end - def send(method_id, argc, flag, block_iseq = nil) + def send(method_id, argc, flag = VM_CALL_ARGS_SIMPLE, block_iseq = nil) stack.change_by(-(argc + 1) + 1) cdata = call_data(method_id, argc, flag) @@ -669,8 +667,7 @@ def send(method_id, argc, flag, block_iseq = nil) when [:|, 1] then push([:opt_or, cdata]) when [:[]=, 2] then push([:opt_aset, cdata]) when [:!=, 1] - eql_data = call_data(:==, 1, VM_CALL_ARGS_SIMPLE) - push([:opt_neq, eql_data, cdata]) + push([:opt_neq, call_data(:==, 1), cdata]) else push([:opt_send_without_block, cdata]) end @@ -762,7 +759,7 @@ def toregexp(options, 
length) # This creates a call data object that is used as the operand for the # send, invokesuper, and objtostring instructions. - def call_data(method_id, argc, flag) + def call_data(method_id, argc, flag = VM_CALL_ARGS_SIMPLE) { mid: method_id, flag: flag, orig_argc: argc } end diff --git a/lib/syntax_tree/yarv/bf.rb b/lib/syntax_tree/yarv/bf.rb index b826ebf2..16098190 100644 --- a/lib/syntax_tree/yarv/bf.rb +++ b/lib/syntax_tree/yarv/bf.rb @@ -260,9 +260,9 @@ def visit_input(node) iseq.getglobal(:$tape) iseq.getglobal(:$cursor) iseq.getglobal(:$stdin) - iseq.send(:getc, 0, VM_CALL_ARGS_SIMPLE) - iseq.send(:ord, 0, VM_CALL_ARGS_SIMPLE) - iseq.send(:[]=, 2, VM_CALL_ARGS_SIMPLE) + iseq.send(:getc, 0) + iseq.send(:ord, 0) + iseq.send(:[]=, 2) end def visit_loop(node) @@ -273,9 +273,9 @@ def visit_loop(node) # the loop. iseq.getglobal(:$tape) iseq.getglobal(:$cursor) - iseq.send(:[], 1, VM_CALL_ARGS_SIMPLE) + iseq.send(:[], 1) iseq.putobject(0) - iseq.send(:==, 1, VM_CALL_ARGS_SIMPLE) + iseq.send(:==, 1) branchunless = iseq.branchunless(-1) # Otherwise, here we'll execute the loop. 
@@ -294,9 +294,9 @@ def visit_output(node) iseq.getglobal(:$stdout) iseq.getglobal(:$tape) iseq.getglobal(:$cursor) - iseq.send(:[], 1, VM_CALL_ARGS_SIMPLE) - iseq.send(:chr, 0, VM_CALL_ARGS_SIMPLE) - iseq.send(:putc, 1, VM_CALL_ARGS_SIMPLE) + iseq.send(:[], 1) + iseq.send(:chr, 0) + iseq.send(:putc, 1) end def visit_root(node) @@ -304,15 +304,14 @@ def visit_root(node) iseq.setglobal(:$tape) iseq.getglobal(:$tape) iseq.putobject(0) - iseq.send(:default=, 1, VM_CALL_ARGS_SIMPLE) + iseq.send(:default=, 1) iseq.putobject(0) iseq.setglobal(:$cursor) visit_nodes(node.nodes) - iseq.putself - iseq.send(:exit, 0, VM_CALL_ARGS_SIMPLE) + iseq.leave iseq end @@ -331,17 +330,17 @@ def change_by(value) iseq.getglobal(:$cursor) iseq.getglobal(:$tape) iseq.getglobal(:$cursor) - iseq.send(:[], 1, VM_CALL_ARGS_SIMPLE) + iseq.send(:[], 1) if value < 0 iseq.putobject(-value) - iseq.send(:-, 1, VM_CALL_ARGS_SIMPLE) + iseq.send(:-, 1) else iseq.putobject(value) - iseq.send(:+, 1, VM_CALL_ARGS_SIMPLE) + iseq.send(:+, 1) end - iseq.send(:[]=, 2, VM_CALL_ARGS_SIMPLE) + iseq.send(:[]=, 2) end def shift_by(value) @@ -349,10 +348,10 @@ def shift_by(value) if value < 0 iseq.putobject(-value) - iseq.send(:-, 1, VM_CALL_ARGS_SIMPLE) + iseq.send(:-, 1) else iseq.putobject(value) - iseq.send(:+, 1, VM_CALL_ARGS_SIMPLE) + iseq.send(:+, 1) end iseq.setglobal(:$cursor) diff --git a/lib/syntax_tree/yarv/disassembler.rb b/lib/syntax_tree/yarv/disassembler.rb index 51d6fc08..566ed984 100644 --- a/lib/syntax_tree/yarv/disassembler.rb +++ b/lib/syntax_tree/yarv/disassembler.rb @@ -17,7 +17,7 @@ def initialize(iseq) end def to_ruby - Program(Statements(disassemble(iseq.insns))) + Program(disassemble(iseq)) end private @@ -31,12 +31,12 @@ def node_for(value) end end - def disassemble(insns) + def disassemble(iseq) label = :label_0 clauses = {} clause = [] - insns.each do |insn| + iseq.insns.each do |insn| if insn.is_a?(Symbol) && insn.start_with?("label_") clause << Assign(label_field, node_for(insn)) 
unless clause.last.is_a?(Next) clauses[label] = clause @@ -61,7 +61,8 @@ def disassemble(insns) clause << Assign(label_field, node_for(insn[1])) clause << Next(Args([])) when :leave - clause << ReturnNode(Args([clause.pop])) + value = Args([clause.pop]) + clause << (iseq.type == :top ? Break(value) : ReturnNode(value)) when :opt_and left, right = clause.pop(2) clause << Binary(left, :&, right) @@ -116,14 +117,27 @@ def disassemble(insns) left, right = clause.pop(2) clause << Binary(left, :+, right) when :opt_send_without_block - if insn[1][:orig_argc] == 0 - clause << CallNode(clause.pop, Period("."), Ident(insn[1][:mid]), nil) - elsif insn[1][:orig_argc] == 1 && insn[1][:mid].end_with?("=") - receiver, argument = clause.pop(2) - clause << Assign(CallNode(receiver, Period("."), Ident(insn[1][:mid][0..-2]), nil), argument) + if insn[1][:flag] & VM_CALL_FCALL > 0 + if insn[1][:orig_argc] == 0 + clause.pop + clause << CallNode(nil, nil, Ident(insn[1][:mid]), Args([])) + elsif insn[1][:orig_argc] == 1 && insn[1][:mid].end_with?("=") + _receiver, argument = clause.pop(2) + clause << Assign(CallNode(nil, nil, Ident(insn[1][:mid][0..-2]), nil), argument) + else + _receiver, *arguments = clause.pop(insn[1][:orig_argc] + 1) + clause << CallNode(nil, nil, Ident(insn[1][:mid]), ArgParen(Args(arguments))) + end else - receiver, *arguments = clause.pop(insn[1][:orig_argc] + 1) - clause << CallNode(receiver, Period("."), Ident(insn[1][:mid]), ArgParen(Args(arguments))) + if insn[1][:orig_argc] == 0 + clause << CallNode(clause.pop, Period("."), Ident(insn[1][:mid]), nil) + elsif insn[1][:orig_argc] == 1 && insn[1][:mid].end_with?("=") + receiver, argument = clause.pop(2) + clause << Assign(CallNode(receiver, Period("."), Ident(insn[1][:mid][0..-2]), nil), argument) + else + receiver, *arguments = clause.pop(insn[1][:orig_argc] + 1) + clause << CallNode(receiver, Period("."), Ident(insn[1][:mid]), ArgParen(Args(arguments))) + end end when :putobject case insn[1] @@ -166,7 +180,7 
@@ def disassemble(insns) # If there's only one clause, then we don't need a case statement, and # we can just disassemble the first clause. clauses[label] = clause - return clauses.values.first if clauses.size == 1 + return Statements(clauses.values.first) if clauses.size == 1 # Here we're going to build up a big case statement that will handle all # of the different labels. @@ -196,7 +210,7 @@ def disassemble(insns) # statement. stack << Assign(label_field, node_for(:label_0)) stack << MethodAddBlock(CallNode(nil, nil, Ident("loop"), Args([])), BlockNode(Kw("do"), nil, BodyStmt(Statements([switch]), nil, nil, nil, nil))) - stack + Statements(stack) end def local_name(index, level) diff --git a/test/yarv_test.rb b/test/yarv_test.rb index 57371ba3..da348224 100644 --- a/test/yarv_test.rb +++ b/test/yarv_test.rb @@ -6,27 +6,27 @@ module SyntaxTree class YARVTest < Minitest::Test CASES = { - "0" => "return 0\n", - "1" => "return 1\n", - "2" => "return 2\n", - "1.0" => "return 1.0\n", - "1 + 2" => "return 1 + 2\n", - "1 - 2" => "return 1 - 2\n", - "1 * 2" => "return 1 * 2\n", - "1 / 2" => "return 1 / 2\n", - "1 % 2" => "return 1 % 2\n", - "1 < 2" => "return 1 < 2\n", - "1 <= 2" => "return 1 <= 2\n", - "1 > 2" => "return 1 > 2\n", - "1 >= 2" => "return 1 >= 2\n", - "1 == 2" => "return 1 == 2\n", - "1 != 2" => "return 1 != 2\n", - "1 & 2" => "return 1 & 2\n", - "1 | 2" => "return 1 | 2\n", - "1 << 2" => "return 1 << 2\n", - "1 >> 2" => "return 1.>>(2)\n", - "1 ** 2" => "return 1.**(2)\n", - "a = 1; a" => "a = 1\nreturn a\n", + "0" => "break 0\n", + "1" => "break 1\n", + "2" => "break 2\n", + "1.0" => "break 1.0\n", + "1 + 2" => "break 1 + 2\n", + "1 - 2" => "break 1 - 2\n", + "1 * 2" => "break 1 * 2\n", + "1 / 2" => "break 1 / 2\n", + "1 % 2" => "break 1 % 2\n", + "1 < 2" => "break 1 < 2\n", + "1 <= 2" => "break 1 <= 2\n", + "1 > 2" => "break 1 > 2\n", + "1 >= 2" => "break 1 >= 2\n", + "1 == 2" => "break 1 == 2\n", + "1 != 2" => "break 1 != 2\n", + "1 & 2" => "break 1 & 
2\n", + "1 | 2" => "break 1 | 2\n", + "1 << 2" => "break 1 << 2\n", + "1 >> 2" => "break 1.>>(2)\n", + "1 ** 2" => "break 1.**(2)\n", + "a = 1; a" => "a = 1\nbreak a\n", }.freeze CASES.each do |source, expected| From d8815de6b2c00ae2001980d557cc62302e029123 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 21 Nov 2022 13:40:46 -0500 Subject: [PATCH 053/104] Add objects to wrap instructions --- .rubocop.yml | 3 + lib/syntax_tree.rb | 1 + lib/syntax_tree/compiler.rb | 109 ++- lib/syntax_tree/dsl.rb | 905 +++++++++++++++++++++- lib/syntax_tree/yarv.rb | 210 ++--- lib/syntax_tree/yarv/bf.rb | 553 ++++--------- lib/syntax_tree/yarv/disassembler.rb | 366 +++++---- lib/syntax_tree/yarv/instructions.rb | 1071 ++++++++++++++++++++++++++ test/yarv_test.rb | 11 +- 9 files changed, 2466 insertions(+), 763 deletions(-) create mode 100644 lib/syntax_tree/yarv/instructions.rb diff --git a/.rubocop.yml b/.rubocop.yml index d0bf0830..134a75dc 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -94,6 +94,9 @@ Style/MutableConstant: Style/NegatedIfElseCondition: Enabled: false +Style/Next: + Enabled: false + Style/NumericPredicate: Enabled: false diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index 2cbfa2e4..792ba20c 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -31,6 +31,7 @@ require_relative "syntax_tree/compiler" require_relative "syntax_tree/yarv/bf" require_relative "syntax_tree/yarv/disassembler" +require_relative "syntax_tree/yarv/instructions" # Syntax Tree is a suite of tools built on top of the internal CRuby parser. 
It # provides the ability to generate a syntax tree from source, as well as the diff --git a/lib/syntax_tree/compiler.rb b/lib/syntax_tree/compiler.rb index 32b5f089..8327a080 100644 --- a/lib/syntax_tree/compiler.rb +++ b/lib/syntax_tree/compiler.rb @@ -407,7 +407,7 @@ def visit_binary(node) iseq.pop visit(node.right) - branchunless[1] = iseq.label + branchunless.patch!(iseq) when :"||" visit(node.left) iseq.dup @@ -416,7 +416,7 @@ def visit_binary(node) iseq.pop visit(node.right) - branchif[1] = iseq.label + branchif.patch!(iseq) else visit(node.left) visit(node.right) @@ -567,7 +567,7 @@ def visit_call(node) flag |= YARV::VM_CALL_FCALL if node.receiver.nil? iseq.send(node.message.value.to_sym, argc, flag, block_iseq) - branchnil[1] = iseq.label if branchnil + branchnil.patch!(iseq) if branchnil end def visit_case(node) @@ -600,7 +600,7 @@ def visit_case(node) branches.each_with_index do |(clause, branchif), index| iseq.leave if index != 0 - branchif[1] = iseq.label + branchif.patch!(iseq) iseq.pop visit(clause) end @@ -616,21 +616,21 @@ def visit_class(node) iseq.leave end - flags = YARV::VM_DEFINECLASS_TYPE_CLASS + flags = YARV::DefineClass::TYPE_CLASS case node.constant when ConstPathRef - flags |= YARV::VM_DEFINECLASS_FLAG_SCOPED + flags |= YARV::DefineClass::FLAG_SCOPED visit(node.constant.parent) when ConstRef iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) when TopConstRef - flags |= YARV::VM_DEFINECLASS_FLAG_SCOPED + flags |= YARV::DefineClass::FLAG_SCOPED iseq.putobject(Object) end if node.superclass - flags |= YARV::VM_DEFINECLASS_FLAG_HAS_SUPERCLASS + flags |= YARV::DefineClass::FLAG_HAS_SUPERCLASS visit(node.superclass) else iseq.putnil @@ -675,16 +675,16 @@ def visit_const_path_ref(node) end def visit_def(node) - method_iseq = - with_child_iseq(iseq.method_child_iseq(node.name.value, node.location)) do - visit(node.params) if node.params - iseq.event(:RUBY_EVENT_CALL) - visit(node.bodystmt) - iseq.event(:RUBY_EVENT_RETURN) - iseq.leave - 
end - name = node.name.value.to_sym + method_iseq = iseq.method_child_iseq(name.to_s, node.location) + + with_child_iseq(method_iseq) do + visit(node.params) if node.params + iseq.event(:RUBY_EVENT_CALL) + visit(node.bodystmt) + iseq.event(:RUBY_EVENT_RETURN) + iseq.leave + end if node.target visit(node.target) @@ -714,18 +714,18 @@ def visit_defined(node) case value when Const iseq.putnil - iseq.defined(YARV::DEFINED_CONST, name, "constant") + iseq.defined(YARV::Defined::CONST, name, "constant") when CVar iseq.putnil - iseq.defined(YARV::DEFINED_CVAR, name, "class variable") + iseq.defined(YARV::Defined::CVAR, name, "class variable") when GVar iseq.putnil - iseq.defined(YARV::DEFINED_GVAR, name, "global-variable") + iseq.defined(YARV::Defined::GVAR, name, "global-variable") when Ident iseq.putobject("local-variable") when IVar iseq.putnil - iseq.defined(YARV::DEFINED_IVAR, name, "instance-variable") + iseq.defined(YARV::Defined::IVAR, name, "instance-variable") when Kw case name when :false @@ -742,13 +742,13 @@ def visit_defined(node) iseq.putself name = node.value.value.value.to_sym - iseq.defined(YARV::DEFINED_FUNC, name, "method") + iseq.defined(YARV::Defined::FUNC, name, "method") when YieldNode iseq.putnil - iseq.defined(YARV::DEFINED_YIELD, false, "yield") + iseq.defined(YARV::Defined::YIELD, false, "yield") when ZSuper iseq.putnil - iseq.defined(YARV::DEFINED_ZSUPER, false, "super") + iseq.defined(YARV::Defined::ZSUPER, false, "super") else iseq.putobject("expression") end @@ -842,7 +842,7 @@ def visit_if(node) if last_statement? iseq.leave - branchunless[1] = iseq.label + branchunless.patch!(iseq) node.consequent ? 
visit(node.consequent) : iseq.putnil else @@ -850,11 +850,11 @@ def visit_if(node) if node.consequent jump = iseq.jump(-1) - branchunless[1] = iseq.label + branchunless.patch!(iseq) visit(node.consequent) jump[1] = iseq.label else - branchunless[1] = iseq.label + branchunless.patch!(iseq) end end end @@ -953,16 +953,16 @@ def visit_module(node) iseq.leave end - flags = YARV::VM_DEFINECLASS_TYPE_MODULE + flags = YARV::DefineClass::TYPE_MODULE case node.constant when ConstPathRef - flags |= YARV::VM_DEFINECLASS_FLAG_SCOPED + flags |= YARV::DefineClass::FLAG_SCOPED visit(node.constant.parent) when ConstRef iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) when TopConstRef - flags |= YARV::VM_DEFINECLASS_FLAG_SCOPED + flags |= YARV::DefineClass::FLAG_SCOPED iseq.putobject(Object) end @@ -1004,15 +1004,15 @@ def visit_opassign(node) case node.target when ARefField iseq.leave - branchunless[1] = iseq.label + branchunless.patch!(iseq) iseq.setn(3) iseq.adjuststack(3) when ConstPathField, TopConstField - branchunless[1] = iseq.label + branchunless.patch!(iseq) iseq.swap iseq.pop else - branchunless[1] = iseq.label + branchunless.patch!(iseq) end when :"||" if node.target.is_a?(ConstPathField) || node.target.is_a?(TopConstField) @@ -1034,11 +1034,11 @@ def visit_opassign(node) if node.target.is_a?(ARefField) iseq.leave - branchif[1] = iseq.label + branchif.patch!(iseq) iseq.setn(3) iseq.adjuststack(3) else - branchif[1] = iseq.label + branchif.patch!(iseq) end end else @@ -1092,7 +1092,10 @@ def visit_params(node) if node.keywords.any? argument_options[:kwbits] = 0 argument_options[:keyword] = [] - checkkeywords = [] + + keyword_bits_name = node.keyword_rest ? 3 : 2 + iseq.argument_size += 1 + keyword_bits_index = iseq.local_table.locals.size + node.keywords.size node.keywords.each_with_index do |(keyword, value), keyword_index| name = keyword.value.chomp(":").to_sym @@ -1105,24 +1108,18 @@ def visit_params(node) if value.nil? 
argument_options[:keyword] << name elsif (compiled = RubyVisitor.compile(value)) - compiled = value.accept(RubyVisitor.new) argument_options[:keyword] << [name, compiled] else argument_options[:keyword] << [name] - checkkeywords << iseq.checkkeyword(-1, keyword_index) + iseq.checkkeyword(keyword_bits_index, keyword_index) branchif = iseq.branchif(-1) visit(value) iseq.setlocal(index, 0) - branchif[1] = iseq.label + branchif.patch!(iseq) end end - name = node.keyword_rest ? 3 : 2 - iseq.argument_size += 1 - iseq.local_table.plain(name) - - lookup = iseq.local_table.find(name, 0) - checkkeywords.each { |checkkeyword| checkkeyword[1] = lookup.index } + iseq.local_table.plain(keyword_bits_name) end if node.keyword_rest.is_a?(ArgsForward) @@ -1251,7 +1248,7 @@ def visit_sclass(node) iseq.defineclass( :singletonclass, singleton_iseq, - YARV::VM_DEFINECLASS_TYPE_SINGLETON_CLASS + YARV::DefineClass::TYPE_SINGLETON_CLASS ) end @@ -1378,7 +1375,7 @@ def visit_unless(node) if last_statement? iseq.leave - branchunless[1] = iseq.label + branchunless.patch!(iseq) visit(node.statements) else @@ -1386,11 +1383,11 @@ def visit_unless(node) if node.consequent jump = iseq.jump(-1) - branchunless[1] = iseq.label + branchunless.patch!(iseq) visit(node.consequent) jump[1] = iseq.label else - branchunless[1] = iseq.label + branchunless.patch!(iseq) end end end @@ -1598,24 +1595,24 @@ def opassign_defined(node) name = node.target.constant.value.to_sym iseq.dup - iseq.defined(YARV::DEFINED_CONST_FROM, name, true) + iseq.defined(YARV::Defined::CONST_FROM, name, true) when TopConstField name = node.target.constant.value.to_sym iseq.putobject(Object) iseq.dup - iseq.defined(YARV::DEFINED_CONST_FROM, name, true) + iseq.defined(YARV::Defined::CONST_FROM, name, true) when VarField name = node.target.value.value.to_sym iseq.putnil case node.target.value when Const - iseq.defined(YARV::DEFINED_CONST, name, true) + iseq.defined(YARV::Defined::CONST, name, true) when CVar - 
iseq.defined(YARV::DEFINED_CVAR, name, true) + iseq.defined(YARV::Defined::CVAR, name, true) when GVar - iseq.defined(YARV::DEFINED_GVAR, name, true) + iseq.defined(YARV::Defined::GVAR, name, true) end end @@ -1641,7 +1638,7 @@ def opassign_defined(node) branchif = iseq.branchif(-1) iseq.pop - branchunless[1] = iseq.label + branchunless.patch!(iseq) visit(node.value) case node.target @@ -1663,7 +1660,7 @@ def opassign_defined(node) end end - branchif[1] = iseq.label + branchif.patch!(iseq) end # Whenever a value is interpolated into a string-like structure, these diff --git a/lib/syntax_tree/dsl.rb b/lib/syntax_tree/dsl.rb index 1d1324df..860a1fe5 100644 --- a/lib/syntax_tree/dsl.rb +++ b/lib/syntax_tree/dsl.rb @@ -1,133 +1,1004 @@ # frozen_string_literal: true module SyntaxTree + # This module provides shortcuts for creating AST nodes. module DSL + # Create a new BEGINBlock node. + def BEGINBlock(lbrace, statements) + BEGINBlock.new( + lbrace: lbrace, + statements: statements, + location: Location.default + ) + end + + # Create a new CHAR node. + def CHAR(value) + CHAR.new(value: value, location: Location.default) + end + + # Create a new ENDBlock node. + def ENDBlock(lbrace, statements) + ENDBlock.new( + lbrace: lbrace, + statements: statements, + location: Location.default + ) + end + + # Create a new EndContent node. + def EndContent(value) + EndContent.new(value: value, location: Location.default) + end + + # Create a new AliasNode node. + def AliasNode(left, right) + AliasNode.new(left: left, right: right, location: Location.default) + end + + # Create a new ARef node. def ARef(collection, index) ARef.new(collection: collection, index: index, location: Location.default) end + # Create a new ARefField node. def ARefField(collection, index) - ARefField.new(collection: collection, index: index, location: Location.default) + ARefField.new( + collection: collection, + index: index, + location: Location.default + ) end + # Create a new ArgParen node. 
+ def ArgParen(arguments) + ArgParen.new(arguments: arguments, location: Location.default) + end + + # Create a new Args node. def Args(parts) Args.new(parts: parts, location: Location.default) end - def ArgParen(arguments) - ArgParen.new(arguments: arguments, location: Location.default) + # Create a new ArgBlock node. + def ArgBlock(value) + ArgBlock.new(value: value, location: Location.default) + end + + # Create a new ArgStar node. + def ArgStar(value) + ArgStar.new(value: value, location: Location.default) + end + + # Create a new ArgsForward node. + def ArgsForward + ArgsForward.new(location: Location.default) + end + + # Create a new ArrayLiteral node. + def ArrayLiteral(lbracket, contents) + ArrayLiteral.new( + lbracket: lbracket, + contents: contents, + location: Location.default + ) end + # Create a new AryPtn node. + def AryPtn(constant, requireds, rest, posts) + AryPtn.new( + constant: constant, + requireds: requireds, + rest: rest, + posts: posts, + location: Location.default + ) + end + + # Create a new Assign node. def Assign(target, value) Assign.new(target: target, value: value, location: Location.default) end + # Create a new Assoc node. def Assoc(key, value) Assoc.new(key: key, value: value, location: Location.default) end - def Binary(left, operator, right) - Binary.new(left: left, operator: operator, right: right, location: Location.default) + # Create a new AssocSplat node. + def AssocSplat(value) + AssocSplat.new(value: value, location: Location.default) end - def BlockNode(opening, block_var, bodystmt) - BlockNode.new(opening: opening, block_var: block_var, bodystmt: bodystmt, location: Location.default) + # Create a new Backref node. + def Backref(value) + Backref.new(value: value, location: Location.default) + end + + # Create a new Backtick node. + def Backtick(value) + Backtick.new(value: value, location: Location.default) + end + + # Create a new BareAssocHash node. 
+ def BareAssocHash(assocs) + BareAssocHash.new(assocs: assocs, location: Location.default) end - def BodyStmt(statements, rescue_clause, else_keyword, else_clause, ensure_clause) - BodyStmt.new(statements: statements, rescue_clause: rescue_clause, else_keyword: else_keyword, else_clause: else_clause, ensure_clause: ensure_clause, location: Location.default) + # Create a new Begin node. + def Begin(bodystmt) + Begin.new(bodystmt: bodystmt, location: Location.default) end + # Create a new PinnedBegin node. + def PinnedBegin(statement) + PinnedBegin.new(statement: statement, location: Location.default) + end + + # Create a new Binary node. + def Binary(left, operator, right) + Binary.new( + left: left, + operator: operator, + right: right, + location: Location.default + ) + end + + # Create a new BlockVar node. + def BlockVar(params, locals) + BlockVar.new(params: params, locals: locals, location: Location.default) + end + + # Create a new BlockArg node. + def BlockArg(name) + BlockArg.new(name: name, location: Location.default) + end + + # Create a new BodyStmt node. + def BodyStmt( + statements, + rescue_clause, + else_keyword, + else_clause, + ensure_clause + ) + BodyStmt.new( + statements: statements, + rescue_clause: rescue_clause, + else_keyword: else_keyword, + else_clause: else_clause, + ensure_clause: ensure_clause, + location: Location.default + ) + end + + # Create a new Break node. def Break(arguments) Break.new(arguments: arguments, location: Location.default) end + # Create a new CallNode node. def CallNode(receiver, operator, message, arguments) - CallNode.new(receiver: receiver, operator: operator, message: message, arguments: arguments, location: Location.default) + CallNode.new( + receiver: receiver, + operator: operator, + message: message, + arguments: arguments, + location: Location.default + ) end + # Create a new Case node. 
def Case(keyword, value, consequent) - Case.new(keyword: keyword, value: value, consequent: consequent, location: Location.default) + Case.new( + keyword: keyword, + value: value, + consequent: consequent, + location: Location.default + ) + end + + # Create a new RAssign node. + def RAssign(value, operator, pattern) + RAssign.new( + value: value, + operator: operator, + pattern: pattern, + location: Location.default + ) + end + + # Create a new ClassDeclaration node. + def ClassDeclaration(constant, superclass, bodystmt) + ClassDeclaration.new( + constant: constant, + superclass: superclass, + bodystmt: bodystmt, + location: Location.default + ) + end + + # Create a new Comma node. + def Comma(value) + Comma.new(value: value, location: Location.default) end + # Create a new Command node. + def Command(message, arguments, block) + Command.new( + message: message, + arguments: arguments, + block: block, + location: Location.default + ) + end + + # Create a new CommandCall node. + def CommandCall(receiver, operator, message, arguments, block) + CommandCall.new( + receiver: receiver, + operator: operator, + message: message, + arguments: arguments, + block: block, + location: Location.default + ) + end + + # Create a new Comment node. + def Comment(value, inline) + Comment.new(value: value, inline: inline, location: Location.default) + end + + # Create a new Const node. + def Const(value) + Const.new(value: value, location: Location.default) + end + + # Create a new ConstPathField node. + def ConstPathField(parent, constant) + ConstPathField.new( + parent: parent, + constant: constant, + location: Location.default + ) + end + + # Create a new ConstPathRef node. + def ConstPathRef(parent, constant) + ConstPathRef.new( + parent: parent, + constant: constant, + location: Location.default + ) + end + + # Create a new ConstRef node. + def ConstRef(constant) + ConstRef.new(constant: constant, location: Location.default) + end + + # Create a new CVar node. 
+ def CVar(value) + CVar.new(value: value, location: Location.default) + end + + # Create a new DefNode node. + def DefNode(target, operator, name, params, bodystmt) + DefNode.new( + target: target, + operator: operator, + name: name, + params: params, + bodystmt: bodystmt, + location: Location.default + ) + end + + # Create a new Defined node. + def Defined(value) + Defined.new(value: value, location: Location.default) + end + + # Create a new BlockNode node. + def BlockNode(opening, block_var, bodystmt) + BlockNode.new( + opening: opening, + block_var: block_var, + bodystmt: bodystmt, + location: Location.default + ) + end + + # Create a new RangeNode node. + def RangeNode(left, operator, right) + RangeNode.new( + left: left, + operator: operator, + right: right, + location: Location.default + ) + end + + # Create a new DynaSymbol node. + def DynaSymbol(parts, quote) + DynaSymbol.new(parts: parts, quote: quote, location: Location.default) + end + + # Create a new Else node. + def Else(keyword, statements) + Else.new( + keyword: keyword, + statements: statements, + location: Location.default + ) + end + + # Create a new Elsif node. + def Elsif(predicate, statements, consequent) + Elsif.new( + predicate: predicate, + statements: statements, + consequent: consequent, + location: Location.default + ) + end + + # Create a new EmbDoc node. + def EmbDoc(value) + EmbDoc.new(value: value, location: Location.default) + end + + # Create a new EmbExprBeg node. + def EmbExprBeg(value) + EmbExprBeg.new(value: value, location: Location.default) + end + + # Create a new EmbExprEnd node. + def EmbExprEnd(value) + EmbExprEnd.new(value: value, location: Location.default) + end + + # Create a new EmbVar node. + def EmbVar(value) + EmbVar.new(value: value, location: Location.default) + end + + # Create a new Ensure node. 
+ def Ensure(keyword, statements) + Ensure.new( + keyword: keyword, + statements: statements, + location: Location.default + ) + end + + # Create a new ExcessedComma node. + def ExcessedComma(value) + ExcessedComma.new(value: value, location: Location.default) + end + + # Create a new Field node. + def Field(parent, operator, name) + Field.new( + parent: parent, + operator: operator, + name: name, + location: Location.default + ) + end + + # Create a new FloatLiteral node. def FloatLiteral(value) FloatLiteral.new(value: value, location: Location.default) end + # Create a new FndPtn node. + def FndPtn(constant, left, values, right) + FndPtn.new( + constant: constant, + left: left, + values: values, + right: right, + location: Location.default + ) + end + + # Create a new For node. + def For(index, collection, statements) + For.new( + index: index, + collection: collection, + statements: statements, + location: Location.default + ) + end + + # Create a new GVar node. def GVar(value) GVar.new(value: value, location: Location.default) end + # Create a new HashLiteral node. def HashLiteral(lbrace, assocs) - HashLiteral.new(lbrace: lbrace, assocs: assocs, location: Location.default) + HashLiteral.new( + lbrace: lbrace, + assocs: assocs, + location: Location.default + ) + end + + # Create a new Heredoc node. + def Heredoc(beginning, ending, dedent, parts) + Heredoc.new( + beginning: beginning, + ending: ending, + dedent: dedent, + parts: parts, + location: Location.default + ) + end + + # Create a new HeredocBeg node. + def HeredocBeg(value) + HeredocBeg.new(value: value, location: Location.default) + end + + # Create a new HeredocEnd node. + def HeredocEnd(value) + HeredocEnd.new(value: value, location: Location.default) + end + + # Create a new HshPtn node. + def HshPtn(constant, keywords, keyword_rest) + HshPtn.new( + constant: constant, + keywords: keywords, + keyword_rest: keyword_rest, + location: Location.default + ) end + # Create a new Ident node. 
def Ident(value) Ident.new(value: value, location: Location.default) end + # Create a new IfNode node. def IfNode(predicate, statements, consequent) - IfNode.new(predicate: predicate, statements: statements, consequent: consequent, location: Location.default) + IfNode.new( + predicate: predicate, + statements: statements, + consequent: consequent, + location: Location.default + ) end + # Create a new IfOp node. + def IfOp(predicate, truthy, falsy) + IfOp.new( + predicate: predicate, + truthy: truthy, + falsy: falsy, + location: Location.default + ) + end + + # Create a new Imaginary node. + def Imaginary(value) + Imaginary.new(value: value, location: Location.default) + end + + # Create a new In node. + def In(pattern, statements, consequent) + In.new( + pattern: pattern, + statements: statements, + consequent: consequent, + location: Location.default + ) + end + + # Create a new Int node. def Int(value) Int.new(value: value, location: Location.default) end + # Create a new IVar node. + def IVar(value) + IVar.new(value: value, location: Location.default) + end + + # Create a new Kw node. def Kw(value) Kw.new(value: value, location: Location.default) end + # Create a new KwRestParam node. + def KwRestParam(name) + KwRestParam.new(name: name, location: Location.default) + end + + # Create a new Label node. + def Label(value) + Label.new(value: value, location: Location.default) + end + + # Create a new LabelEnd node. + def LabelEnd(value) + LabelEnd.new(value: value, location: Location.default) + end + + # Create a new Lambda node. + def Lambda(params, statements) + Lambda.new( + params: params, + statements: statements, + location: Location.default + ) + end + + # Create a new LambdaVar node. + def LambdaVar(params, locals) + LambdaVar.new(params: params, locals: locals, location: Location.default) + end + + # Create a new LBrace node. def LBrace(value) LBrace.new(value: value, location: Location.default) end + # Create a new LBracket node. 
+ def LBracket(value) + LBracket.new(value: value, location: Location.default) + end + + # Create a new LParen node. + def LParen(value) + LParen.new(value: value, location: Location.default) + end + + # Create a new MAssign node. + def MAssign(target, value) + MAssign.new(target: target, value: value, location: Location.default) + end + + # Create a new MethodAddBlock node. def MethodAddBlock(call, block) MethodAddBlock.new(call: call, block: block, location: Location.default) end + # Create a new MLHS node. + def MLHS(parts, comma) + MLHS.new(parts: parts, comma: comma, location: Location.default) + end + + # Create a new MLHSParen node. + def MLHSParen(contents, comma) + MLHSParen.new( + contents: contents, + comma: comma, + location: Location.default + ) + end + + # Create a new ModuleDeclaration node. + def ModuleDeclaration(constant, bodystmt) + ModuleDeclaration.new( + constant: constant, + bodystmt: bodystmt, + location: Location.default + ) + end + + # Create a new MRHS node. + def MRHS(parts) + MRHS.new(parts: parts, location: Location.default) + end + + # Create a new Next node. def Next(arguments) Next.new(arguments: arguments, location: Location.default) end + # Create a new Op node. def Op(value) Op.new(value: value, location: Location.default) end + # Create a new OpAssign node. def OpAssign(target, operator, value) - OpAssign.new(target: target, operator: operator, value: value, location: Location.default) - end - + OpAssign.new( + target: target, + operator: operator, + value: value, + location: Location.default + ) + end + + # Create a new Params node. + def Params(requireds, optionals, rest, posts, keywords, keyword_rest, block) + Params.new( + requireds: requireds, + optionals: optionals, + rest: rest, + posts: posts, + keywords: keywords, + keyword_rest: keyword_rest, + block: block, + location: Location.default + ) + end + + # Create a new Paren node. 
+ def Paren(lparen, contents) + Paren.new(lparen: lparen, contents: contents, location: Location.default) + end + + # Create a new Period node. def Period(value) Period.new(value: value, location: Location.default) end + # Create a new Program node. def Program(statements) Program.new(statements: statements, location: Location.default) end + # Create a new QSymbols node. + def QSymbols(beginning, elements) + QSymbols.new( + beginning: beginning, + elements: elements, + location: Location.default + ) + end + + # Create a new QSymbolsBeg node. + def QSymbolsBeg(value) + QSymbolsBeg.new(value: value, location: Location.default) + end + + # Create a new QWords node. + def QWords(beginning, elements) + QWords.new( + beginning: beginning, + elements: elements, + location: Location.default + ) + end + + # Create a new QWordsBeg node. + def QWordsBeg(value) + QWordsBeg.new(value: value, location: Location.default) + end + + # Create a new RationalLiteral node. + def RationalLiteral(value) + RationalLiteral.new(value: value, location: Location.default) + end + + # Create a new RBrace node. + def RBrace(value) + RBrace.new(value: value, location: Location.default) + end + + # Create a new RBracket node. + def RBracket(value) + RBracket.new(value: value, location: Location.default) + end + + # Create a new Redo node. + def Redo + Redo.new(location: Location.default) + end + + # Create a new RegexpContent node. + def RegexpContent(beginning, parts) + RegexpContent.new( + beginning: beginning, + parts: parts, + location: Location.default + ) + end + + # Create a new RegexpBeg node. + def RegexpBeg(value) + RegexpBeg.new(value: value, location: Location.default) + end + + # Create a new RegexpEnd node. + def RegexpEnd(value) + RegexpEnd.new(value: value, location: Location.default) + end + + # Create a new RegexpLiteral node. 
+ def RegexpLiteral(beginning, ending, parts) + RegexpLiteral.new( + beginning: beginning, + ending: ending, + parts: parts, + location: Location.default + ) + end + + # Create a new RescueEx node. + def RescueEx(exceptions, variable) + RescueEx.new( + exceptions: exceptions, + variable: variable, + location: Location.default + ) + end + + # Create a new Rescue node. + def Rescue(keyword, exception, statements, consequent) + Rescue.new( + keyword: keyword, + exception: exception, + statements: statements, + consequent: consequent, + location: Location.default + ) + end + + # Create a new RescueMod node. + def RescueMod(statement, value) + RescueMod.new( + statement: statement, + value: value, + location: Location.default + ) + end + + # Create a new RestParam node. + def RestParam(name) + RestParam.new(name: name, location: Location.default) + end + + # Create a new Retry node. + def Retry + Retry.new(location: Location.default) + end + + # Create a new ReturnNode node. def ReturnNode(arguments) ReturnNode.new(arguments: arguments, location: Location.default) end + # Create a new RParen node. + def RParen(value) + RParen.new(value: value, location: Location.default) + end + + # Create a new SClass node. + def SClass(target, bodystmt) + SClass.new(target: target, bodystmt: bodystmt, location: Location.default) + end + + # Create a new Statements node. def Statements(body) Statements.new(nil, body: body, location: Location.default) end + # Create a new StringContent node. + def StringContent(parts) + StringContent.new(parts: parts, location: Location.default) + end + + # Create a new StringConcat node. + def StringConcat(left, right) + StringConcat.new(left: left, right: right, location: Location.default) + end + + # Create a new StringDVar node. + def StringDVar(variable) + StringDVar.new(variable: variable, location: Location.default) + end + + # Create a new StringEmbExpr node. 
+ def StringEmbExpr(statements) + StringEmbExpr.new(statements: statements, location: Location.default) + end + + # Create a new StringLiteral node. + def StringLiteral(parts, quote) + StringLiteral.new(parts: parts, quote: quote, location: Location.default) + end + + # Create a new Super node. + def Super(arguments) + Super.new(arguments: arguments, location: Location.default) + end + + # Create a new SymBeg node. + def SymBeg(value) + SymBeg.new(value: value, location: Location.default) + end + + # Create a new SymbolContent node. + def SymbolContent(value) + SymbolContent.new(value: value, location: Location.default) + end + + # Create a new SymbolLiteral node. def SymbolLiteral(value) SymbolLiteral.new(value: value, location: Location.default) end + # Create a new Symbols node. + def Symbols(beginning, elements) + Symbols.new( + beginning: beginning, + elements: elements, + location: Location.default + ) + end + + # Create a new SymbolsBeg node. + def SymbolsBeg(value) + SymbolsBeg.new(value: value, location: Location.default) + end + + # Create a new TLambda node. + def TLambda(value) + TLambda.new(value: value, location: Location.default) + end + + # Create a new TLamBeg node. + def TLamBeg(value) + TLamBeg.new(value: value, location: Location.default) + end + + # Create a new TopConstField node. + def TopConstField(constant) + TopConstField.new(constant: constant, location: Location.default) + end + + # Create a new TopConstRef node. + def TopConstRef(constant) + TopConstRef.new(constant: constant, location: Location.default) + end + + # Create a new TStringBeg node. + def TStringBeg(value) + TStringBeg.new(value: value, location: Location.default) + end + + # Create a new TStringContent node. + def TStringContent(value) + TStringContent.new(value: value, location: Location.default) + end + + # Create a new TStringEnd node. + def TStringEnd(value) + TStringEnd.new(value: value, location: Location.default) + end + + # Create a new Not node. 
+ def Not(statement, parentheses) + Not.new( + statement: statement, + parentheses: parentheses, + location: Location.default + ) + end + + # Create a new Unary node. + def Unary(operator, statement) + Unary.new( + operator: operator, + statement: statement, + location: Location.default + ) + end + + # Create a new Undef node. + def Undef(symbols) + Undef.new(symbols: symbols, location: Location.default) + end + + # Create a new UnlessNode node. + def UnlessNode(predicate, statements, consequent) + UnlessNode.new( + predicate: predicate, + statements: statements, + consequent: consequent, + location: Location.default + ) + end + + # Create a new UntilNode node. + def UntilNode(predicate, statements) + UntilNode.new( + predicate: predicate, + statements: statements, + location: Location.default + ) + end + + # Create a new VarField node. def VarField(value) VarField.new(value: value, location: Location.default) end + # Create a new VarRef node. def VarRef(value) VarRef.new(value: value, location: Location.default) end + # Create a new PinnedVarRef node. + def PinnedVarRef(value) + PinnedVarRef.new(value: value, location: Location.default) + end + + # Create a new VCall node. + def VCall(value) + VCall.new(value: value, location: Location.default) + end + + # Create a new VoidStmt node. + def VoidStmt + VoidStmt.new(location: Location.default) + end + + # Create a new When node. def When(arguments, statements, consequent) - When.new(arguments: arguments, statements: statements, consequent: consequent, location: Location.default) + When.new( + arguments: arguments, + statements: statements, + consequent: consequent, + location: Location.default + ) + end + + # Create a new WhileNode node. + def WhileNode(predicate, statements) + WhileNode.new( + predicate: predicate, + statements: statements, + location: Location.default + ) + end + + # Create a new Word node. + def Word(parts) + Word.new(parts: parts, location: Location.default) + end + + # Create a new Words node. 
+ def Words(beginning, elements) + Words.new( + beginning: beginning, + elements: elements, + location: Location.default + ) + end + + # Create a new WordsBeg node. + def WordsBeg(value) + WordsBeg.new(value: value, location: Location.default) + end + + # Create a new XString node. + def XString(parts) + XString.new(parts: parts, location: Location.default) + end + + # Create a new XStringLiteral node. + def XStringLiteral(parts) + XStringLiteral.new(parts: parts, location: Location.default) + end + + # Create a new YieldNode node. + def YieldNode(arguments) + YieldNode.new(arguments: arguments, location: Location.default) + end + + # Create a new ZSuper node. + def ZSuper + ZSuper.new(location: Location.default) end end end diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index 822844fb..a29714a5 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -206,7 +206,12 @@ def inline_storage_for(name) def length insns.inject(0) do |sum, insn| - insn.is_a?(Array) ? sum + insn.length : sum + case insn + when Integer, Symbol + sum + else + sum + insn.length + end end end @@ -241,7 +246,38 @@ def to_a local_table.names, argument_options, [], - insns.map { |insn| serialize(insn) } + insns.map do |insn| + case insn + when Integer, Symbol + insn + when Array + case insn[0] + when :setlocal_WC_0, :setlocal_WC_1, :setlocal + iseq = self + + case insn[0] + when :setlocal_WC_1 + iseq = iseq.parent_iseq + when :setlocal + insn[2].times { iseq = iseq.parent_iseq } + end + + # Here we need to map the local variable index to the offset + # from the top of the stack where it will be stored. + [insn[0], iseq.local_table.offset(insn[1]), *insn[2..]] + when :send + # For any instructions that push instruction sequences onto the + # stack, we need to call #to_a on them as well. 
+ [insn[0], insn[1], (insn[2].to_a if insn[2])] + when :once + [insn[0], insn[1].to_a, insn[2]] + else + insn + end + else + insn.to_a(self) + end + end ] end @@ -289,7 +325,14 @@ def singleton_class_child_iseq(location) def push(insn) insns << insn - insn + + case insn + when Integer, Symbol, Array + insn + else + stack.change_by(-insn.pops + insn.pushes) + insn + end end # This creates a new label at the current length of the instruction @@ -304,134 +347,106 @@ def event(name) end def adjuststack(number) - stack.change_by(-number) - push([:adjuststack, number]) + push(AdjustStack.new(number)) end def anytostring - stack.change_by(-2 + 1) - push([:anytostring]) + push(AnyToString.new) end - def branchif(index) - stack.change_by(-1) - push([:branchif, index]) + def branchif(label) + push(BranchIf.new(label)) end - def branchnil(index) - stack.change_by(-1) - push([:branchnil, index]) + def branchnil(label) + push(BranchNil.new(label)) end - def branchunless(index) - stack.change_by(-1) - push([:branchunless, index]) + def branchunless(label) + push(BranchUnless.new(label)) end - def checkkeyword(index, keyword_index) - stack.change_by(+1) - push([:checkkeyword, index, keyword_index]) + def checkkeyword(keyword_bits_index, keyword_index) + push(CheckKeyword.new(keyword_bits_index, keyword_index)) end def concatarray - stack.change_by(-2 + 1) - push([:concatarray]) + push(ConcatArray.new) end def concatstrings(number) - stack.change_by(-number + 1) - push([:concatstrings, number]) + push(ConcatStrings.new(number)) end def defined(type, name, message) - stack.change_by(-1 + 1) - push([:defined, type, name, message]) + push(Defined.new(type, name, message)) end def defineclass(name, class_iseq, flags) - stack.change_by(-2 + 1) - push([:defineclass, name, class_iseq, flags]) + push(DefineClass.new(name, class_iseq, flags)) end def definemethod(name, method_iseq) - stack.change_by(0) - push([:definemethod, name, method_iseq]) + push(DefineMethod.new(name, method_iseq)) 
end def definesmethod(name, method_iseq) - stack.change_by(-1) - push([:definesmethod, name, method_iseq]) + push(DefineSMethod.new(name, method_iseq)) end def dup - stack.change_by(-1 + 2) - push([:dup]) + push(Dup.new) end def duparray(object) - stack.change_by(+1) - push([:duparray, object]) + push(DupArray.new(object)) end def duphash(object) - stack.change_by(+1) - push([:duphash, object]) + push(DupHash.new(object)) end def dupn(number) - stack.change_by(+number) - push([:dupn, number]) + push(DupN.new(number)) end - def expandarray(length, flag) - stack.change_by(-1 + length) - push([:expandarray, length, flag]) + def expandarray(length, flags) + push(ExpandArray.new(length, flags)) end def getblockparam(index, level) - stack.change_by(+1) - push([:getblockparam, index, level]) + push(GetBlockParam.new(index, level)) end def getblockparamproxy(index, level) - stack.change_by(+1) - push([:getblockparamproxy, index, level]) + push(GetBlockParamProxy.new(index, level)) end def getclassvariable(name) - stack.change_by(+1) - - if RUBY_VERSION >= "3.0" - push([:getclassvariable, name, inline_storage_for(name)]) + if RUBY_VERSION < "3.0" + push(GetClassVariableUncached.new(name)) else - push([:getclassvariable, name]) + push(GetClassVariable.new(name, inline_storage_for(name))) end end def getconstant(name) - stack.change_by(-2 + 1) - push([:getconstant, name]) + push(GetConstant.new(name)) end def getglobal(name) - stack.change_by(+1) - push([:getglobal, name]) + push(GetGlobal.new(name)) end def getinstancevariable(name) - stack.change_by(+1) - - if RUBY_VERSION >= "3.2" - push([:getinstancevariable, name, inline_storage]) + if RUBY_VERSION < "3.2" + push(GetInstanceVariable.new(name, inline_storage_for(name))) else - inline_storage = inline_storage_for(name) - push([:getinstancevariable, name, inline_storage]) + push(GetInstanceVariable.new(name, inline_storage)) end end def getlocal(index, level) - stack.change_by(+1) - if operands_unification # Specialize the 
getlocal instruction based on the level of the # local variable. If it's 0 or 1, then there's a specialized @@ -439,14 +454,14 @@ def getlocal(index, level) # scope, respectively, and requires fewer operands. case level when 0 - push([:getlocal_WC_0, index]) + push(GetLocalWC0.new(index)) when 1 - push([:getlocal_WC_1, index]) + push(GetLocalWC1.new(index)) else - push([:getlocal, index, level]) + push(GetLocal.new(index, level)) end else - push([:getlocal, index, level]) + push(GetLocal.new(index, level)) end end @@ -762,38 +777,6 @@ def toregexp(options, length) def call_data(method_id, argc, flag = VM_CALL_ARGS_SIMPLE) { mid: method_id, flag: flag, orig_argc: argc } end - - def serialize(insn) - case insn[0] - when :checkkeyword, :getblockparam, :getblockparamproxy, :getlocal_WC_0, - :getlocal_WC_1, :getlocal, :setlocal_WC_0, :setlocal_WC_1, - :setlocal - iseq = self - - case insn[0] - when :getlocal_WC_1, :setlocal_WC_1 - iseq = iseq.parent_iseq - when :getblockparam, :getblockparamproxy, :getlocal, :setlocal - insn[2].times { iseq = iseq.parent_iseq } - end - - # Here we need to map the local variable index to the offset - # from the top of the stack where it will be stored. - [insn[0], iseq.local_table.offset(insn[1]), *insn[2..]] - when :defineclass - [insn[0], insn[1], insn[2].to_a, insn[3]] - when :definemethod, :definesmethod - [insn[0], insn[1], insn[2].to_a] - when :send - # For any instructions that push instruction sequences onto the - # stack, we need to call #to_a on them as well. - [insn[0], insn[1], (insn[2].to_a if insn[2])] - when :once - [insn[0], insn[1].to_a, insn[2]] - else - insn - end - end end # These constants correspond to the putspecialobject instruction. They are @@ -819,34 +802,5 @@ def serialize(insn) VM_CALL_ZSUPER = 1 << 10 VM_CALL_OPT_SEND = 1 << 11 VM_CALL_KW_SPLAT_MUT = 1 << 12 - - # These constants correspond to the value passed as part of the defined - # instruction. 
It's an enum defined in the CRuby codebase that tells that - # instruction what kind of defined check to perform. - DEFINED_NIL = 1 - DEFINED_IVAR = 2 - DEFINED_LVAR = 3 - DEFINED_GVAR = 4 - DEFINED_CVAR = 5 - DEFINED_CONST = 6 - DEFINED_METHOD = 7 - DEFINED_YIELD = 8 - DEFINED_ZSUPER = 9 - DEFINED_SELF = 10 - DEFINED_TRUE = 11 - DEFINED_FALSE = 12 - DEFINED_ASGN = 13 - DEFINED_EXPR = 14 - DEFINED_REF = 15 - DEFINED_FUNC = 16 - DEFINED_CONST_FROM = 17 - - # These constants correspond to the value passed in the flags as part of - # the defineclass instruction. - VM_DEFINECLASS_TYPE_CLASS = 0 - VM_DEFINECLASS_TYPE_SINGLETON_CLASS = 1 - VM_DEFINECLASS_TYPE_MODULE = 2 - VM_DEFINECLASS_FLAG_SCOPED = 8 - VM_DEFINECLASS_FLAG_HAS_SUPERCLASS = 16 end end diff --git a/lib/syntax_tree/yarv/bf.rb b/lib/syntax_tree/yarv/bf.rb index 16098190..05c05705 100644 --- a/lib/syntax_tree/yarv/bf.rb +++ b/lib/syntax_tree/yarv/bf.rb @@ -5,460 +5,171 @@ module YARV # Parses the given source code into a syntax tree, compiles that syntax tree # into YARV bytecode. class Bf - class Node - def format(q) - Format.new(q).visit(self) - end - - def pretty_print(q) - PrettyPrint.new(q).visit(self) - end - end - - # The root node of the syntax tree. - class Root < Node - attr_reader :nodes, :location - - def initialize(nodes:, location:) - @nodes = nodes - @location = location - end - - def accept(visitor) - visitor.visit_root(self) - end - - def child_nodes - nodes - end - - alias deconstruct child_nodes - - def deconstruct_keys(keys) - { nodes: nodes, location: location } - end - end - - # [ ... 
] - class Loop < Node - attr_reader :nodes, :location - - def initialize(nodes:, location:) - @nodes = nodes - @location = location - end - - def accept(visitor) - visitor.visit_loop(self) - end - - def child_nodes - nodes - end - - alias deconstruct child_nodes - - def deconstruct_keys(keys) - { nodes: nodes, location: location } - end - end - - # + - class Increment < Node - attr_reader :location - - def initialize(location:) - @location = location - end - - def accept(visitor) - visitor.visit_increment(self) - end - - def child_nodes - [] - end - - alias deconstruct child_nodes + attr_reader :source - def deconstruct_keys(keys) - { value: "+", location: location } - end + def initialize(source) + @source = source end - # - - class Decrement < Node - attr_reader :location - - def initialize(location:) - @location = location - end - - def accept(visitor) - visitor.visit_decrement(self) - end - - def child_nodes - [] - end - - alias deconstruct child_nodes + def compile + # Set up the top-level instruction sequence that will be returned. + iseq = InstructionSequence.new(:top, "", nil, location) + + # Set up the $tape global variable that will hold our state. + iseq.duphash({ 0 => 0 }) + iseq.setglobal(:$tape) + iseq.getglobal(:$tape) + iseq.putobject(0) + iseq.send(:default=, 1) + + # Set up the $cursor global variable that will hold the current position + # in the tape. + iseq.putobject(0) + iseq.setglobal(:$cursor) + + stack = [] + source + .each_char + .chunk do |char| + # For each character, we're going to assign a type to it. This + # allows a couple of optimizations to be made by combining multiple + # instructions into single instructions, e.g., +++ becomes a single + # change_by(3) instruction. + case char + when "+", "-" + :change + when ">", "<" + :shift + when "." + :output + when "," + :input + when "[", "]" + :loop + else + :ignored + end + end + .each do |type, chunk| + # For each chunk, we're going to emit the appropriate instruction. 
+ case type + when :change + change_by(iseq, chunk.count("+") - chunk.count("-")) + when :shift + shift_by(iseq, chunk.count(">") - chunk.count("<")) + when :output + chunk.length.times { output_char(iseq) } + when :input + chunk.length.times { input_char(iseq) } + when :loop + chunk.each do |char| + case char + when "[" + stack << loop_start(iseq) + when "]" + loop_end(iseq, *stack.pop) + end + end + end + end - def deconstruct_keys(keys) - { value: "-", location: location } - end + iseq.leave + iseq end - # > - class ShiftRight < Node - attr_reader :location - - def initialize(location:) - @location = location - end - - def accept(visitor) - visitor.visit_shift_right(self) - end - - def child_nodes - [] - end - - alias deconstruct child_nodes + private - def deconstruct_keys(keys) - { value: ">", location: location } - end + # This is the location of the top instruction sequence, derived from the + # source string. + def location + Location.new( + start_line: 1, + start_char: 0, + start_column: 0, + end_line: source.count("\n") + 1, + end_char: source.size, + end_column: source.size - (source.rindex("\n") || 0) - 1 + ) end - # < - class ShiftLeft < Node - attr_reader :location - - def initialize(location:) - @location = location - end + # $tape[$cursor] += value + def change_by(iseq, value) + iseq.getglobal(:$tape) + iseq.getglobal(:$cursor) - def accept(visitor) - visitor.visit_shift_left(self) - end + iseq.getglobal(:$tape) + iseq.getglobal(:$cursor) + iseq.send(:[], 1) - def child_nodes - [] + if value < 0 + iseq.putobject(-value) + iseq.send(:-, 1) + else + iseq.putobject(value) + iseq.send(:+, 1) end - alias deconstruct child_nodes - - def deconstruct_keys(keys) - { value: "<", location: location } - end + iseq.send(:[]=, 2) end - # , - class Input < Node - attr_reader :location - - def initialize(location:) - @location = location - end + # $cursor += value + def shift_by(iseq, value) + iseq.getglobal(:$cursor) - def accept(visitor) - 
visitor.visit_input(self) + if value < 0 + iseq.putobject(-value) + iseq.send(:-, 1) + else + iseq.putobject(value) + iseq.send(:+, 1) end - def child_nodes - [] - end - - alias deconstruct child_nodes - - def deconstruct_keys(keys) - { value: ",", location: location } - end + iseq.setglobal(:$cursor) end - # . - class Output < Node - attr_reader :location + # $stdout.putc($tape[$cursor].chr) + def output_char(iseq) + iseq.getglobal(:$stdout) - def initialize(location:) - @location = location - end - - def accept(visitor) - visitor.visit_output(self) - end - - def child_nodes - [] - end - - alias deconstruct child_nodes - - def deconstruct_keys(keys) - { value: ".", location: location } - end - end + iseq.getglobal(:$tape) + iseq.getglobal(:$cursor) + iseq.send(:[], 1) + iseq.send(:chr, 0) - # Allows visiting the syntax tree recursively. - class Visitor - def visit(node) - node.accept(self) - end - - def visit_all(nodes) - nodes.map { |node| visit(node) } - end - - def visit_child_nodes(node) - visit_all(node.child_nodes) - end - - # Visit a Root node. - alias visit_root visit_child_nodes - - # Visit a Loop node. - alias visit_loop visit_child_nodes - - # Visit an Increment node. - alias visit_increment visit_child_nodes - - # Visit a Decrement node. - alias visit_decrement visit_child_nodes - - # Visit a ShiftRight node. - alias visit_shift_right visit_child_nodes - - # Visit a ShiftLeft node. - alias visit_shift_left visit_child_nodes - - # Visit an Input node. - alias visit_input visit_child_nodes - - # Visit an Output node. - alias visit_output visit_child_nodes + iseq.send(:putc, 1) end - # Compiles the syntax tree into YARV bytecode. 
- class Compiler < Visitor - attr_reader :iseq - - def initialize - @iseq = InstructionSequence.new(:top, "", nil, Location.default) - end - - def visit_decrement(node) - change_by(-1) - end - - def visit_increment(node) - change_by(1) - end - - def visit_input(node) - iseq.getglobal(:$tape) - iseq.getglobal(:$cursor) - iseq.getglobal(:$stdin) - iseq.send(:getc, 0) - iseq.send(:ord, 0) - iseq.send(:[]=, 2) - end - - def visit_loop(node) - start_label = iseq.label - - # First, we're going to compare the value at the current cursor to 0. - # If it's 0, then we'll jump past the loop. Otherwise we'll execute - # the loop. - iseq.getglobal(:$tape) - iseq.getglobal(:$cursor) - iseq.send(:[], 1) - iseq.putobject(0) - iseq.send(:==, 1) - branchunless = iseq.branchunless(-1) - - # Otherwise, here we'll execute the loop. - visit_nodes(node.nodes) - - # Now that we've visited all of the child nodes, we need to jump back - # to the start of the loop. - iseq.jump(start_label) - - # Now that we have all of the instructions in place, we can patch the - # branchunless to point to the next instruction for skipping the loop. 
- branchunless[1] = iseq.label - end - - def visit_output(node) - iseq.getglobal(:$stdout) - iseq.getglobal(:$tape) - iseq.getglobal(:$cursor) - iseq.send(:[], 1) - iseq.send(:chr, 0) - iseq.send(:putc, 1) - end - - def visit_root(node) - iseq.duphash({ 0 => 0 }) - iseq.setglobal(:$tape) - iseq.getglobal(:$tape) - iseq.putobject(0) - iseq.send(:default=, 1) - - iseq.putobject(0) - iseq.setglobal(:$cursor) - - visit_nodes(node.nodes) - - iseq.leave - iseq - end - - def visit_shift_left(node) - shift_by(-1) - end - - def visit_shift_right(node) - shift_by(1) - end - - private - - def change_by(value) - iseq.getglobal(:$tape) - iseq.getglobal(:$cursor) - iseq.getglobal(:$tape) - iseq.getglobal(:$cursor) - iseq.send(:[], 1) - - if value < 0 - iseq.putobject(-value) - iseq.send(:-, 1) - else - iseq.putobject(value) - iseq.send(:+, 1) - end - - iseq.send(:[]=, 2) - end - - def shift_by(value) - iseq.getglobal(:$cursor) - - if value < 0 - iseq.putobject(-value) - iseq.send(:-, 1) - else - iseq.putobject(value) - iseq.send(:+, 1) - end + # $tape[$cursor] = $stdin.getc.ord + def input_char(iseq) + iseq.getglobal(:$tape) + iseq.getglobal(:$cursor) - iseq.setglobal(:$cursor) - end + iseq.getglobal(:$stdin) + iseq.send(:getc, 0) + iseq.send(:ord, 0) - def visit_nodes(nodes) - nodes - .chunk do |child| - case child - when Increment, Decrement - :change - when ShiftLeft, ShiftRight - :shift - else - :default - end - end - .each do |type, children| - case type - when :change - value = 0 - children.each { |child| value += child.is_a?(Increment) ? 1 : -1 } - change_by(value) - when :shift - value = 0 - children.each { |child| value += child.is_a?(ShiftRight) ? 
1 : -1 } - shift_by(value) - else - visit_all(children) - end - end - end + iseq.send(:[]=, 2) end - class Error < StandardError - end + # unless $tape[$cursor] == 0 + def loop_start(iseq) + start_label = iseq.label - attr_reader :source + iseq.getglobal(:$tape) + iseq.getglobal(:$cursor) + iseq.send(:[], 1) - def initialize(source) - @source = source - end + iseq.putobject(0) + iseq.send(:==, 1) - def compile - Root.new(nodes: parse_segment(source, 0), location: 0...source.length).accept(Compiler.new) + branchunless = iseq.branchunless(-1) + [start_label, branchunless] end - private - - def parse_segment(segment, offset) - index = 0 - nodes = [] - - while index < segment.length - location = offset + index - - case segment[index] - when "+" - nodes << Increment.new(location: location...(location + 1)) - index += 1 - when "-" - nodes << Decrement.new(location: location...(location + 1)) - index += 1 - when ">" - nodes << ShiftRight.new(location: location...(location + 1)) - index += 1 - when "<" - nodes << ShiftLeft.new(location: location...(location + 1)) - index += 1 - when "." - nodes << Output.new(location: location...(location + 1)) - index += 1 - when "," - nodes << Input.new(location: location...(location + 1)) - index += 1 - when "[" - matched = 1 - end_index = index + 1 - - while matched != 0 && end_index < segment.length - case segment[end_index] - when "[" - matched += 1 - when "]" - matched -= 1 - end - - end_index += 1 - end - - raise Error, "Unmatched start loop" if matched != 0 - - content = segment[(index + 1)...(end_index - 1)] - nodes << Loop.new( - nodes: parse_segment(content, offset + index + 1), - location: location...(offset + end_index) - ) - - index = end_index - when "]" - raise Error, "Unmatched end loop" - else - index += 1 - end - end - - nodes + # Jump back to the start of the loop. 
+ def loop_end(iseq, start_label, branchunless) + iseq.jump(start_label) + branchunless.patch!(iseq) end end end diff --git a/lib/syntax_tree/yarv/disassembler.rb b/lib/syntax_tree/yarv/disassembler.rb index 566ed984..7a6e8893 100644 --- a/lib/syntax_tree/yarv/disassembler.rb +++ b/lib/syntax_tree/yarv/disassembler.rb @@ -5,15 +5,33 @@ module YARV # This class is responsible for taking a compiled instruction sequence and # walking through it to generate equivalent Ruby code. class Disassembler + # When we're disassmebling, we use a looped case statement to emulate + # jumping around in the same way the virtual machine would. This class + # provides convenience methods for generating the AST nodes that have to + # do with that label. + class DisasmLabel + include DSL + attr_reader :name + + def initialize(name) + @name = name + end + + def field + VarField(Ident(name)) + end + + def ref + VarRef(Ident(name)) + end + end + include DSL - attr_reader :iseq, :label_name, :label_field, :label_ref + attr_reader :iseq, :disasm_label def initialize(iseq) @iseq = iseq - - @label_name = "__disasm_label" - @label_field = VarField(Ident(label_name)) - @label_ref = VarRef(Ident(label_name)) + @disasm_label = DisasmLabel.new("__disasm_label") end def to_ruby @@ -37,143 +55,198 @@ def disassemble(iseq) clause = [] iseq.insns.each do |insn| - if insn.is_a?(Symbol) && insn.start_with?("label_") - clause << Assign(label_field, node_for(insn)) unless clause.last.is_a?(Next) - clauses[label] = clause - clause = [] - label = insn - next - end + case insn + when Symbol + if insn.start_with?("label_") + unless clause.last.is_a?(Next) + clause << Assign(disasm_label.field, node_for(insn)) + end + + clauses[label] = clause + clause = [] + label = insn + end + when BranchUnless + body = [ + Assign(disasm_label.field, node_for(insn.label)), + Next(Args([])) + ] - case insn[0] - when :branchunless - clause << IfNode(clause.pop, Statements([Assign(label_field, node_for(insn[1])), 
Next(Args([]))]), nil) - when :dup + clause << IfNode(clause.pop, Statements(body), nil) + when Dup clause << clause.last - when :duphash - assocs = insn[1].map { |key, value| Assoc(node_for(key), node_for(value)) } + when DupHash + assocs = + insn.object.map do |key, value| + Assoc(node_for(key), node_for(value)) + end + clause << HashLiteral(LBrace("{"), assocs) - when :getglobal - clause << VarRef(GVar(insn[1].to_s)) - when :getlocal_WC_0 - clause << VarRef(Ident(local_name(insn[1], 0))) - when :jump - clause << Assign(label_field, node_for(insn[1])) - clause << Next(Args([])) - when :leave - value = Args([clause.pop]) - clause << (iseq.type == :top ? Break(value) : ReturnNode(value)) - when :opt_and - left, right = clause.pop(2) - clause << Binary(left, :&, right) - when :opt_aref - collection, arg = clause.pop(2) - clause << ARef(collection, Args([arg])) - when :opt_aset - collection, arg, value = clause.pop(3) - - if value.is_a?(Binary) && value.left.is_a?(ARef) && collection === value.left.collection && arg === value.left.index.parts[0] - clause << OpAssign(ARefField(collection, Args([arg])), Op("#{value.operator}="), value.right) - else - clause << Assign(ARefField(collection, Args([arg])), value) - end - when :opt_div - left, right = clause.pop(2) - clause << Binary(left, :/, right) - when :opt_eq - left, right = clause.pop(2) - clause << Binary(left, :==, right) - when :opt_ge - left, right = clause.pop(2) - clause << Binary(left, :>=, right) - when :opt_gt - left, right = clause.pop(2) - clause << Binary(left, :>, right) - when :opt_le - left, right = clause.pop(2) - clause << Binary(left, :<=, right) - when :opt_lt - left, right = clause.pop(2) - clause << Binary(left, :<, right) - when :opt_ltlt - left, right = clause.pop(2) - clause << Binary(left, :<<, right) - when :opt_minus - left, right = clause.pop(2) - clause << Binary(left, :-, right) - when :opt_mod - left, right = clause.pop(2) - clause << Binary(left, :%, right) - when :opt_mult - left, 
right = clause.pop(2) - clause << Binary(left, :*, right) - when :opt_neq - left, right = clause.pop(2) - clause << Binary(left, :"!=", right) - when :opt_or - left, right = clause.pop(2) - clause << Binary(left, :|, right) - when :opt_plus - left, right = clause.pop(2) - clause << Binary(left, :+, right) - when :opt_send_without_block - if insn[1][:flag] & VM_CALL_FCALL > 0 - if insn[1][:orig_argc] == 0 - clause.pop - clause << CallNode(nil, nil, Ident(insn[1][:mid]), Args([])) - elsif insn[1][:orig_argc] == 1 && insn[1][:mid].end_with?("=") - _receiver, argument = clause.pop(2) - clause << Assign(CallNode(nil, nil, Ident(insn[1][:mid][0..-2]), nil), argument) + when GetGlobal + clause << VarRef(GVar(insn.name.to_s)) + when GetLocalWC0 + local = iseq.local_table.locals[insn.index] + clause << VarRef(Ident(local.name.to_s)) + when Array + case insn[0] + when :jump + clause << Assign(disasm_label.field, node_for(insn[1])) + clause << Next(Args([])) + when :leave + value = Args([clause.pop]) + clause << (iseq.type == :top ? 
Break(value) : ReturnNode(value)) + when :opt_and + left, right = clause.pop(2) + clause << Binary(left, :&, right) + when :opt_aref + collection, arg = clause.pop(2) + clause << ARef(collection, Args([arg])) + when :opt_aset + collection, arg, value = clause.pop(3) + + clause << if value.is_a?(Binary) && value.left.is_a?(ARef) && + collection === value.left.collection && + arg === value.left.index.parts[0] + OpAssign( + ARefField(collection, Args([arg])), + Op("#{value.operator}="), + value.right + ) else - _receiver, *arguments = clause.pop(insn[1][:orig_argc] + 1) - clause << CallNode(nil, nil, Ident(insn[1][:mid]), ArgParen(Args(arguments))) + Assign(ARefField(collection, Args([arg])), value) end - else - if insn[1][:orig_argc] == 0 - clause << CallNode(clause.pop, Period("."), Ident(insn[1][:mid]), nil) - elsif insn[1][:orig_argc] == 1 && insn[1][:mid].end_with?("=") - receiver, argument = clause.pop(2) - clause << Assign(CallNode(receiver, Period("."), Ident(insn[1][:mid][0..-2]), nil), argument) + when :opt_div + left, right = clause.pop(2) + clause << Binary(left, :/, right) + when :opt_eq + left, right = clause.pop(2) + clause << Binary(left, :==, right) + when :opt_ge + left, right = clause.pop(2) + clause << Binary(left, :>=, right) + when :opt_gt + left, right = clause.pop(2) + clause << Binary(left, :>, right) + when :opt_le + left, right = clause.pop(2) + clause << Binary(left, :<=, right) + when :opt_lt + left, right = clause.pop(2) + clause << Binary(left, :<, right) + when :opt_ltlt + left, right = clause.pop(2) + clause << Binary(left, :<<, right) + when :opt_minus + left, right = clause.pop(2) + clause << Binary(left, :-, right) + when :opt_mod + left, right = clause.pop(2) + clause << Binary(left, :%, right) + when :opt_mult + left, right = clause.pop(2) + clause << Binary(left, :*, right) + when :opt_neq + left, right = clause.pop(2) + clause << Binary(left, :"!=", right) + when :opt_or + left, right = clause.pop(2) + clause << Binary(left, :|, 
right) + when :opt_plus + left, right = clause.pop(2) + clause << Binary(left, :+, right) + when :opt_send_without_block + if insn[1][:flag] & VM_CALL_FCALL > 0 + if insn[1][:orig_argc] == 0 + clause.pop + clause << CallNode(nil, nil, Ident(insn[1][:mid]), Args([])) + elsif insn[1][:orig_argc] == 1 && insn[1][:mid].end_with?("=") + _receiver, argument = clause.pop(2) + clause << Assign( + CallNode(nil, nil, Ident(insn[1][:mid][0..-2]), nil), + argument + ) + else + _receiver, *arguments = clause.pop(insn[1][:orig_argc] + 1) + clause << CallNode( + nil, + nil, + Ident(insn[1][:mid]), + ArgParen(Args(arguments)) + ) + end else - receiver, *arguments = clause.pop(insn[1][:orig_argc] + 1) - clause << CallNode(receiver, Period("."), Ident(insn[1][:mid]), ArgParen(Args(arguments))) + if insn[1][:orig_argc] == 0 + clause << CallNode( + clause.pop, + Period("."), + Ident(insn[1][:mid]), + nil + ) + elsif insn[1][:orig_argc] == 1 && insn[1][:mid].end_with?("=") + receiver, argument = clause.pop(2) + clause << Assign( + CallNode( + receiver, + Period("."), + Ident(insn[1][:mid][0..-2]), + nil + ), + argument + ) + else + receiver, *arguments = clause.pop(insn[1][:orig_argc] + 1) + clause << CallNode( + receiver, + Period("."), + Ident(insn[1][:mid]), + ArgParen(Args(arguments)) + ) + end end - end - when :putobject - case insn[1] - when Float - clause << FloatLiteral(insn[1].inspect) - when Integer - clause << Int(insn[1].inspect) - else - raise "Unknown object type: #{insn[1].class.name}" - end - when :putobject_INT2FIX_0_ - clause << Int("0") - when :putobject_INT2FIX_1_ - clause << Int("1") - when :putself - clause << VarRef(Kw("self")) - when :setglobal - target = GVar(insn[1].to_s) - value = clause.pop - - if value.is_a?(Binary) && VarRef(target) === value.left - clause << OpAssign(VarField(target), Op("#{value.operator}="), value.right) - else - clause << Assign(VarField(target), value) - end - when :setlocal_WC_0 - target = Ident(local_name(insn[1], 0)) - value = 
clause.pop + when :putobject + case insn[1] + when Float + clause << FloatLiteral(insn[1].inspect) + when Integer + clause << Int(insn[1].inspect) + else + raise "Unknown object type: #{insn[1].class.name}" + end + when :putobject_INT2FIX_0_ + clause << Int("0") + when :putobject_INT2FIX_1_ + clause << Int("1") + when :putself + clause << VarRef(Kw("self")) + when :setglobal + target = GVar(insn[1].to_s) + value = clause.pop - if value.is_a?(Binary) && VarRef(target) === value.left - clause << OpAssign(VarField(target), Op("#{value.operator}="), value.right) + clause << if value.is_a?(Binary) && VarRef(target) === value.left + OpAssign( + VarField(target), + Op("#{value.operator}="), + value.right + ) + else + Assign(VarField(target), value) + end + when :setlocal_WC_0 + target = Ident(local_name(insn[1], 0)) + value = clause.pop + + clause << if value.is_a?(Binary) && VarRef(target) === value.left + OpAssign( + VarField(target), + Op("#{value.operator}="), + value.right + ) + else + Assign(VarField(target), value) + end else - clause << Assign(VarField(target), value) + raise "Unknown instruction #{insn[0]}" end - else - raise "Unknown instruction #{insn[0]}" end end @@ -185,31 +258,44 @@ def disassemble(iseq) # Here we're going to build up a big case statement that will handle all # of the different labels. current = nil - clauses.reverse_each do |label, clause| - current = When(Args([node_for(label)]), Statements(clause), current) + clauses.reverse_each do |current_label, current_clause| + current = + When( + Args([node_for(current_label)]), + Statements(current_clause), + current + ) end - switch = Case(Kw("case"), label_ref, current) + switch = Case(Kw("case"), disasm_label.ref, current) # Here we're going to make sure that any locals that were established in # the label_0 block are initialized so that scoping rules work # correctly. 
stack = [] - locals = [label_name] + locals = [disasm_label.name] clauses[:label_0].each do |node| - if node.is_a?(Assign) && node.target.is_a?(VarField) && node.target.value.is_a?(Ident) + if node.is_a?(Assign) && node.target.is_a?(VarField) && + node.target.value.is_a?(Ident) value = node.target.value.value next if locals.include?(value) stack << Assign(node.target, VarRef(Kw("nil"))) - locals << value + locals << value end end # Finally, we'll set up the initial label and loop the entire case # statement. - stack << Assign(label_field, node_for(:label_0)) - stack << MethodAddBlock(CallNode(nil, nil, Ident("loop"), Args([])), BlockNode(Kw("do"), nil, BodyStmt(Statements([switch]), nil, nil, nil, nil))) + stack << Assign(disasm_label.field, node_for(:label_0)) + stack << MethodAddBlock( + CallNode(nil, nil, Ident("loop"), Args([])), + BlockNode( + Kw("do"), + nil, + BodyStmt(Statements([switch]), nil, nil, nil, nil) + ) + ) Statements(stack) end diff --git a/lib/syntax_tree/yarv/instructions.rb b/lib/syntax_tree/yarv/instructions.rb new file mode 100644 index 00000000..c50c5c84 --- /dev/null +++ b/lib/syntax_tree/yarv/instructions.rb @@ -0,0 +1,1071 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # ### Summary + # + # `adjuststack` accepts a single integer argument and removes that many + # elements from the top of the stack. + # + # ### Usage + # + # ~~~ruby + # x = [true] + # x[0] ||= nil + # x[0] + # ~~~ + # + class AdjustStack + attr_reader :number + + def initialize(number) + @number = number + end + + def to_a(_iseq) + [:adjuststack, number] + end + + def length + 2 + end + + def pops + number + end + + def pushes + 0 + end + end + + # ### Summary + # + # `anytostring` ensures that the value on top of the stack is a string. + # + # It pops two values off the stack. If the first value is a string it + # pushes it back on the stack. 
If the first value is not a string, it uses + # Ruby's built in string coercion to coerce the second value to a string + # and then pushes that back on the stack. + # + # This is used in conjunction with `objtostring` as a fallback for when an + # object's `to_s` method does not return a string. + # + # ### Usage + # + # ~~~ruby + # "#{5}" + # ~~~ + # + class AnyToString + def to_a(_iseq) + [:anytostring] + end + + def length + 1 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `branchif` has one argument: the jump index. It pops one value off the + # stack: the jump condition. + # + # If the value popped off the stack is true, `branchif` jumps to + # the jump index and continues executing there. + # + # ### Usage + # + # ~~~ruby + # x = true + # x ||= "foo" + # puts x + # ~~~ + # + class BranchIf + attr_reader :label + + def initialize(label) + @label = label + end + + def patch!(iseq) + @label = iseq.label + end + + def to_a(_iseq) + [:branchif, label] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `branchnil` has one argument: the jump index. It pops one value off the + # stack: the jump condition. + # + # If the value popped off the stack is nil, `branchnil` jumps to + # the jump index and continues executing there. + # + # ### Usage + # + # ~~~ruby + # x = nil + # if x&.to_s + # puts "hi" + # end + # ~~~ + # + class BranchNil + attr_reader :label + + def initialize(label) + @label = label + end + + def patch!(iseq) + @label = iseq.label + end + + def to_a(_iseq) + [:branchnil, label] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `branchunless` has one argument: the jump index. It pops one value off + # the stack: the jump condition. + # + # If the value popped off the stack is false or nil, `branchunless` jumps + # to the jump index and continues executing there. 
+ # + # ### Usage + # + # ~~~ruby + # if 2 + 3 + # puts "foo" + # end + # ~~~ + # + class BranchUnless + attr_reader :label + + def initialize(label) + @label = label + end + + def patch!(iseq) + @label = iseq.label + end + + def to_a(_iseq) + [:branchunless, label] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `checkkeyword` checks if a keyword was passed at the callsite that + # called into the method represented by the instruction sequence. It has + # two arguments: the index of the local variable that stores the keywords + # metadata and the index of the keyword within that metadata. It pushes + # a boolean onto the stack indicating whether or not the keyword was + # given. + # + # ### Usage + # + # ~~~ruby + # def evaluate(value: rand) + # value + # end + # + # evaluate(value: 3) + # ~~~ + # + class CheckKeyword + attr_reader :keyword_bits_index, :keyword_index + + def initialize(keyword_bits_index, keyword_index) + @keyword_bits_index = keyword_bits_index + @keyword_index = keyword_index + end + + def patch!(iseq) + @label = iseq.label + end + + def to_a(iseq) + [ + :checkkeyword, + iseq.local_table.offset(keyword_bits_index), + keyword_index + ] + end + + def length + 3 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `concatarray` concatenates the two Arrays on top of the stack. + # + # It coerces the two objects at the top of the stack into Arrays by + # calling `to_a` if necessary, and makes sure to `dup` the first Array if + # it was already an Array, to avoid mutating it when concatenating. + # + # ### Usage + # + # ~~~ruby + # [1, *2] + # ~~~ + # + class ConcatArray + def to_a(_iseq) + [:concatarray] + end + + def length + 1 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `concatstrings` pops a number of strings from the stack joins them + # together into a single string and pushes that string back on the stack. 
+ # + # This does no coercion and so is always used in conjunction with + # `objtostring` and `anytostring` to ensure the stack contents are always + # strings. + # + # ### Usage + # + # ~~~ruby + # "#{5}" + # ~~~ + # + class ConcatStrings + attr_reader :number + + def initialize(number) + @number = number + end + + def to_a(_iseq) + [:concatstrings, number] + end + + def length + 2 + end + + def pops + number + end + + def pushes + 1 + end + end + + # ### Summary + # + # `defined` checks if the top value of the stack is defined. If it is, it + # pushes its value onto the stack. Otherwise it pushes `nil`. + # + # ### Usage + # + # ~~~ruby + # defined?(x) + # ~~~ + # + class Defined + NIL = 1 + IVAR = 2 + LVAR = 3 + GVAR = 4 + CVAR = 5 + CONST = 6 + METHOD = 7 + YIELD = 8 + ZSUPER = 9 + SELF = 10 + TRUE = 11 + FALSE = 12 + ASGN = 13 + EXPR = 14 + REF = 15 + FUNC = 16 + CONST_FROM = 17 + + attr_reader :type, :name, :message + + def initialize(type, name, message) + @type = type + @name = name + @message = message + end + + def to_a(_iseq) + [:defined, type, name, message] + end + + def length + 4 + end + + def pops + 1 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `defineclass` defines a class. First it pops the superclass off the + # stack, then it pops the object off the stack that the class should be + # defined under. It has three arguments: the name of the constant, the + # instruction sequence associated with the class, and various flags that + # indicate if it is a singleton class, a module, or a regular class. 
+ # + # ### Usage + # + # ~~~ruby + # class Foo + # end + # ~~~ + # + class DefineClass + TYPE_CLASS = 0 + TYPE_SINGLETON_CLASS = 1 + TYPE_MODULE = 2 + FLAG_SCOPED = 8 + FLAG_HAS_SUPERCLASS = 16 + + attr_reader :name, :class_iseq, :flags + + def initialize(name, class_iseq, flags) + @name = name + @class_iseq = class_iseq + @flags = flags + end + + def to_a(_iseq) + [:defineclass, name, class_iseq.to_a, flags] + end + + def length + 4 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `definemethod` defines a method on the class of the current value of + # `self`. It accepts two arguments. The first is the name of the method + # being defined. The second is the instruction sequence representing the + # body of the method. + # + # ### Usage + # + # ~~~ruby + # def value = "value" + # ~~~ + # + class DefineMethod + attr_reader :name, :method_iseq + + def initialize(name, method_iseq) + @name = name + @method_iseq = method_iseq + end + + def to_a(_iseq) + [:definemethod, name, method_iseq.to_a] + end + + def length + 3 + end + + def pops + 0 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `definesmethod` defines a method on the singleton class of the current + # value of `self`. It accepts two arguments. The first is the name of the + # method being defined. The second is the instruction sequence representing + # the body of the method. It pops the object off the stack that the method + # should be defined on. + # + # ### Usage + # + # ~~~ruby + # def self.value = "value" + # ~~~ + # + class DefineSMethod + attr_reader :name, :method_iseq + + def initialize(name, method_iseq) + @name = name + @method_iseq = method_iseq + end + + def to_a(_iseq) + [:definesmethod, name, method_iseq.to_a] + end + + def length + 3 + end + + def pops + 1 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `dup` copies the top value of the stack and pushes it onto the stack. 
+ # + # ### Usage + # + # ~~~ruby + # $global = 5 + # ~~~ + # + class Dup + def to_a(_iseq) + [:dup] + end + + def length + 1 + end + + def pops + 1 + end + + def pushes + 2 + end + end + + # ### Summary + # + # `duparray` dups an Array literal and pushes it onto the stack. + # + # ### Usage + # + # ~~~ruby + # [true] + # ~~~ + # + class DupArray + attr_reader :object + + def initialize(object) + @object = object + end + + def to_a(_iseq) + [:duparray, object] + end + + def length + 2 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `duphash` dups a Hash literal and pushes it onto the stack. + # + # ### Usage + # + # ~~~ruby + # { a: 1 } + # ~~~ + # + class DupHash + attr_reader :object + + def initialize(object) + @object = object + end + + def to_a(_iseq) + [:duphash, object] + end + + def length + 2 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `dupn` duplicates the top `n` stack elements. + # + # ### Usage + # + # ~~~ruby + # Object::X ||= true + # ~~~ + # + class DupN + attr_reader :number + + def initialize(number) + @number = number + end + + def to_a(_iseq) + [:dupn, number] + end + + def length + 2 + end + + def pops + number + end + + def pushes + number * 2 + end + end + + # ### Summary + # + # `expandarray` looks at the top of the stack, and if the value is an array + # it replaces it on the stack with `number` elements of the array, or `nil` + # if the elements are missing. 
#
# ### Usage
#
# ~~~ruby
# x, = [true, false, nil]
# ~~~
#
class ExpandArray
  attr_reader :number, :flags

  # number is how many elements are expanded onto the stack; flags is stored
  # and emitted verbatim.
  def initialize(number, flags)
    @number = number
    @flags = flags
  end

  # Serializes the instruction; the iseq argument is not used.
  def to_a(_iseq)
    [:expandarray, number, flags]
  end

  # Number of entries in the serialized form (opcode plus operands).
  def length
    3
  end

  # The value on top of the stack that gets expanded.
  def pops
    1
  end

  # One value per requested element.
  def pushes
    number
  end
end

# ### Summary
#
# `getblockparam` is a similar instruction to `getlocal` in that it looks
# for a local variable in the current instruction sequence's local table and
# walks recursively up the parent instruction sequences until it finds it.
# The local it retrieves, however, is a special block local that was passed
# to the current method. It pushes the value of the block local onto the
# stack.
#
# ### Usage
#
# ~~~ruby
# def foo(&block)
#   block
# end
# ~~~
#
class GetBlockParam
  attr_reader :index, :level

  # index identifies the local within its local table; level is how many
  # parent instruction sequences to walk up to reach that table.
  def initialize(index, level)
    @index = index
    @level = level
  end

  # Serializes the instruction. The offset is resolved against the local
  # table of the instruction sequence `level` parents above iseq.
  def to_a(iseq)
    current = iseq
    # Step from the frame reached so far, not from the starting iseq;
    # otherwise every level >= 1 would stop at the first parent.
    level.times { current = current.parent_iseq }
    [:getblockparam, current.local_table.offset(index), level]
  end

  # Number of entries in the serialized form (opcode plus operands).
  def length
    3
  end

  # Number of values taken off the stack.
  def pops
    0
  end

  # Number of values put onto the stack.
  def pushes
    1
  end
end

# ### Summary
#
# `getblockparamproxy` is almost the same as `getblockparam` except that it
# pushes a proxy object onto the stack instead of the actual value of the
# block local. This is used when a method is being called on the block
# local.
#
# ### Usage
#
# ~~~ruby
# def foo(&block)
#   block.call
# end
# ~~~
#
class GetBlockParamProxy
  attr_reader :index, :level

  # index identifies the local within its local table; level is how many
  # parent instruction sequences to walk up to reach that table.
  def initialize(index, level)
    @index = index
    @level = level
  end

  # Serializes the instruction. The offset is resolved against the local
  # table of the instruction sequence `level` parents above iseq.
  def to_a(iseq)
    current = iseq
    # Step from the frame reached so far, not from the starting iseq;
    # otherwise every level >= 1 would stop at the first parent.
    level.times { current = current.parent_iseq }
    [:getblockparamproxy, current.local_table.offset(index), level]
  end

  # Number of entries in the serialized form (opcode plus operands).
  def length
    3
  end

  # Number of values taken off the stack.
  def pops
    0
  end

  # Number of values put onto the stack.
  def pushes
    1
  end
end

# ### Summary
#
# `getclassvariable` looks for a class variable in the current class and
# pushes its value onto the stack. It uses an inline cache to reduce the
# need to lookup the class variable in the class hierarchy every time.
#
# ### Usage
#
# ~~~ruby
# @@class_variable
# ~~~
#
class GetClassVariable
  attr_reader :name, :cache

  # cache is the inline cache operand; it is stored and emitted verbatim.
  def initialize(name, cache)
    @name = name
    @cache = cache
  end

  # Serializes the instruction; the iseq argument is not used.
  def to_a(_iseq)
    [:getclassvariable, name, cache]
  end

  # Number of entries in the serialized form (opcode plus operands).
  def length
    3
  end

  # Number of values taken off the stack.
  def pops
    0
  end

  # Number of values put onto the stack.
  def pushes
    1
  end
end

# ### Summary
#
# `getclassvariable` looks for a class variable in the current class and
# pushes its value onto the stack.
#
# This version of the `getclassvariable` instruction is no longer used since
# in Ruby 3.0 it gained an inline cache.
#
# ### Usage
#
# ~~~ruby
# @@class_variable
# ~~~
#
class GetClassVariableUncached
  attr_reader :name

  def initialize(name)
    @name = name
  end

  # Serializes the instruction under the same opcode as the cached form, but
  # without a cache operand; the iseq argument is not used.
  def to_a(_iseq)
    [:getclassvariable, name]
  end

  # Number of entries in the serialized form (opcode plus operand).
  def length
    2
  end

  # Number of values taken off the stack.
  def pops
    0
  end

  # Number of values put onto the stack.
  def pushes
    1
  end
end

# ### Summary
#
# `getconstant` performs a constant lookup and pushes the value of the
# constant onto the stack. It pops both the class it should look in and
# whether or not it should look globally as well.
#
# This instruction is no longer used since in Ruby 3.2 it was replaced by
# the consolidated `opt_getconstant_path` instruction.
#
# ### Usage
#
# ~~~ruby
# Constant
# ~~~
#
class GetConstant
  attr_reader :name

  def initialize(name)
    @name = name
  end

  # Serializes the instruction; the iseq argument is not used.
  def to_a(_iseq)
    [:getconstant, name]
  end

  # Number of entries in the serialized form (opcode plus operand).
  def length
    2
  end

  # Number of values taken off the stack.
  def pops
    2
  end

  # Number of values put onto the stack.
  def pushes
    1
  end
end

# ### Summary
#
# `getglobal` pushes the value of a global variable onto the stack.
#
# ### Usage
#
# ~~~ruby
# $$
# ~~~
#
class GetGlobal
  attr_reader :name

  def initialize(name)
    @name = name
  end

  # Serializes the instruction; the iseq argument is not used.
  def to_a(_iseq)
    [:getglobal, name]
  end

  # Number of entries in the serialized form (opcode plus operand).
  def length
    2
  end

  # Number of values taken off the stack.
  def pops
    0
  end

  # Number of values put onto the stack.
  def pushes
    1
  end
end

# ### Summary
#
# `getinstancevariable` pushes the value of an instance variable onto the
# stack. It uses an inline cache to avoid having to look up the instance
# variable in the class hierarchy every time.
#
# This instruction has two forms, but both have the same structure. Before
# Ruby 3.2, the inline cache corresponded to both the get and set
# instructions and could be shared. Since Ruby 3.2, it uses object shapes
# instead so the caches are unique per instruction.
#
# ### Usage
#
# ~~~ruby
# @instance_variable
# ~~~
#
class GetInstanceVariable
  attr_reader :name, :cache

  # cache is the inline cache operand; it is stored and emitted verbatim.
  def initialize(name, cache)
    @name = name
    @cache = cache
  end

  # Serializes the instruction; the iseq argument is not used.
  def to_a(_iseq)
    [:getinstancevariable, name, cache]
  end

  # Number of entries in the serialized form (opcode plus operands).
  def length
    3
  end

  # Number of values taken off the stack.
  def pops
    0
  end

  # Number of values put onto the stack.
  def pushes
    1
  end
end

# ### Summary
#
# `getlocal_WC_0` is a specialized version of the `getlocal` instruction. It
# fetches the value of a local variable from the current frame determined by
# the index given as its only argument.
#
# ### Usage
#
# ~~~ruby
# value = 5
# value
# ~~~
#
class GetLocalWC0
  attr_reader :index

  # index identifies the local within the current frame's local table.
  def initialize(index)
    @index = index
  end

  # Serializes the instruction, resolving the offset against the local table
  # of iseq itself.
  def to_a(iseq)
    [:getlocal_WC_0, iseq.local_table.offset(index)]
  end

  # Number of entries in the serialized form (opcode plus operand).
  def length
    2
  end

  # Number of values taken off the stack.
  def pops
    0
  end

  # Number of values put onto the stack.
  def pushes
    1
  end
end

# ### Summary
#
# `getlocal_WC_1` is a specialized version of the `getlocal` instruction. It
# fetches the value of a local variable from the parent frame determined by
# the index given as its only argument.
#
# ### Usage
#
# ~~~ruby
# value = 5
# self.then { value }
# ~~~
#
class GetLocalWC1
  attr_reader :index

  # index identifies the local within the parent frame's local table.
  def initialize(index)
    @index = index
  end

  # Serializes the instruction, resolving the offset against the local table
  # of iseq's direct parent.
  def to_a(iseq)
    [:getlocal_WC_1, iseq.parent_iseq.local_table.offset(index)]
  end

  # Number of entries in the serialized form (opcode plus operand).
  def length
    2
  end

  # Number of values taken off the stack.
  def pops
    0
  end

  # Number of values put onto the stack.
  def pushes
    1
  end
end

# ### Summary
#
# `getlocal` fetches the value of a local variable from a frame determined
# by the level and index arguments. The level is the number of frames back
# to look and the index is the index in the local table. It pushes the value
# it finds onto the stack.
+ # + # ### Usage + # + # ~~~ruby + # value = 5 + # tap { tap { value } } + # ~~~ + # + class GetLocal + attr_reader :index, :level + + def initialize(index, level) + @index = index + @level = level + end + + def to_a(iseq) + current = iseq + level.times { current = current.parent_iseq } + [:getlocal, current.local_table.offset(index), level] + end + + def length + 3 + end + + def pops + 0 + end + + def pushes + 1 + end + end + end +end diff --git a/test/yarv_test.rb b/test/yarv_test.rb index da348224..55cdb657 100644 --- a/test/yarv_test.rb +++ b/test/yarv_test.rb @@ -26,7 +26,7 @@ class YARVTest < Minitest::Test "1 << 2" => "break 1 << 2\n", "1 >> 2" => "break 1.>>(2)\n", "1 ** 2" => "break 1.**(2)\n", - "a = 1; a" => "a = 1\nbreak a\n", + "a = 1; a" => "a = 1\nbreak a\n" }.freeze CASES.each do |source, expected| @@ -35,6 +35,15 @@ class YARVTest < Minitest::Test end end + def test_bf + hello_world = + "++++++++[>++++[>++>+++>+++>+<<<<-]>+>+>->>+[<]<-]" \ + ">>.>---.+++++++..+++.>>.<-.<.+++.------.--------.>>+.>++." 
+ + iseq = YARV::Bf.new(hello_world).compile + Formatter.format(hello_world, YARV::Disassembler.new(iseq).to_ruby) + end + private def assert_disassembles(expected, source) From 441bc01d9f68e07c3acd891c915f950652f70176 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 21 Nov 2022 22:00:09 -0500 Subject: [PATCH 054/104] opt_aref_with --- lib/syntax_tree/compiler.rb | 29 +++++++++++++++++++++++++++++ lib/syntax_tree/yarv.rb | 5 +++++ test/compiler_test.rb | 1 + 3 files changed, 35 insertions(+) diff --git a/lib/syntax_tree/compiler.rb b/lib/syntax_tree/compiler.rb index 8327a080..106c3ca3 100644 --- a/lib/syntax_tree/compiler.rb +++ b/lib/syntax_tree/compiler.rb @@ -158,6 +158,21 @@ def visit_tstring_content(node) node.value end + def visit_var_ref(node) + raise CompilationError unless node.value.is_a?(Kw) + + case node.value.value + when "nil" + nil + when "true" + true + when "false" + false + else + raise CompilationError + end + end + def visit_word(node) if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) node.parts.first.value @@ -258,6 +273,20 @@ def visit_alias(node) def visit_aref(node) visit(node.collection) + + if !frozen_string_literal && specialized_instruction && (node.index.parts.length == 1) + arg = node.index.parts.first + + if arg.is_a?(StringLiteral) && (arg.parts.length == 1) + string_part = arg.parts.first + + if string_part.is_a?(TStringContent) + iseq.opt_aref_with(string_part.value, :[], 1) + return + end + end + end + visit(node.index) iseq.send(:[], 1) end diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index a29714a5..57a21f2c 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -527,6 +527,11 @@ def once(postexe_iseq, inline_storage) push([:once, postexe_iseq, inline_storage]) end + def opt_aref_with(object, method_id, argc, flag = VM_CALL_ARGS_SIMPLE) + stack.change_by(-1 + 1) + push([:opt_aref_with, object, call_data(method_id, argc, flag)]) + end + def opt_getconstant_path(names) if 
RUBY_VERSION >= "3.2" stack.change_by(+1) diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 27bf993d..485e92fc 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -201,6 +201,7 @@ class CompilerTest < Minitest::Test "foo[bar] ||= 1", "foo[bar] <<= 1", "foo[bar] ^= 1", + "foo['true']", # Constants (single) "Foo", "Foo = 1", From cc24d7f4198beb08cb3c37e244535afee013554b Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 21 Nov 2022 22:04:03 -0500 Subject: [PATCH 055/104] opt_aset_with --- lib/syntax_tree/compiler.rb | 18 ++++++++++++++++++ lib/syntax_tree/yarv.rb | 5 +++++ test/compiler_test.rb | 1 + 3 files changed, 24 insertions(+) diff --git a/lib/syntax_tree/compiler.rb b/lib/syntax_tree/compiler.rb index 106c3ca3..91ec3d30 100644 --- a/lib/syntax_tree/compiler.rb +++ b/lib/syntax_tree/compiler.rb @@ -337,6 +337,24 @@ def visit_array(node) def visit_assign(node) case node.target when ARefField + if !frozen_string_literal && specialized_instruction && (node.target.index.parts.length == 1) + arg = node.target.index.parts.first + + if arg.is_a?(StringLiteral) && (arg.parts.length == 1) + string_part = arg.parts.first + + if string_part.is_a?(TStringContent) + visit(node.target.collection) + visit(node.value) + iseq.swap + iseq.topn(1) + iseq.opt_aset_with(string_part.value, :[]=, 2) + iseq.pop + return + end + end + end + iseq.putnil visit(node.target.collection) visit(node.target.index) diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index 57a21f2c..0c4c3fc9 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -532,6 +532,11 @@ def opt_aref_with(object, method_id, argc, flag = VM_CALL_ARGS_SIMPLE) push([:opt_aref_with, object, call_data(method_id, argc, flag)]) end + def opt_aset_with(object, method_id, argc, flag = VM_CALL_ARGS_SIMPLE) + stack.change_by(-2 + 1) + push([:opt_aset_with, object, call_data(method_id, argc, flag)]) + end + def opt_getconstant_path(names) if RUBY_VERSION >= "3.2" 
stack.change_by(+1) diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 485e92fc..98559664 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -202,6 +202,7 @@ class CompilerTest < Minitest::Test "foo[bar] <<= 1", "foo[bar] ^= 1", "foo['true']", + "foo['true'] = 1", # Constants (single) "Foo", "Foo = 1", From 5bd3463db4f0c4b24fb7068c73be802c7b49e9fe Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 21 Nov 2022 22:08:38 -0500 Subject: [PATCH 056/104] setblockparam --- lib/syntax_tree/compiler.rb | 9 +++++++-- lib/syntax_tree/yarv.rb | 9 +++++++-- test/compiler_test.rb | 1 + 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/lib/syntax_tree/compiler.rb b/lib/syntax_tree/compiler.rb index 91ec3d30..8e1a0eaf 100644 --- a/lib/syntax_tree/compiler.rb +++ b/lib/syntax_tree/compiler.rb @@ -415,8 +415,13 @@ def visit_assign(node) when GVar iseq.setglobal(node.target.value.value.to_sym) when Ident - local_variable = visit(node.target) - iseq.setlocal(local_variable.index, local_variable.level) + lookup = visit(node.target) + + if lookup.local.is_a?(YARV::LocalTable::BlockLocal) + iseq.setblockparam(lookup.index, lookup.level) + else + iseq.setlocal(lookup.index, lookup.level) + end when IVar iseq.setinstancevariable(node.target.value.value.to_sym) end diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index 0c4c3fc9..a204989e 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -252,13 +252,13 @@ def to_a insn when Array case insn[0] - when :setlocal_WC_0, :setlocal_WC_1, :setlocal + when :setlocal_WC_0, :setlocal_WC_1, :setlocal, :setblockparam iseq = self case insn[0] when :setlocal_WC_1 iseq = iseq.parent_iseq - when :setlocal + when :setlocal, :setblockparam insn[2].times { iseq = iseq.parent_iseq } end @@ -704,6 +704,11 @@ def send(method_id, argc, flag = VM_CALL_ARGS_SIMPLE, block_iseq = nil) end end + def setblockparam(index, level) + stack.change_by(-1) + push([:setblockparam, index, level]) + 
end + def setclassvariable(name) stack.change_by(-1) diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 98559664..56e38577 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -361,6 +361,7 @@ class CompilerTest < Minitest::Test "def foo(bar, *baz, &qux); end", "def foo(&qux); qux; end", "def foo(&qux); qux.call; end", + "def foo(&qux); qux = bar; end", "def foo(bar:); end", "def foo(bar:, baz:); end", "def foo(bar: 1); end", From f35c452221590d1f3dcea49e99d2992d674952e6 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 21 Nov 2022 22:21:32 -0500 Subject: [PATCH 057/104] setspecial --- lib/syntax_tree/compiler.rb | 52 +++++++++++++++++++++++++++---------- lib/syntax_tree/yarv.rb | 10 +++++++ test/compiler_test.rb | 1 + 3 files changed, 49 insertions(+), 14 deletions(-) diff --git a/lib/syntax_tree/compiler.rb b/lib/syntax_tree/compiler.rb index 8e1a0eaf..3a4af3da 100644 --- a/lib/syntax_tree/compiler.rb +++ b/lib/syntax_tree/compiler.rb @@ -438,7 +438,7 @@ def visit_assoc_splat(node) end def visit_backref(node) - iseq.getspecial(1, 2 * node.value[1..].to_i) + iseq.getspecial(YARV::VM_SVAR_BACKREF, 2 * node.value[1..].to_i) end def visit_bare_assoc_hash(node) @@ -888,25 +888,49 @@ def visit_heredoc(node) end def visit_if(node) - visit(node.predicate) - branchunless = iseq.branchunless(-1) - visit(node.statements) + if node.predicate.is_a?(RangeNode) + iseq.getspecial(YARV::VM_SVAR_FLIPFLOP_START, 0) + branchif = iseq.branchif(-1) - if last_statement? - iseq.leave - branchunless.patch!(iseq) + visit(node.predicate.left) + branchunless_true = iseq.branchunless(-1) - node.consequent ? 
visit(node.consequent) : iseq.putnil + iseq.putobject(true) + iseq.setspecial(YARV::VM_SVAR_FLIPFLOP_START) + branchif.patch!(iseq) + + visit(node.predicate.right) + branchunless_false = iseq.branchunless(-1) + + iseq.putobject(false) + iseq.setspecial(YARV::VM_SVAR_FLIPFLOP_START) + branchunless_false.patch!(iseq) + + visit(node.statements) + iseq.leave + branchunless_true.patch!(iseq) + iseq.putnil else - iseq.pop + visit(node.predicate) + branchunless = iseq.branchunless(-1) + visit(node.statements) - if node.consequent - jump = iseq.jump(-1) + if last_statement? + iseq.leave branchunless.patch!(iseq) - visit(node.consequent) - jump[1] = iseq.label + + node.consequent ? visit(node.consequent) : iseq.putnil else - branchunless.patch!(iseq) + iseq.pop + + if node.consequent + jump = iseq.jump(-1) + branchunless.patch!(iseq) + visit(node.consequent) + jump[1] = iseq.label + else + branchunless.patch!(iseq) + end end end end diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index a204989e..6056fded 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -765,6 +765,11 @@ def setn(number) push([:setn, number]) end + def setspecial(key) + stack.change_by(-1) + push([:setspecial, key]) + end + def splatarray(flag) stack.change_by(-1 + 1) push([:splatarray, flag]) @@ -817,5 +822,10 @@ def call_data(method_id, argc, flag = VM_CALL_ARGS_SIMPLE) VM_CALL_ZSUPER = 1 << 10 VM_CALL_OPT_SEND = 1 << 11 VM_CALL_KW_SPLAT_MUT = 1 << 12 + + # These constants correspond to the setspecial instruction. + VM_SVAR_LASTLINE = 0 # $_ + VM_SVAR_BACKREF = 1 # $~ + VM_SVAR_FLIPFLOP_START = 2 # flipflop end end diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 56e38577..c1dab39c 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -285,6 +285,7 @@ class CompilerTest < Minitest::Test "foo ? bar : baz", "case foo when bar then 1 end", "case foo when bar then 1 else 2 end", + "baz if (foo == 1) .. 
(bar == 1)", # Constructed values "foo..bar", "foo...bar", From 1262b52c781d35df4c911d87ed47be2322812b0d Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 21 Nov 2022 22:33:43 -0500 Subject: [PATCH 058/104] newarraykwsplat --- lib/syntax_tree/compiler.rb | 9 +++++++++ lib/syntax_tree/yarv.rb | 5 +++++ test/compiler_test.rb | 1 + 3 files changed, 15 insertions(+) diff --git a/lib/syntax_tree/compiler.rb b/lib/syntax_tree/compiler.rb index 3a4af3da..1b2c5987 100644 --- a/lib/syntax_tree/compiler.rb +++ b/lib/syntax_tree/compiler.rb @@ -311,6 +311,15 @@ def visit_args(node) def visit_array(node) if (compiled = RubyVisitor.compile(node)) iseq.duparray(compiled) + elsif node.contents && node.contents.parts.length == 1 && + node.contents.parts.first.is_a?(BareAssocHash) && + node.contents.parts.first.assocs.length == 1 && + node.contents.parts.first.assocs.first.is_a?(AssocSplat) + iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) + iseq.newhash(0) + visit(node.contents.parts.first) + iseq.send(:"core#hash_merge_kwd", 2) + iseq.newarraykwsplat(1) else length = 0 diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index 6056fded..b168a135 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -502,6 +502,11 @@ def newarray(length) push([:newarray, length]) end + def newarraykwsplat(length) + stack.change_by(-length + 1) + push([:newarraykwsplat, length]) + end + def newhash(length) stack.change_by(-length + 1) push([:newhash, length]) diff --git a/test/compiler_test.rb b/test/compiler_test.rb index c1dab39c..d44eef50 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -311,6 +311,7 @@ class CompilerTest < Minitest::Test "[1, 2, 3].min", "[foo, bar, baz].min", "[foo, bar, baz].min(1)", + "[**{ x: true }][0][:x]", # Core method calls "alias foo bar", "alias :foo :bar", From d4d7f0b4a65e94dc98b434ceec2c805fe62e8f1c Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 22 Nov 2022 10:38:43 -0500 Subject: [PATCH 059/104] 
Pattern match for arrays --- lib/syntax_tree/compiler.rb | 122 +++++++++++++++++++++++++++ lib/syntax_tree/yarv.rb | 18 ++++ lib/syntax_tree/yarv/instructions.rb | 4 +- test/compiler_test.rb | 8 +- 4 files changed, 149 insertions(+), 3 deletions(-) diff --git a/lib/syntax_tree/compiler.rb b/lib/syntax_tree/compiler.rb index 1b2c5987..ac49f7e0 100644 --- a/lib/syntax_tree/compiler.rb +++ b/lib/syntax_tree/compiler.rb @@ -343,6 +343,101 @@ def visit_array(node) end end + def visit_aryptn(node) + match_failures = [] + jumps_to_exit = [] + + # If there's a constant, then check if we match against that constant or + # not first. Branch to failure if we don't. + if node.constant + iseq.dup + visit(node.constant) + iseq.checkmatch(YARV::VM_CHECKMATCH_TYPE_CASE) + match_failures << iseq.branchunless(-1) + end + + # First, check if the #deconstruct cache is nil. If it is, we're going to + # call #deconstruct on the object and cache the result. + iseq.topn(2) + branchnil = iseq.branchnil(-1) + + # Next, ensure that the cached value was cached correctly, otherwise fail + # the match. + iseq.topn(2) + match_failures << iseq.branchunless(-1) + + # Since we have a valid cached value, we can skip past the part where we + # call #deconstruct on the object. + iseq.pop + iseq.topn(1) + jump = iseq.jump(-1) + + # Check if the object responds to #deconstruct, fail the match otherwise. + branchnil.patch!(iseq) + iseq.dup + iseq.putobject(:deconstruct) + iseq.send(:respond_to?, 1) + iseq.setn(3) + match_failures << iseq.branchunless(-1) + + # Call #deconstruct and ensure that it's an array, raise an error + # otherwise. + iseq.send(:deconstruct, 0) + iseq.setn(2) + iseq.dup + iseq.checktype(YARV::VM_CHECKTYPE_ARRAY) + match_error = iseq.branchunless(-1) + + # Ensure that the deconstructed array has the correct size, fail the match + # otherwise. 
+ jump[1] = iseq.label + iseq.dup + iseq.send(:length, 0) + iseq.putobject(node.requireds.length) + iseq.send(:==, 1) + match_failures << iseq.branchunless(-1) + + # For each required element, check if the deconstructed array contains the + # element, otherwise jump out to the top-level match failure. + iseq.dup + node.requireds.each_with_index do |required, index| + iseq.putobject(index) + iseq.send(:[], 1) + + case required + when VarField + lookup = visit(required) + iseq.setlocal(lookup.index, lookup.level) + else + visit(required) + iseq.checkmatch(YARV::VM_CHECKMATCH_TYPE_CASE) + match_failures << iseq.branchunless(-1) + end + + if index < node.requireds.length - 1 + iseq.dup + else + iseq.pop + jumps_to_exit << iseq.jump(-1) + end + end + + # Set up the routine here to raise an error to indicate that the type of + # the deconstructed array was incorrect. + match_error.patch!(iseq) + iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) + iseq.putobject(TypeError) + iseq.putobject("deconstruct must return Array") + iseq.send(:"core#raise", 2) + iseq.pop + + # Patch all of the match failures to jump here so that we pop a final + # value before returning to the parent node. 
+ match_failures.each { |match_failure| match_failure.patch!(iseq) } + iseq.pop + jumps_to_exit + end + def visit_assign(node) case node.target when ARefField @@ -1298,6 +1393,33 @@ def visit_range(node) end end + def visit_rassign(node) + if node.operator.is_a?(Kw) + iseq.putnil + visit(node.value) + iseq.dup + jumps = [] + + case node.pattern + when VarField + lookup = visit(node.pattern) + iseq.setlocal(lookup.index, lookup.level) + jumps << iseq.jump(-1) + else + jumps.concat(visit(node.pattern)) + end + + iseq.pop + iseq.pop + iseq.putobject(false) + iseq.leave + + jumps.each { |jump| jump[1] = iseq.label } + iseq.adjuststack(2) + iseq.putobject(true) + end + end + def visit_rational(node) iseq.putobject(node.accept(RubyVisitor.new)) end diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index b168a135..2ca29de7 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -370,6 +370,16 @@ def checkkeyword(keyword_bits_index, keyword_index) push(CheckKeyword.new(keyword_bits_index, keyword_index)) end + def checkmatch(flag) + stack.change_by(-2 + 1) + push([:checkmatch, flag]) + end + + def checktype(type) + stack.change_by(-1 + 2) + push([:checktype, type]) + end + def concatarray push(ConcatArray.new) end @@ -832,5 +842,13 @@ def call_data(method_id, argc, flag = VM_CALL_ARGS_SIMPLE) VM_SVAR_LASTLINE = 0 # $_ VM_SVAR_BACKREF = 1 # $~ VM_SVAR_FLIPFLOP_START = 2 # flipflop + + # These constants correspond to the checktype instruction. + VM_CHECKTYPE_ARRAY = 7 + + # These constants correspond to the checkmatch instruction. 
+ VM_CHECKMATCH_TYPE_WHEN = 1 + VM_CHECKMATCH_TYPE_CASE = 2 + VM_CHECKMATCH_TYPE_RESCUE = 3 end end diff --git a/lib/syntax_tree/yarv/instructions.rb b/lib/syntax_tree/yarv/instructions.rb index c50c5c84..ccb7a345 100644 --- a/lib/syntax_tree/yarv/instructions.rb +++ b/lib/syntax_tree/yarv/instructions.rb @@ -632,11 +632,11 @@ def length end def pops - number + 0 end def pushes - number * 2 + number end end diff --git a/test/compiler_test.rb b/test/compiler_test.rb index d44eef50..4f4fa9f3 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -416,7 +416,13 @@ class CompilerTest < Minitest::Test "-> {}", "-> (bar) do end", "-> (bar) {}", - "-> (bar; baz) { }" + "-> (bar; baz) { }", + # Pattern matching + "foo in bar", + "foo in [bar]", + "foo in [bar, baz]", + "foo in [1, 2, 3, bar, 4, 5, 6, baz]", + "foo in Foo[1, 2, 3, bar, 4, 5, 6, baz]", ] # These are the combinations of instructions that we're going to test. From 5abcb5a646fc3d4a9f22c2de085dc162e53b8ebd Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 22 Nov 2022 11:01:49 -0500 Subject: [PATCH 060/104] Handle => operator for rightward assignment --- lib/syntax_tree/compiler.rb | 77 ++++++++++++++++++++++++++++++++++++- test/compiler_test.rb | 1 + 2 files changed, 76 insertions(+), 2 deletions(-) diff --git a/lib/syntax_tree/compiler.rb b/lib/syntax_tree/compiler.rb index ac49f7e0..4050f4c9 100644 --- a/lib/syntax_tree/compiler.rb +++ b/lib/syntax_tree/compiler.rb @@ -1394,11 +1394,13 @@ def visit_range(node) end def visit_rassign(node) + iseq.putnil + if node.operator.is_a?(Kw) - iseq.putnil + jumps = [] + visit(node.value) iseq.dup - jumps = [] case node.pattern when VarField @@ -1417,6 +1419,77 @@ def visit_rassign(node) jumps.each { |jump| jump[1] = iseq.label } iseq.adjuststack(2) iseq.putobject(true) + else + jumps_to_match = [] + + iseq.putnil + iseq.putobject(false) + iseq.putnil + iseq.putnil + visit(node.value) + iseq.dup + + # Visit the pattern. 
If it matches, + case node.pattern + when VarField + lookup = visit(node.pattern) + iseq.setlocal(lookup.index, lookup.level) + jumps_to_match << iseq.jump(-1) + else + jumps_to_match.concat(visit(node.pattern)) + end + + # First we're going to push the core onto the stack, then we'll check if + # the value to match is truthy. If it is, we'll jump down to raise + # NoMatchingPatternKeyError. Otherwise we'll raise + # NoMatchingPatternError. + iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) + iseq.topn(4) + branchif_no_key = iseq.branchif(-1) + + # Here we're going to raise NoMatchingPatternError. + iseq.putobject(NoMatchingPatternError) + iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) + iseq.putobject("%p: %s") + iseq.topn(4) + iseq.topn(7) + iseq.send(:"core#sprintf", 3) + iseq.send(:"core#raise", 2) + jump_to_exit = iseq.jump(-1) + + # Here we're going to raise NoMatchingPatternKeyError. + branchif_no_key.patch!(iseq) + iseq.putobject(NoMatchingPatternKeyError) + iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) + iseq.putobject("%p: %s") + iseq.topn(4) + iseq.topn(7) + iseq.send(:"core#sprintf", 3) + iseq.topn(7) + iseq.topn(9) + + # Super special behavior here because of the weird kw_arg handling. + iseq.stack.change_by(-(1 + 1) + 1) + call_data = { mid: :new, flag: YARV::VM_CALL_KWARG, orig_argc: 1, kw_arg: [:matchee, :key] } + + if specialized_instruction + iseq.push([:opt_send_without_block, call_data]) + else + iseq.push([:send, call_data, nil]) + end + + iseq.send(:"core#raise", 1) + + # This runs when the pattern fails to match. + jump_to_exit[1] = iseq.label + iseq.adjuststack(7) + iseq.putnil + iseq.leave + + # This runs when the pattern matches successfully. 
+ jumps_to_match.each { |jump| jump[1] = iseq.label } + iseq.adjuststack(6) + iseq.putnil end end diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 4f4fa9f3..c2472432 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -423,6 +423,7 @@ class CompilerTest < Minitest::Test "foo in [bar, baz]", "foo in [1, 2, 3, bar, 4, 5, 6, baz]", "foo in Foo[1, 2, 3, bar, 4, 5, 6, baz]", + "foo => bar" ] # These are the combinations of instructions that we're going to test. From 8a0f1ecc1eae2943d50a3a86473ffc2c329e27be Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 22 Nov 2022 11:41:06 -0500 Subject: [PATCH 061/104] Create Legacy module for legacy YARV instructions --- lib/syntax_tree/compiler.rb | 24 +-- lib/syntax_tree/yarv.rb | 4 +- lib/syntax_tree/yarv/instructions.rb | 250 ++++++++++++++------------- 3 files changed, 141 insertions(+), 137 deletions(-) diff --git a/lib/syntax_tree/compiler.rb b/lib/syntax_tree/compiler.rb index 4050f4c9..c4eb5194 100644 --- a/lib/syntax_tree/compiler.rb +++ b/lib/syntax_tree/compiler.rb @@ -870,18 +870,18 @@ def visit_defined(node) case value when Const iseq.putnil - iseq.defined(YARV::Defined::CONST, name, "constant") + iseq.defined(YARV::Defined::TYPE_CONST, name, "constant") when CVar iseq.putnil - iseq.defined(YARV::Defined::CVAR, name, "class variable") + iseq.defined(YARV::Defined::TYPE_CVAR, name, "class variable") when GVar iseq.putnil - iseq.defined(YARV::Defined::GVAR, name, "global-variable") + iseq.defined(YARV::Defined::TYPE_GVAR, name, "global-variable") when Ident iseq.putobject("local-variable") when IVar iseq.putnil - iseq.defined(YARV::Defined::IVAR, name, "instance-variable") + iseq.defined(YARV::Defined::TYPE_IVAR, name, "instance-variable") when Kw case name when :false @@ -898,13 +898,13 @@ def visit_defined(node) iseq.putself name = node.value.value.value.to_sym - iseq.defined(YARV::Defined::FUNC, name, "method") + iseq.defined(YARV::Defined::TYPE_FUNC, name, "method") when YieldNode 
iseq.putnil - iseq.defined(YARV::Defined::YIELD, false, "yield") + iseq.defined(YARV::Defined::TYPE_YIELD, false, "yield") when ZSuper iseq.putnil - iseq.defined(YARV::Defined::ZSUPER, false, "super") + iseq.defined(YARV::Defined::TYPE_ZSUPER, false, "super") else iseq.putobject("expression") end @@ -1875,24 +1875,24 @@ def opassign_defined(node) name = node.target.constant.value.to_sym iseq.dup - iseq.defined(YARV::Defined::CONST_FROM, name, true) + iseq.defined(YARV::Defined::TYPE_CONST_FROM, name, true) when TopConstField name = node.target.constant.value.to_sym iseq.putobject(Object) iseq.dup - iseq.defined(YARV::Defined::CONST_FROM, name, true) + iseq.defined(YARV::Defined::TYPE_CONST_FROM, name, true) when VarField name = node.target.value.value.to_sym iseq.putnil case node.target.value when Const - iseq.defined(YARV::Defined::CONST, name, true) + iseq.defined(YARV::Defined::TYPE_CONST, name, true) when CVar - iseq.defined(YARV::Defined::CVAR, name, true) + iseq.defined(YARV::Defined::TYPE_CVAR, name, true) when GVar - iseq.defined(YARV::Defined::GVAR, name, true) + iseq.defined(YARV::Defined::TYPE_GVAR, name, true) end end diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index 2ca29de7..89920c6a 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -434,14 +434,14 @@ def getblockparamproxy(index, level) def getclassvariable(name) if RUBY_VERSION < "3.0" - push(GetClassVariableUncached.new(name)) + push(Legacy::GetClassVariable.new(name)) else push(GetClassVariable.new(name, inline_storage_for(name))) end end def getconstant(name) - push(GetConstant.new(name)) + push(Legacy::GetConstant.new(name)) end def getglobal(name) diff --git a/lib/syntax_tree/yarv/instructions.rb b/lib/syntax_tree/yarv/instructions.rb index ccb7a345..e6853a87 100644 --- a/lib/syntax_tree/yarv/instructions.rb +++ b/lib/syntax_tree/yarv/instructions.rb @@ -333,44 +333,36 @@ def pushes # ### Summary # - # `defined` checks if the top value of the stack is 
defined. If it is, it - # pushes its value onto the stack. Otherwise it pushes `nil`. + # `defineclass` defines a class. First it pops the superclass off the + # stack, then it pops the object off the stack that the class should be + # defined under. It has three arguments: the name of the constant, the + # instruction sequence associated with the class, and various flags that + # indicate if it is a singleton class, a module, or a regular class. # # ### Usage # # ~~~ruby - # defined?(x) + # class Foo + # end # ~~~ # - class Defined - NIL = 1 - IVAR = 2 - LVAR = 3 - GVAR = 4 - CVAR = 5 - CONST = 6 - METHOD = 7 - YIELD = 8 - ZSUPER = 9 - SELF = 10 - TRUE = 11 - FALSE = 12 - ASGN = 13 - EXPR = 14 - REF = 15 - FUNC = 16 - CONST_FROM = 17 + class DefineClass + TYPE_CLASS = 0 + TYPE_SINGLETON_CLASS = 1 + TYPE_MODULE = 2 + FLAG_SCOPED = 8 + FLAG_HAS_SUPERCLASS = 16 - attr_reader :type, :name, :message + attr_reader :name, :class_iseq, :flags - def initialize(type, name, message) - @type = type + def initialize(name, class_iseq, flags) @name = name - @message = message + @class_iseq = class_iseq + @flags = flags end def to_a(_iseq) - [:defined, type, name, message] + [:defineclass, name, class_iseq.to_a, flags] end def length @@ -378,7 +370,7 @@ def length end def pops - 1 + 2 end def pushes @@ -388,36 +380,44 @@ def pushes # ### Summary # - # `defineclass` defines a class. First it pops the superclass off the - # stack, then it pops the object off the stack that the class should be - # defined under. It has three arguments: the name of the constant, the - # instruction sequence associated with the class, and various flags that - # indicate if it is a singleton class, a module, or a regular class. + # `defined` checks if the top value of the stack is defined. If it is, it + # pushes its value onto the stack. Otherwise it pushes `nil`. 
# # ### Usage # # ~~~ruby - # class Foo - # end + # defined?(x) # ~~~ # - class DefineClass - TYPE_CLASS = 0 - TYPE_SINGLETON_CLASS = 1 - TYPE_MODULE = 2 - FLAG_SCOPED = 8 - FLAG_HAS_SUPERCLASS = 16 + class Defined + TYPE_NIL = 1 + TYPE_IVAR = 2 + TYPE_LVAR = 3 + TYPE_GVAR = 4 + TYPE_CVAR = 5 + TYPE_CONST = 6 + TYPE_METHOD = 7 + TYPE_YIELD = 8 + TYPE_ZSUPER = 9 + TYPE_SELF = 10 + TYPE_TRUE = 11 + TYPE_FALSE = 12 + TYPE_ASGN = 13 + TYPE_EXPR = 14 + TYPE_REF = 15 + TYPE_FUNC = 16 + TYPE_CONST_FROM = 17 - attr_reader :name, :class_iseq, :flags + attr_reader :type, :name, :message - def initialize(name, class_iseq, flags) + def initialize(type, name, message) + @type = type @name = name - @class_iseq = class_iseq - @flags = flags + @message = message end def to_a(_iseq) - [:defineclass, name, class_iseq.to_a, flags] + [:defined, type, name, message] end def length @@ -425,7 +425,7 @@ def length end def pops - 2 + 1 end def pushes @@ -800,83 +800,6 @@ def pushes end end - # ### Summary - # - # `getclassvariable` looks for a class variable in the current class and - # pushes its value onto the stack. - # - # This version of the `getclassvariable` instruction is no longer used since - # in Ruby 3.0 it gained an inline cache.` - # - # ### Usage - # - # ~~~ruby - # @@class_variable - # ~~~ - # - class GetClassVariableUncached - attr_reader :name - - def initialize(name) - @name = name - end - - def to_a(_iseq) - [:getclassvariable, name] - end - - def length - 2 - end - - def pops - 0 - end - - def pushes - 1 - end - end - - # ### Summary - # - # `getconstant` performs a constant lookup and pushes the value of the - # constant onto the stack. It pops both the class it should look in and - # whether or not it should look globally as well. - # - # This instruction is no longer used since in Ruby 3.2 it was replaced by - # the consolidated `opt_getconstant_path` instruction. 
- # - # ### Usage - # - # ~~~ruby - # Constant - # ~~~ - # - class GetConstant - attr_reader :name - - def initialize(name) - @name = name - end - - def to_a(_iseq) - [:getconstant, name] - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end - end - # ### Summary # # `getglobal` pushes the value of a global variables onto the stack. @@ -1067,5 +990,86 @@ def pushes 1 end end + + # This module contains the instructions that used to be a part of YARV but + # have been replaced or removed in more recent versions. + module Legacy + # ### Summary + # + # `getclassvariable` looks for a class variable in the current class and + # pushes its value onto the stack. + # + # This version of the `getclassvariable` instruction is no longer used + # since in Ruby 3.0 it gained an inline cache.` + # + # ### Usage + # + # ~~~ruby + # @@class_variable + # ~~~ + # + class GetClassVariable + attr_reader :name + + def initialize(name) + @name = name + end + + def to_a(_iseq) + [:getclassvariable, name] + end + + def length + 2 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `getconstant` performs a constant lookup and pushes the value of the + # constant onto the stack. It pops both the class it should look in and + # whether or not it should look globally as well. + # + # This instruction is no longer used since in Ruby 3.2 it was replaced by + # the consolidated `opt_getconstant_path` instruction. 
+ # + # ### Usage + # + # ~~~ruby + # Constant + # ~~~ + # + class GetConstant + attr_reader :name + + def initialize(name) + @name = name + end + + def to_a(_iseq) + [:getconstant, name] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + end end end From ba8cad0d1485b5e039e669decc0d2f6dbb61fa07 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 22 Nov 2022 11:45:28 -0500 Subject: [PATCH 062/104] More instructions to classes --- .rubocop.yml | 3 + lib/syntax_tree.rb | 7 +- lib/syntax_tree/compiler.rb | 2131 -------------- lib/syntax_tree/yarv.rb | 851 +----- lib/syntax_tree/yarv/bf.rb | 30 +- lib/syntax_tree/yarv/compiler.rb | 2164 ++++++++++++++ lib/syntax_tree/yarv/disassembler.rb | 247 +- lib/syntax_tree/yarv/instruction_sequence.rb | 671 +++++ lib/syntax_tree/yarv/instructions.rb | 2688 +++++++++++++++++- lib/syntax_tree/yarv/legacy.rb | 169 ++ lib/syntax_tree/yarv/local_table.rb | 81 + test/compiler_test.rb | 5 +- test/yarv_test.rb | 4 +- 13 files changed, 5823 insertions(+), 3228 deletions(-) delete mode 100644 lib/syntax_tree/compiler.rb create mode 100644 lib/syntax_tree/yarv/compiler.rb create mode 100644 lib/syntax_tree/yarv/instruction_sequence.rb create mode 100644 lib/syntax_tree/yarv/legacy.rb create mode 100644 lib/syntax_tree/yarv/local_table.rb diff --git a/.rubocop.yml b/.rubocop.yml index 134a75dc..b7ba43e8 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -64,6 +64,9 @@ Style/CaseEquality: Style/CaseLikeIf: Enabled: false +Style/Documentation: + Enabled: false + Style/ExplicitBlockArgument: Enabled: false diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index 792ba20c..b2ff8414 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -10,6 +10,7 @@ require_relative "syntax_tree/formatter" require_relative "syntax_tree/node" +require_relative "syntax_tree/dsl" require_relative "syntax_tree/version" require_relative "syntax_tree/basic_visitor" @@ -26,12 +27,14 @@ require_relative "syntax_tree/pattern" 
require_relative "syntax_tree/search" -require_relative "syntax_tree/dsl" require_relative "syntax_tree/yarv" -require_relative "syntax_tree/compiler" require_relative "syntax_tree/yarv/bf" +require_relative "syntax_tree/yarv/compiler" require_relative "syntax_tree/yarv/disassembler" +require_relative "syntax_tree/yarv/instruction_sequence" require_relative "syntax_tree/yarv/instructions" +require_relative "syntax_tree/yarv/legacy" +require_relative "syntax_tree/yarv/local_table" # Syntax Tree is a suite of tools built on top of the internal CRuby parser. It # provides the ability to generate a syntax tree from source, as well as the diff --git a/lib/syntax_tree/compiler.rb b/lib/syntax_tree/compiler.rb deleted file mode 100644 index c4eb5194..00000000 --- a/lib/syntax_tree/compiler.rb +++ /dev/null @@ -1,2131 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - # This class is an experiment in transforming Syntax Tree nodes into their - # corresponding YARV instruction sequences. It attempts to mirror the - # behavior of RubyVM::InstructionSequence.compile. - # - # You use this as with any other visitor. First you parse code into a tree, - # then you visit it with this compiler. Visiting the root node of the tree - # will return a SyntaxTree::Visitor::Compiler::InstructionSequence object. - # With that object you can call #to_a on it, which will return a serialized - # form of the instruction sequence as an array. This array _should_ mirror - # the array given by RubyVM::InstructionSequence#to_a. 
- # - # As an example, here is how you would compile a single expression: - # - # program = SyntaxTree.parse("1 + 2") - # program.accept(SyntaxTree::Visitor::Compiler.new).to_a - # - # [ - # "YARVInstructionSequence/SimpleDataFormat", - # 3, - # 1, - # 1, - # {:arg_size=>0, :local_size=>0, :stack_max=>2}, - # "", - # "", - # "", - # 1, - # :top, - # [], - # {}, - # [], - # [ - # [:putobject_INT2FIX_1_], - # [:putobject, 2], - # [:opt_plus, {:mid=>:+, :flag=>16, :orig_argc=>1}], - # [:leave] - # ] - # ] - # - # Note that this is the same output as calling: - # - # RubyVM::InstructionSequence.compile("1 + 2").to_a - # - class Compiler < BasicVisitor - # This visitor is responsible for converting Syntax Tree nodes into their - # corresponding Ruby structures. This is used to convert the operands of - # some instructions like putobject that push a Ruby object directly onto - # the stack. It is only used when the entire structure can be represented - # at compile-time, as opposed to constructed at run-time. - class RubyVisitor < BasicVisitor - # This error is raised whenever a node cannot be converted into a Ruby - # object at compile-time. - class CompilationError < StandardError - end - - # This will attempt to compile the given node. If it's possible, then - # it will return the compiled object. Otherwise it will return nil. - def self.compile(node) - node.accept(new) - rescue CompilationError - end - - def visit_array(node) - visit_all(node.contents.parts) - end - - def visit_bare_assoc_hash(node) - node.assocs.to_h do |assoc| - # We can only convert regular key-value pairs. A double splat ** - # operator means it has to be converted at run-time. 
- raise CompilationError unless assoc.is_a?(Assoc) - [visit(assoc.key), visit(assoc.value)] - end - end - - def visit_float(node) - node.value.to_f - end - - alias visit_hash visit_bare_assoc_hash - - def visit_imaginary(node) - node.value.to_c - end - - def visit_int(node) - node.value.to_i - end - - def visit_label(node) - node.value.chomp(":").to_sym - end - - def visit_mrhs(node) - visit_all(node.parts) - end - - def visit_qsymbols(node) - node.elements.map { |element| visit(element).to_sym } - end - - def visit_qwords(node) - visit_all(node.elements) - end - - def visit_range(node) - left, right = [visit(node.left), visit(node.right)] - node.operator.value === ".." ? left..right : left...right - end - - def visit_rational(node) - node.value.to_r - end - - def visit_regexp_literal(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - Regexp.new(node.parts.first.value, visit_regexp_literal_flags(node)) - else - # Any interpolation of expressions or variables will result in the - # regular expression being constructed at run-time. - raise CompilationError - end - end - - # This isn't actually a visit method, though maybe it should be. It is - # responsible for converting the set of string options on a regular - # expression into its equivalent integer. 
- def visit_regexp_literal_flags(node) - node - .options - .chars - .inject(0) do |accum, option| - accum | - case option - when "i" - Regexp::IGNORECASE - when "x" - Regexp::EXTENDED - when "m" - Regexp::MULTILINE - else - raise "Unknown regexp option: #{option}" - end - end - end - - def visit_symbol_literal(node) - node.value.value.to_sym - end - - def visit_symbols(node) - node.elements.map { |element| visit(element).to_sym } - end - - def visit_tstring_content(node) - node.value - end - - def visit_var_ref(node) - raise CompilationError unless node.value.is_a?(Kw) - - case node.value.value - when "nil" - nil - when "true" - true - when "false" - false - else - raise CompilationError - end - end - - def visit_word(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - node.parts.first.value - else - # Any interpolation of expressions or variables will result in the - # string being constructed at run-time. - raise CompilationError - end - end - - def visit_words(node) - visit_all(node.elements) - end - - def visit_unsupported(_node) - raise CompilationError - end - - # Please forgive the metaprogramming here. This is used to create visit - # methods for every node that we did not explicitly handle. By default - # each of these methods will raise a CompilationError. - handled = instance_methods(false) - (Visitor.instance_methods(false) - handled).each do |method| - alias_method method, :visit_unsupported - end - end - - # These options mirror the compilation options that we currently support - # that can be also passed to RubyVM::InstructionSequence.compile. - attr_reader :frozen_string_literal, - :operands_unification, - :specialized_instruction - - # The current instruction sequence that is being compiled. - attr_reader :iseq - - # A boolean to track if we're currently compiling the last statement - # within a set of statements. This information is necessary to determine - # if we need to return the value of the last statement. 
- attr_reader :last_statement - - def initialize( - frozen_string_literal: false, - operands_unification: true, - specialized_instruction: true - ) - @frozen_string_literal = frozen_string_literal - @operands_unification = operands_unification - @specialized_instruction = specialized_instruction - - @iseq = nil - @last_statement = false - end - - def visit_BEGIN(node) - visit(node.statements) - end - - def visit_CHAR(node) - if frozen_string_literal - iseq.putobject(node.value[1..]) - else - iseq.putstring(node.value[1..]) - end - end - - def visit_END(node) - once_iseq = - with_child_iseq(iseq.block_child_iseq(node.location)) do - postexe_iseq = - with_child_iseq(iseq.block_child_iseq(node.location)) do - iseq.event(:RUBY_EVENT_B_CALL) - - *statements, last_statement = node.statements.body - visit_all(statements) - with_last_statement { visit(last_statement) } - - iseq.event(:RUBY_EVENT_B_RETURN) - iseq.leave - end - - iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) - iseq.send(:"core#set_postexe", 0, YARV::VM_CALL_FCALL, postexe_iseq) - iseq.leave - end - - iseq.once(once_iseq, iseq.inline_storage) - iseq.pop - end - - def visit_alias(node) - iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) - iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_CBASE) - visit(node.left) - visit(node.right) - iseq.send(:"core#set_method_alias", 3) - end - - def visit_aref(node) - visit(node.collection) - - if !frozen_string_literal && specialized_instruction && (node.index.parts.length == 1) - arg = node.index.parts.first - - if arg.is_a?(StringLiteral) && (arg.parts.length == 1) - string_part = arg.parts.first - - if string_part.is_a?(TStringContent) - iseq.opt_aref_with(string_part.value, :[], 1) - return - end - end - end - - visit(node.index) - iseq.send(:[], 1) - end - - def visit_arg_block(node) - visit(node.value) - end - - def visit_arg_paren(node) - visit(node.arguments) - end - - def visit_arg_star(node) - visit(node.value) - iseq.splatarray(false) - end - - def 
visit_args(node) - visit_all(node.parts) - end - - def visit_array(node) - if (compiled = RubyVisitor.compile(node)) - iseq.duparray(compiled) - elsif node.contents && node.contents.parts.length == 1 && - node.contents.parts.first.is_a?(BareAssocHash) && - node.contents.parts.first.assocs.length == 1 && - node.contents.parts.first.assocs.first.is_a?(AssocSplat) - iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) - iseq.newhash(0) - visit(node.contents.parts.first) - iseq.send(:"core#hash_merge_kwd", 2) - iseq.newarraykwsplat(1) - else - length = 0 - - node.contents.parts.each do |part| - if part.is_a?(ArgStar) - if length > 0 - iseq.newarray(length) - length = 0 - end - - visit(part.value) - iseq.concatarray - else - visit(part) - length += 1 - end - end - - iseq.newarray(length) if length > 0 - iseq.concatarray if length > 0 && length != node.contents.parts.length - end - end - - def visit_aryptn(node) - match_failures = [] - jumps_to_exit = [] - - # If there's a constant, then check if we match against that constant or - # not first. Branch to failure if we don't. - if node.constant - iseq.dup - visit(node.constant) - iseq.checkmatch(YARV::VM_CHECKMATCH_TYPE_CASE) - match_failures << iseq.branchunless(-1) - end - - # First, check if the #deconstruct cache is nil. If it is, we're going to - # call #deconstruct on the object and cache the result. - iseq.topn(2) - branchnil = iseq.branchnil(-1) - - # Next, ensure that the cached value was cached correctly, otherwise fail - # the match. - iseq.topn(2) - match_failures << iseq.branchunless(-1) - - # Since we have a valid cached value, we can skip past the part where we - # call #deconstruct on the object. - iseq.pop - iseq.topn(1) - jump = iseq.jump(-1) - - # Check if the object responds to #deconstruct, fail the match otherwise. 
- branchnil.patch!(iseq) - iseq.dup - iseq.putobject(:deconstruct) - iseq.send(:respond_to?, 1) - iseq.setn(3) - match_failures << iseq.branchunless(-1) - - # Call #deconstruct and ensure that it's an array, raise an error - # otherwise. - iseq.send(:deconstruct, 0) - iseq.setn(2) - iseq.dup - iseq.checktype(YARV::VM_CHECKTYPE_ARRAY) - match_error = iseq.branchunless(-1) - - # Ensure that the deconstructed array has the correct size, fail the match - # otherwise. - jump[1] = iseq.label - iseq.dup - iseq.send(:length, 0) - iseq.putobject(node.requireds.length) - iseq.send(:==, 1) - match_failures << iseq.branchunless(-1) - - # For each required element, check if the deconstructed array contains the - # element, otherwise jump out to the top-level match failure. - iseq.dup - node.requireds.each_with_index do |required, index| - iseq.putobject(index) - iseq.send(:[], 1) - - case required - when VarField - lookup = visit(required) - iseq.setlocal(lookup.index, lookup.level) - else - visit(required) - iseq.checkmatch(YARV::VM_CHECKMATCH_TYPE_CASE) - match_failures << iseq.branchunless(-1) - end - - if index < node.requireds.length - 1 - iseq.dup - else - iseq.pop - jumps_to_exit << iseq.jump(-1) - end - end - - # Set up the routine here to raise an error to indicate that the type of - # the deconstructed array was incorrect. - match_error.patch!(iseq) - iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) - iseq.putobject(TypeError) - iseq.putobject("deconstruct must return Array") - iseq.send(:"core#raise", 2) - iseq.pop - - # Patch all of the match failures to jump here so that we pop a final - # value before returning to the parent node. 
- match_failures.each { |match_failure| match_failure.patch!(iseq) } - iseq.pop - jumps_to_exit - end - - def visit_assign(node) - case node.target - when ARefField - if !frozen_string_literal && specialized_instruction && (node.target.index.parts.length == 1) - arg = node.target.index.parts.first - - if arg.is_a?(StringLiteral) && (arg.parts.length == 1) - string_part = arg.parts.first - - if string_part.is_a?(TStringContent) - visit(node.target.collection) - visit(node.value) - iseq.swap - iseq.topn(1) - iseq.opt_aset_with(string_part.value, :[]=, 2) - iseq.pop - return - end - end - end - - iseq.putnil - visit(node.target.collection) - visit(node.target.index) - visit(node.value) - iseq.setn(3) - iseq.send(:[]=, 2) - iseq.pop - when ConstPathField - names = constant_names(node.target) - name = names.pop - - if RUBY_VERSION >= "3.2" - iseq.opt_getconstant_path(names) - visit(node.value) - iseq.swap - iseq.topn(1) - iseq.swap - iseq.setconstant(name) - else - visit(node.value) - iseq.dup if last_statement? - iseq.opt_getconstant_path(names) - iseq.setconstant(name) - end - when Field - iseq.putnil - visit(node.target) - visit(node.value) - iseq.setn(2) - iseq.send(:"#{node.target.name.value}=", 1) - iseq.pop - when TopConstField - name = node.target.constant.value.to_sym - - if RUBY_VERSION >= "3.2" - iseq.putobject(Object) - visit(node.value) - iseq.swap - iseq.topn(1) - iseq.swap - iseq.setconstant(name) - else - visit(node.value) - iseq.dup if last_statement? - iseq.putobject(Object) - iseq.setconstant(name) - end - when VarField - visit(node.value) - iseq.dup if last_statement? 
- - case node.target.value - when Const - iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) - iseq.setconstant(node.target.value.value.to_sym) - when CVar - iseq.setclassvariable(node.target.value.value.to_sym) - when GVar - iseq.setglobal(node.target.value.value.to_sym) - when Ident - lookup = visit(node.target) - - if lookup.local.is_a?(YARV::LocalTable::BlockLocal) - iseq.setblockparam(lookup.index, lookup.level) - else - iseq.setlocal(lookup.index, lookup.level) - end - when IVar - iseq.setinstancevariable(node.target.value.value.to_sym) - end - end - end - - def visit_assoc(node) - visit(node.key) - visit(node.value) - end - - def visit_assoc_splat(node) - visit(node.value) - end - - def visit_backref(node) - iseq.getspecial(YARV::VM_SVAR_BACKREF, 2 * node.value[1..].to_i) - end - - def visit_bare_assoc_hash(node) - if (compiled = RubyVisitor.compile(node)) - iseq.duphash(compiled) - else - visit_all(node.assocs) - end - end - - def visit_binary(node) - case node.operator - when :"&&" - visit(node.left) - iseq.dup - - branchunless = iseq.branchunless(-1) - iseq.pop - - visit(node.right) - branchunless.patch!(iseq) - when :"||" - visit(node.left) - iseq.dup - - branchif = iseq.branchif(-1) - iseq.pop - - visit(node.right) - branchif.patch!(iseq) - else - visit(node.left) - visit(node.right) - iseq.send(node.operator, 1) - end - end - - def visit_block(node) - with_child_iseq(iseq.block_child_iseq(node.location)) do - iseq.event(:RUBY_EVENT_B_CALL) - visit(node.block_var) - visit(node.bodystmt) - iseq.event(:RUBY_EVENT_B_RETURN) - iseq.leave - end - end - - def visit_block_var(node) - params = node.params - - if params.requireds.length == 1 && params.optionals.empty? && - !params.rest && params.posts.empty? && params.keywords.empty? 
&& - !params.keyword_rest && !params.block - iseq.argument_options[:ambiguous_param0] = true - end - - visit(node.params) - - node.locals.each { |local| iseq.local_table.plain(local.value.to_sym) } - end - - def visit_blockarg(node) - iseq.argument_options[:block_start] = iseq.argument_size - iseq.local_table.block(node.name.value.to_sym) - iseq.argument_size += 1 - end - - def visit_bodystmt(node) - visit(node.statements) - end - - def visit_call(node) - if node.is_a?(CallNode) - return( - visit_call( - CommandCall.new( - receiver: node.receiver, - operator: node.operator, - message: node.message, - arguments: node.arguments, - block: nil, - location: node.location - ) - ) - ) - end - - arg_parts = argument_parts(node.arguments) - argc = arg_parts.length - - # First we're going to check if we're calling a method on an array - # literal without any arguments. In that case there are some - # specializations we might be able to perform. - if argc == 0 && (node.message.is_a?(Ident) || node.message.is_a?(Op)) - case node.receiver - when ArrayLiteral - parts = node.receiver.contents&.parts || [] - - if parts.none? { |part| part.is_a?(ArgStar) } && - RubyVisitor.compile(node.receiver).nil? - case node.message.value - when "max" - visit(node.receiver.contents) - iseq.opt_newarray_max(parts.length) - return - when "min" - visit(node.receiver.contents) - iseq.opt_newarray_min(parts.length) - return - end - end - when StringLiteral - if RubyVisitor.compile(node.receiver).nil? 
- case node.message.value - when "-@" - iseq.opt_str_uminus(node.receiver.parts.first.value) - return - when "freeze" - iseq.opt_str_freeze(node.receiver.parts.first.value) - return - end - end - end - end - - if node.receiver - if node.receiver.is_a?(VarRef) - lookup = iseq.local_variable(node.receiver.value.value.to_sym) - - if lookup.local.is_a?(YARV::LocalTable::BlockLocal) - iseq.getblockparamproxy(lookup.index, lookup.level) - else - visit(node.receiver) - end - else - visit(node.receiver) - end - else - iseq.putself - end - - branchnil = - if node.operator&.value == "&." - iseq.dup - iseq.branchnil(-1) - end - - flag = 0 - - arg_parts.each do |arg_part| - case arg_part - when ArgBlock - argc -= 1 - flag |= YARV::VM_CALL_ARGS_BLOCKARG - visit(arg_part) - when ArgStar - flag |= YARV::VM_CALL_ARGS_SPLAT - visit(arg_part) - when ArgsForward - flag |= YARV::VM_CALL_ARGS_SPLAT | YARV::VM_CALL_ARGS_BLOCKARG - - lookup = iseq.local_table.find(:*, 0) - iseq.getlocal(lookup.index, lookup.level) - iseq.splatarray(arg_parts.length != 1) - - lookup = iseq.local_table.find(:&, 0) - iseq.getblockparamproxy(lookup.index, lookup.level) - when BareAssocHash - flag |= YARV::VM_CALL_KW_SPLAT - visit(arg_part) - else - visit(arg_part) - end - end - - block_iseq = visit(node.block) if node.block - flag |= YARV::VM_CALL_ARGS_SIMPLE if block_iseq.nil? && flag == 0 - flag |= YARV::VM_CALL_FCALL if node.receiver.nil? 
- - iseq.send(node.message.value.to_sym, argc, flag, block_iseq) - branchnil.patch!(iseq) if branchnil - end - - def visit_case(node) - visit(node.value) if node.value - - clauses = [] - else_clause = nil - current = node.consequent - - while current - clauses << current - - if (current = current.consequent).is_a?(Else) - else_clause = current - break - end - end - - branches = - clauses.map do |clause| - visit(clause.arguments) - iseq.topn(1) - iseq.send(:===, 1, YARV::VM_CALL_FCALL | YARV::VM_CALL_ARGS_SIMPLE) - [clause, iseq.branchif(:label_00)] - end - - iseq.pop - else_clause ? visit(else_clause) : iseq.putnil - iseq.leave - - branches.each_with_index do |(clause, branchif), index| - iseq.leave if index != 0 - branchif.patch!(iseq) - iseq.pop - visit(clause) - end - end - - def visit_class(node) - name = node.constant.constant.value.to_sym - class_iseq = - with_child_iseq(iseq.class_child_iseq(name, node.location)) do - iseq.event(:RUBY_EVENT_CLASS) - visit(node.bodystmt) - iseq.event(:RUBY_EVENT_END) - iseq.leave - end - - flags = YARV::DefineClass::TYPE_CLASS - - case node.constant - when ConstPathRef - flags |= YARV::DefineClass::FLAG_SCOPED - visit(node.constant.parent) - when ConstRef - iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) - when TopConstRef - flags |= YARV::DefineClass::FLAG_SCOPED - iseq.putobject(Object) - end - - if node.superclass - flags |= YARV::DefineClass::FLAG_HAS_SUPERCLASS - visit(node.superclass) - else - iseq.putnil - end - - iseq.defineclass(name, class_iseq, flags) - end - - def visit_command(node) - visit_call( - CommandCall.new( - receiver: nil, - operator: nil, - message: node.message, - arguments: node.arguments, - block: node.block, - location: node.location - ) - ) - end - - def visit_command_call(node) - visit_call( - CommandCall.new( - receiver: node.receiver, - operator: node.operator, - message: node.message, - arguments: node.arguments, - block: node.block, - location: node.location - ) - ) - end - - def 
visit_const_path_field(node) - visit(node.parent) - end - - def visit_const_path_ref(node) - names = constant_names(node) - iseq.opt_getconstant_path(names) - end - - def visit_def(node) - name = node.name.value.to_sym - method_iseq = iseq.method_child_iseq(name.to_s, node.location) - - with_child_iseq(method_iseq) do - visit(node.params) if node.params - iseq.event(:RUBY_EVENT_CALL) - visit(node.bodystmt) - iseq.event(:RUBY_EVENT_RETURN) - iseq.leave - end - - if node.target - visit(node.target) - iseq.definesmethod(name, method_iseq) - else - iseq.definemethod(name, method_iseq) - end - - iseq.putobject(name) - end - - def visit_defined(node) - case node.value - when Assign - # If we're assigning to a local variable, then we need to make sure - # that we put it into the local table. - if node.value.target.is_a?(VarField) && - node.value.target.value.is_a?(Ident) - iseq.local_table.plain(node.value.target.value.value.to_sym) - end - - iseq.putobject("assignment") - when VarRef - value = node.value.value - name = value.value.to_sym - - case value - when Const - iseq.putnil - iseq.defined(YARV::Defined::TYPE_CONST, name, "constant") - when CVar - iseq.putnil - iseq.defined(YARV::Defined::TYPE_CVAR, name, "class variable") - when GVar - iseq.putnil - iseq.defined(YARV::Defined::TYPE_GVAR, name, "global-variable") - when Ident - iseq.putobject("local-variable") - when IVar - iseq.putnil - iseq.defined(YARV::Defined::TYPE_IVAR, name, "instance-variable") - when Kw - case name - when :false - iseq.putobject("false") - when :nil - iseq.putobject("nil") - when :self - iseq.putobject("self") - when :true - iseq.putobject("true") - end - end - when VCall - iseq.putself - - name = node.value.value.value.to_sym - iseq.defined(YARV::Defined::TYPE_FUNC, name, "method") - when YieldNode - iseq.putnil - iseq.defined(YARV::Defined::TYPE_YIELD, false, "yield") - when ZSuper - iseq.putnil - iseq.defined(YARV::Defined::TYPE_ZSUPER, false, "super") - else - 
iseq.putobject("expression") - end - end - - def visit_dyna_symbol(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - iseq.putobject(node.parts.first.value.to_sym) - end - end - - def visit_else(node) - visit(node.statements) - iseq.pop unless last_statement? - end - - def visit_elsif(node) - visit_if( - IfNode.new( - predicate: node.predicate, - statements: node.statements, - consequent: node.consequent, - location: node.location - ) - ) - end - - def visit_field(node) - visit(node.parent) - end - - def visit_float(node) - iseq.putobject(node.accept(RubyVisitor.new)) - end - - def visit_for(node) - visit(node.collection) - - name = node.index.value.value.to_sym - iseq.local_table.plain(name) - - block_iseq = - with_child_iseq(iseq.block_child_iseq(node.statements.location)) do - iseq.argument_options[:lead_num] ||= 0 - iseq.argument_options[:lead_num] += 1 - iseq.argument_options[:ambiguous_param0] = true - - iseq.argument_size += 1 - iseq.local_table.plain(2) - - iseq.getlocal(0, 0) - - local_variable = iseq.local_variable(name) - iseq.setlocal(local_variable.index, local_variable.level) - - iseq.event(:RUBY_EVENT_B_CALL) - iseq.nop - - visit(node.statements) - iseq.event(:RUBY_EVENT_B_RETURN) - iseq.leave - end - - iseq.send(:each, 0, 0, block_iseq) - end - - def visit_hash(node) - if (compiled = RubyVisitor.compile(node)) - iseq.duphash(compiled) - else - visit_all(node.assocs) - iseq.newhash(node.assocs.length * 2) - end - end - - def visit_heredoc(node) - if node.beginning.value.end_with?("`") - visit_xstring_literal(node) - elsif node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - visit(node.parts.first) - else - length = visit_string_parts(node) - iseq.concatstrings(length) - end - end - - def visit_if(node) - if node.predicate.is_a?(RangeNode) - iseq.getspecial(YARV::VM_SVAR_FLIPFLOP_START, 0) - branchif = iseq.branchif(-1) - - visit(node.predicate.left) - branchunless_true = iseq.branchunless(-1) - - 
iseq.putobject(true) - iseq.setspecial(YARV::VM_SVAR_FLIPFLOP_START) - branchif.patch!(iseq) - - visit(node.predicate.right) - branchunless_false = iseq.branchunless(-1) - - iseq.putobject(false) - iseq.setspecial(YARV::VM_SVAR_FLIPFLOP_START) - branchunless_false.patch!(iseq) - - visit(node.statements) - iseq.leave - branchunless_true.patch!(iseq) - iseq.putnil - else - visit(node.predicate) - branchunless = iseq.branchunless(-1) - visit(node.statements) - - if last_statement? - iseq.leave - branchunless.patch!(iseq) - - node.consequent ? visit(node.consequent) : iseq.putnil - else - iseq.pop - - if node.consequent - jump = iseq.jump(-1) - branchunless.patch!(iseq) - visit(node.consequent) - jump[1] = iseq.label - else - branchunless.patch!(iseq) - end - end - end - end - - def visit_if_op(node) - visit_if( - IfNode.new( - predicate: node.predicate, - statements: node.truthy, - consequent: - Else.new( - keyword: Kw.new(value: "else", location: Location.default), - statements: node.falsy, - location: Location.default - ), - location: Location.default - ) - ) - end - - def visit_imaginary(node) - iseq.putobject(node.accept(RubyVisitor.new)) - end - - def visit_int(node) - iseq.putobject(node.accept(RubyVisitor.new)) - end - - def visit_kwrest_param(node) - iseq.argument_options[:kwrest] = iseq.argument_size - iseq.argument_size += 1 - iseq.local_table.plain(node.name.value.to_sym) - end - - def visit_label(node) - iseq.putobject(node.accept(RubyVisitor.new)) - end - - def visit_lambda(node) - lambda_iseq = - with_child_iseq(iseq.block_child_iseq(node.location)) do - iseq.event(:RUBY_EVENT_B_CALL) - visit(node.params) - visit(node.statements) - iseq.event(:RUBY_EVENT_B_RETURN) - iseq.leave - end - - iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) - iseq.send(:lambda, 0, YARV::VM_CALL_FCALL, lambda_iseq) - end - - def visit_lambda_var(node) - visit_block_var(node) - end - - def visit_massign(node) - visit(node.value) - iseq.dup - visit(node.target) - end - - def 
visit_method_add_block(node) - visit_call( - CommandCall.new( - receiver: node.call.receiver, - operator: node.call.operator, - message: node.call.message, - arguments: node.call.arguments, - block: node.block, - location: node.location - ) - ) - end - - def visit_mlhs(node) - lookups = [] - node.parts.each do |part| - case part - when VarField - lookups << visit(part) - end - end - - iseq.expandarray(lookups.length, 0) - lookups.each { |lookup| iseq.setlocal(lookup.index, lookup.level) } - end - - def visit_module(node) - name = node.constant.constant.value.to_sym - module_iseq = - with_child_iseq(iseq.module_child_iseq(name, node.location)) do - iseq.event(:RUBY_EVENT_CLASS) - visit(node.bodystmt) - iseq.event(:RUBY_EVENT_END) - iseq.leave - end - - flags = YARV::DefineClass::TYPE_MODULE - - case node.constant - when ConstPathRef - flags |= YARV::DefineClass::FLAG_SCOPED - visit(node.constant.parent) - when ConstRef - iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) - when TopConstRef - flags |= YARV::DefineClass::FLAG_SCOPED - iseq.putobject(Object) - end - - iseq.putnil - iseq.defineclass(name, module_iseq, flags) - end - - def visit_mrhs(node) - if (compiled = RubyVisitor.compile(node)) - iseq.duparray(compiled) - else - visit_all(node.parts) - iseq.newarray(node.parts.length) - end - end - - def visit_not(node) - visit(node.statement) - iseq.send(:!, 0) - end - - def visit_opassign(node) - flag = YARV::VM_CALL_ARGS_SIMPLE - if node.target.is_a?(ConstPathField) || node.target.is_a?(TopConstField) - flag |= YARV::VM_CALL_FCALL - end - - case (operator = node.operator.value.chomp("=").to_sym) - when :"&&" - branchunless = nil - - with_opassign(node) do - iseq.dup - branchunless = iseq.branchunless(-1) - iseq.pop - visit(node.value) - end - - case node.target - when ARefField - iseq.leave - branchunless.patch!(iseq) - iseq.setn(3) - iseq.adjuststack(3) - when ConstPathField, TopConstField - branchunless.patch!(iseq) - iseq.swap - iseq.pop - else - 
branchunless.patch!(iseq) - end - when :"||" - if node.target.is_a?(ConstPathField) || node.target.is_a?(TopConstField) - opassign_defined(node) - iseq.swap - iseq.pop - elsif node.target.is_a?(VarField) && - [Const, CVar, GVar].include?(node.target.value.class) - opassign_defined(node) - else - branchif = nil - - with_opassign(node) do - iseq.dup - branchif = iseq.branchif(-1) - iseq.pop - visit(node.value) - end - - if node.target.is_a?(ARefField) - iseq.leave - branchif.patch!(iseq) - iseq.setn(3) - iseq.adjuststack(3) - else - branchif.patch!(iseq) - end - end - else - with_opassign(node) do - visit(node.value) - iseq.send(operator, 1, flag) - end - end - end - - def visit_params(node) - argument_options = iseq.argument_options - - if node.requireds.any? - argument_options[:lead_num] = 0 - - node.requireds.each do |required| - iseq.local_table.plain(required.value.to_sym) - iseq.argument_size += 1 - argument_options[:lead_num] += 1 - end - end - - node.optionals.each do |(optional, value)| - index = iseq.local_table.size - name = optional.value.to_sym - - iseq.local_table.plain(name) - iseq.argument_size += 1 - - argument_options[:opt] = [iseq.label] unless argument_options.key?(:opt) - - visit(value) - iseq.setlocal(index, 0) - iseq.argument_options[:opt] << iseq.label - end - - visit(node.rest) if node.rest - - if node.posts.any? - argument_options[:post_start] = iseq.argument_size - argument_options[:post_num] = 0 - - node.posts.each do |post| - iseq.local_table.plain(post.value.to_sym) - iseq.argument_size += 1 - argument_options[:post_num] += 1 - end - end - - if node.keywords.any? - argument_options[:kwbits] = 0 - argument_options[:keyword] = [] - - keyword_bits_name = node.keyword_rest ? 
3 : 2 - iseq.argument_size += 1 - keyword_bits_index = iseq.local_table.locals.size + node.keywords.size - - node.keywords.each_with_index do |(keyword, value), keyword_index| - name = keyword.value.chomp(":").to_sym - index = iseq.local_table.size - - iseq.local_table.plain(name) - iseq.argument_size += 1 - argument_options[:kwbits] += 1 - - if value.nil? - argument_options[:keyword] << name - elsif (compiled = RubyVisitor.compile(value)) - argument_options[:keyword] << [name, compiled] - else - argument_options[:keyword] << [name] - iseq.checkkeyword(keyword_bits_index, keyword_index) - branchif = iseq.branchif(-1) - visit(value) - iseq.setlocal(index, 0) - branchif.patch!(iseq) - end - end - - iseq.local_table.plain(keyword_bits_name) - end - - if node.keyword_rest.is_a?(ArgsForward) - iseq.local_table.plain(:*) - iseq.local_table.plain(:&) - - iseq.argument_options[:rest_start] = iseq.argument_size - iseq.argument_options[:block_start] = iseq.argument_size + 1 - - iseq.argument_size += 2 - elsif node.keyword_rest - visit(node.keyword_rest) - end - - visit(node.block) if node.block - end - - def visit_paren(node) - visit(node.contents) - end - - def visit_program(node) - node.statements.body.each do |statement| - break unless statement.is_a?(Comment) - - if statement.value == "# frozen_string_literal: true" - @frozen_string_literal = true - end - end - - preexes = [] - statements = [] - - node.statements.body.each do |statement| - case statement - when Comment, EmbDoc, EndContent, VoidStmt - # ignore - when BEGINBlock - preexes << statement - else - statements << statement - end - end - - top_iseq = - YARV::InstructionSequence.new( - :top, - "", - nil, - node.location, - frozen_string_literal: frozen_string_literal, - operands_unification: operands_unification, - specialized_instruction: specialized_instruction - ) - - with_child_iseq(top_iseq) do - visit_all(preexes) - - if statements.empty? 
- iseq.putnil - else - *statements, last_statement = statements - visit_all(statements) - with_last_statement { visit(last_statement) } - end - - iseq.leave - end - end - - def visit_qsymbols(node) - iseq.duparray(node.accept(RubyVisitor.new)) - end - - def visit_qwords(node) - if frozen_string_literal - iseq.duparray(node.accept(RubyVisitor.new)) - else - visit_all(node.elements) - iseq.newarray(node.elements.length) - end - end - - def visit_range(node) - if (compiled = RubyVisitor.compile(node)) - iseq.putobject(compiled) - else - visit(node.left) - visit(node.right) - iseq.newrange(node.operator.value == ".." ? 0 : 1) - end - end - - def visit_rassign(node) - iseq.putnil - - if node.operator.is_a?(Kw) - jumps = [] - - visit(node.value) - iseq.dup - - case node.pattern - when VarField - lookup = visit(node.pattern) - iseq.setlocal(lookup.index, lookup.level) - jumps << iseq.jump(-1) - else - jumps.concat(visit(node.pattern)) - end - - iseq.pop - iseq.pop - iseq.putobject(false) - iseq.leave - - jumps.each { |jump| jump[1] = iseq.label } - iseq.adjuststack(2) - iseq.putobject(true) - else - jumps_to_match = [] - - iseq.putnil - iseq.putobject(false) - iseq.putnil - iseq.putnil - visit(node.value) - iseq.dup - - # Visit the pattern. If it matches, - case node.pattern - when VarField - lookup = visit(node.pattern) - iseq.setlocal(lookup.index, lookup.level) - jumps_to_match << iseq.jump(-1) - else - jumps_to_match.concat(visit(node.pattern)) - end - - # First we're going to push the core onto the stack, then we'll check if - # the value to match is truthy. If it is, we'll jump down to raise - # NoMatchingPatternKeyError. Otherwise we'll raise - # NoMatchingPatternError. - iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) - iseq.topn(4) - branchif_no_key = iseq.branchif(-1) - - # Here we're going to raise NoMatchingPatternError. 
- iseq.putobject(NoMatchingPatternError) - iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) - iseq.putobject("%p: %s") - iseq.topn(4) - iseq.topn(7) - iseq.send(:"core#sprintf", 3) - iseq.send(:"core#raise", 2) - jump_to_exit = iseq.jump(-1) - - # Here we're going to raise NoMatchingPatternKeyError. - branchif_no_key.patch!(iseq) - iseq.putobject(NoMatchingPatternKeyError) - iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) - iseq.putobject("%p: %s") - iseq.topn(4) - iseq.topn(7) - iseq.send(:"core#sprintf", 3) - iseq.topn(7) - iseq.topn(9) - - # Super special behavior here because of the weird kw_arg handling. - iseq.stack.change_by(-(1 + 1) + 1) - call_data = { mid: :new, flag: YARV::VM_CALL_KWARG, orig_argc: 1, kw_arg: [:matchee, :key] } - - if specialized_instruction - iseq.push([:opt_send_without_block, call_data]) - else - iseq.push([:send, call_data, nil]) - end - - iseq.send(:"core#raise", 1) - - # This runs when the pattern fails to match. - jump_to_exit[1] = iseq.label - iseq.adjuststack(7) - iseq.putnil - iseq.leave - - # This runs when the pattern matches successfully. 
- jumps_to_match.each { |jump| jump[1] = iseq.label } - iseq.adjuststack(6) - iseq.putnil - end - end - - def visit_rational(node) - iseq.putobject(node.accept(RubyVisitor.new)) - end - - def visit_regexp_literal(node) - if (compiled = RubyVisitor.compile(node)) - iseq.putobject(compiled) - else - flags = RubyVisitor.new.visit_regexp_literal_flags(node) - length = visit_string_parts(node) - iseq.toregexp(flags, length) - end - end - - def visit_rest_param(node) - iseq.local_table.plain(node.name.value.to_sym) - iseq.argument_options[:rest_start] = iseq.argument_size - iseq.argument_size += 1 - end - - def visit_sclass(node) - visit(node.target) - iseq.putnil - - singleton_iseq = - with_child_iseq(iseq.singleton_class_child_iseq(node.location)) do - iseq.event(:RUBY_EVENT_CLASS) - visit(node.bodystmt) - iseq.event(:RUBY_EVENT_END) - iseq.leave - end - - iseq.defineclass( - :singletonclass, - singleton_iseq, - YARV::DefineClass::TYPE_SINGLETON_CLASS - ) - end - - def visit_statements(node) - statements = - node.body.select do |statement| - case statement - when Comment, EmbDoc, EndContent, VoidStmt - false - else - true - end - end - - statements.empty? ? 
iseq.putnil : visit_all(statements) - end - - def visit_string_concat(node) - value = node.left.parts.first.value + node.right.parts.first.value - - visit_string_literal( - StringLiteral.new( - parts: [TStringContent.new(value: value, location: node.location)], - quote: node.left.quote, - location: node.location - ) - ) - end - - def visit_string_embexpr(node) - visit(node.statements) - end - - def visit_string_literal(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - visit(node.parts.first) - else - length = visit_string_parts(node) - iseq.concatstrings(length) - end - end - - def visit_super(node) - iseq.putself - visit(node.arguments) - iseq.invokesuper( - nil, - argument_parts(node.arguments).length, - YARV::VM_CALL_FCALL | YARV::VM_CALL_ARGS_SIMPLE | YARV::VM_CALL_SUPER, - nil - ) - end - - def visit_symbol_literal(node) - iseq.putobject(node.accept(RubyVisitor.new)) - end - - def visit_symbols(node) - if (compiled = RubyVisitor.compile(node)) - iseq.duparray(compiled) - else - node.elements.each do |element| - if element.parts.length == 1 && - element.parts.first.is_a?(TStringContent) - iseq.putobject(element.parts.first.value.to_sym) - else - length = visit_string_parts(element) - iseq.concatstrings(length) - iseq.intern - end - end - - iseq.newarray(node.elements.length) - end - end - - def visit_top_const_ref(node) - iseq.opt_getconstant_path(constant_names(node)) - end - - def visit_tstring_content(node) - if frozen_string_literal - iseq.putobject(node.accept(RubyVisitor.new)) - else - iseq.putstring(node.accept(RubyVisitor.new)) - end - end - - def visit_unary(node) - method_id = - case node.operator - when "+", "-" - "#{node.operator}@" - else - node.operator - end - - visit_call( - CommandCall.new( - receiver: node.statement, - operator: nil, - message: Ident.new(value: method_id, location: Location.default), - arguments: nil, - block: nil, - location: Location.default - ) - ) - end - - def visit_undef(node) - 
node.symbols.each_with_index do |symbol, index| - iseq.pop if index != 0 - iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_VMCORE) - iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_CBASE) - visit(symbol) - iseq.send(:"core#undef_method", 2) - end - end - - def visit_unless(node) - visit(node.predicate) - branchunless = iseq.branchunless(-1) - node.consequent ? visit(node.consequent) : iseq.putnil - - if last_statement? - iseq.leave - branchunless.patch!(iseq) - - visit(node.statements) - else - iseq.pop - - if node.consequent - jump = iseq.jump(-1) - branchunless.patch!(iseq) - visit(node.consequent) - jump[1] = iseq.label - else - branchunless.patch!(iseq) - end - end - end - - def visit_until(node) - jumps = [] - - jumps << iseq.jump(-1) - iseq.putnil - iseq.pop - jumps << iseq.jump(-1) - - label = iseq.label - visit(node.statements) - iseq.pop - jumps.each { |jump| jump[1] = iseq.label } - - visit(node.predicate) - iseq.branchunless(label) - iseq.putnil if last_statement? - end - - def visit_var_field(node) - case node.value - when CVar, IVar - name = node.value.value.to_sym - iseq.inline_storage_for(name) - when Ident - name = node.value.value.to_sym - - if (local_variable = iseq.local_variable(name)) - local_variable - else - iseq.local_table.plain(name) - iseq.local_variable(name) - end - end - end - - def visit_var_ref(node) - case node.value - when Const - iseq.opt_getconstant_path(constant_names(node)) - when CVar - name = node.value.value.to_sym - iseq.getclassvariable(name) - when GVar - iseq.getglobal(node.value.value.to_sym) - when Ident - lookup = iseq.local_variable(node.value.value.to_sym) - - case lookup.local - when YARV::LocalTable::BlockLocal - iseq.getblockparam(lookup.index, lookup.level) - when YARV::LocalTable::PlainLocal - iseq.getlocal(lookup.index, lookup.level) - end - when IVar - name = node.value.value.to_sym - iseq.getinstancevariable(name) - when Kw - case node.value.value - when "false" - iseq.putobject(false) - when "nil" - iseq.putnil 
- when "self" - iseq.putself - when "true" - iseq.putobject(true) - end - end - end - - def visit_vcall(node) - iseq.putself - - flag = - YARV::VM_CALL_FCALL | YARV::VM_CALL_VCALL | YARV::VM_CALL_ARGS_SIMPLE - iseq.send(node.value.value.to_sym, 0, flag) - end - - def visit_when(node) - visit(node.statements) - end - - def visit_while(node) - jumps = [] - - jumps << iseq.jump(-1) - iseq.putnil - iseq.pop - jumps << iseq.jump(-1) - - label = iseq.label - visit(node.statements) - iseq.pop - jumps.each { |jump| jump[1] = iseq.label } - - visit(node.predicate) - iseq.branchif(label) - iseq.putnil if last_statement? - end - - def visit_word(node) - if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) - visit(node.parts.first) - else - length = visit_string_parts(node) - iseq.concatstrings(length) - end - end - - def visit_words(node) - if frozen_string_literal && (compiled = RubyVisitor.compile(node)) - iseq.duparray(compiled) - else - visit_all(node.elements) - iseq.newarray(node.elements.length) - end - end - - def visit_xstring_literal(node) - iseq.putself - length = visit_string_parts(node) - iseq.concatstrings(node.parts.length) if length > 1 - iseq.send(:`, 1, YARV::VM_CALL_FCALL | YARV::VM_CALL_ARGS_SIMPLE) - end - - def visit_yield(node) - parts = argument_parts(node.arguments) - visit_all(parts) - iseq.invokeblock(nil, parts.length) - end - - def visit_zsuper(_node) - iseq.putself - iseq.invokesuper( - nil, - 0, - YARV::VM_CALL_FCALL | YARV::VM_CALL_ARGS_SIMPLE | YARV::VM_CALL_SUPER | - YARV::VM_CALL_ZSUPER, - nil - ) - end - - private - - # This is a helper that is used in places where arguments may be present - # or they may be wrapped in parentheses. It's meant to descend down the - # tree and return an array of argument nodes. 
- def argument_parts(node) - case node - when nil - [] - when Args - node.parts - when ArgParen - if node.arguments.is_a?(ArgsForward) - [node.arguments] - else - node.arguments.parts - end - when Paren - node.contents.parts - end - end - - # Constant names when they are being assigned or referenced come in as a - # tree, but it's more convenient to work with them as an array. This - # method converts them into that array. This is nice because it's the - # operand that goes to opt_getconstant_path in Ruby 3.2. - def constant_names(node) - current = node - names = [] - - while current.is_a?(ConstPathField) || current.is_a?(ConstPathRef) - names.unshift(current.constant.value.to_sym) - current = current.parent - end - - case current - when VarField, VarRef - names.unshift(current.value.value.to_sym) - when TopConstRef - names.unshift(current.constant.value.to_sym) - names.unshift(:"") - end - - names - end - - # For the most part when an OpAssign (operator assignment) node with a ||= - # operator is being compiled it's a matter of reading the target, checking - # if the value should be evaluated, evaluating it if so, and then writing - # the result back to the target. - # - # However, in certain kinds of assignments (X, ::X, X::Y, @@x, and $x) we - # first check if the value is defined using the defined instruction. I - # don't know why it is necessary, and suspect that it isn't. 
- def opassign_defined(node) - case node.target - when ConstPathField - visit(node.target.parent) - name = node.target.constant.value.to_sym - - iseq.dup - iseq.defined(YARV::Defined::TYPE_CONST_FROM, name, true) - when TopConstField - name = node.target.constant.value.to_sym - - iseq.putobject(Object) - iseq.dup - iseq.defined(YARV::Defined::TYPE_CONST_FROM, name, true) - when VarField - name = node.target.value.value.to_sym - iseq.putnil - - case node.target.value - when Const - iseq.defined(YARV::Defined::TYPE_CONST, name, true) - when CVar - iseq.defined(YARV::Defined::TYPE_CVAR, name, true) - when GVar - iseq.defined(YARV::Defined::TYPE_GVAR, name, true) - end - end - - branchunless = iseq.branchunless(-1) - - case node.target - when ConstPathField, TopConstField - iseq.dup - iseq.putobject(true) - iseq.getconstant(name) - when VarField - case node.target.value - when Const - iseq.opt_getconstant_path(constant_names(node.target)) - when CVar - iseq.getclassvariable(name) - when GVar - iseq.getglobal(name) - end - end - - iseq.dup - branchif = iseq.branchif(-1) - iseq.pop - - branchunless.patch!(iseq) - visit(node.value) - - case node.target - when ConstPathField, TopConstField - iseq.dupn(2) - iseq.swap - iseq.setconstant(name) - when VarField - iseq.dup - - case node.target.value - when Const - iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) - iseq.setconstant(name) - when CVar - iseq.setclassvariable(name) - when GVar - iseq.setglobal(name) - end - end - - branchif.patch!(iseq) - end - - # Whenever a value is interpolated into a string-like structure, these - # three instructions are pushed. - def push_interpolate - iseq.dup - iseq.objtostring( - :to_s, - 0, - YARV::VM_CALL_FCALL | YARV::VM_CALL_ARGS_SIMPLE - ) - iseq.anytostring - end - - # There are a lot of nodes in the AST that act as contains of parts of - # strings. This includes things like string literals, regular expressions, - # heredocs, etc. 
This method will visit all the parts of a string within - # those containers. - def visit_string_parts(node) - length = 0 - - unless node.parts.first.is_a?(TStringContent) - iseq.putobject("") - length += 1 - end - - node.parts.each do |part| - case part - when StringDVar - visit(part.variable) - push_interpolate - when StringEmbExpr - visit(part) - push_interpolate - when TStringContent - iseq.putobject(part.accept(RubyVisitor.new)) - end - - length += 1 - end - - length - end - - # The current instruction sequence that we're compiling is always stored - # on the compiler. When we descend into a node that has its own - # instruction sequence, this method can be called to temporarily set the - # new value of the instruction sequence, yield, and then set it back. - def with_child_iseq(child_iseq) - parent_iseq = iseq - - begin - @iseq = child_iseq - yield - child_iseq - ensure - @iseq = parent_iseq - end - end - - # When we're compiling the last statement of a set of statements within a - # scope, the instructions sometimes change from pops to leaves. These - # kinds of peephole optimizations can reduce the overall number of - # instructions. Therefore, we keep track of whether we're compiling the - # last statement of a scope and allow visit methods to query that - # information. - def with_last_statement - previous = @last_statement - @last_statement = true - - begin - yield - ensure - @last_statement = previous - end - end - - def last_statement? - @last_statement - end - - # OpAssign nodes can have a number of different kinds of nodes as their - # "target" (i.e., the left-hand side of the assignment). When compiling - # these nodes we typically need to first fetch the current value of the - # variable, then perform some kind of action, then store the result back - # into the variable. This method handles that by first fetching the value, - # then yielding to the block, then storing the result. 
- def with_opassign(node) - case node.target - when ARefField - iseq.putnil - visit(node.target.collection) - visit(node.target.index) - - iseq.dupn(2) - iseq.send(:[], 1) - - yield - - iseq.setn(3) - iseq.send(:[]=, 2) - iseq.pop - when ConstPathField - name = node.target.constant.value.to_sym - - visit(node.target.parent) - iseq.dup - iseq.putobject(true) - iseq.getconstant(name) - - yield - - if node.operator.value == "&&=" - iseq.dupn(2) - else - iseq.swap - iseq.topn(1) - end - - iseq.swap - iseq.setconstant(name) - when TopConstField - name = node.target.constant.value.to_sym - - iseq.putobject(Object) - iseq.dup - iseq.putobject(true) - iseq.getconstant(name) - - yield - - if node.operator.value == "&&=" - iseq.dupn(2) - else - iseq.swap - iseq.topn(1) - end - - iseq.swap - iseq.setconstant(name) - when VarField - case node.target.value - when Const - names = constant_names(node.target) - iseq.opt_getconstant_path(names) - - yield - - iseq.dup - iseq.putspecialobject(YARV::VM_SPECIAL_OBJECT_CONST_BASE) - iseq.setconstant(names.last) - when CVar - name = node.target.value.value.to_sym - iseq.getclassvariable(name) - - yield - - iseq.dup - iseq.setclassvariable(name) - when GVar - name = node.target.value.value.to_sym - iseq.getglobal(name) - - yield - - iseq.dup - iseq.setglobal(name) - when Ident - local_variable = visit(node.target) - iseq.getlocal(local_variable.index, local_variable.level) - - yield - - iseq.dup - iseq.setlocal(local_variable.index, local_variable.level) - when IVar - name = node.target.value.value.to_sym - iseq.getinstancevariable(name) - - yield - - iseq.dup - iseq.setinstancevariable(name) - end - end - end - end -end diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index 89920c6a..df8bc3ce 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -1,854 +1,11 @@ # frozen_string_literal: true module SyntaxTree + # This module provides an object representation of the YARV bytecode. 
module YARV - # This object is used to track the size of the stack at any given time. It - # is effectively a mini symbolic interpreter. It's necessary because when - # instruction sequences get serialized they include a :stack_max field on - # them. This field is used to determine how much stack space to allocate - # for the instruction sequence. - class Stack - attr_reader :current_size, :maximum_size - - def initialize - @current_size = 0 - @maximum_size = 0 - end - - def change_by(value) - @current_size += value - @maximum_size = @current_size if @current_size > @maximum_size - end + # Compile the given source into a YARV instruction sequence. + def self.compile(source, **options) + SyntaxTree.parse(source).accept(Compiler.new(**options)) end - - # This represents every local variable associated with an instruction - # sequence. There are two kinds of locals: plain locals that are what you - # expect, and block proxy locals, which represent local variables - # associated with blocks that were passed into the current instruction - # sequence. - class LocalTable - # A local representing a block passed into the current instruction - # sequence. - class BlockLocal - attr_reader :name - - def initialize(name) - @name = name - end - end - - # A regular local variable. - class PlainLocal - attr_reader :name - - def initialize(name) - @name = name - end - end - - # The result of looking up a local variable in the current local table. - class Lookup - attr_reader :local, :index, :level - - def initialize(local, index, level) - @local = local - @index = index - @level = level - end - end - - attr_reader :locals - - def initialize - @locals = [] - end - - def find(name, level) - index = locals.index { |local| local.name == name } - Lookup.new(locals[index], index, level) if index - end - - def has?(name) - locals.any? { |local| local.name == name } - end - - def names - locals.map(&:name) - end - - def size - locals.length - end - - # Add a BlockLocal to the local table. 
- def block(name) - locals << BlockLocal.new(name) unless has?(name) - end - - # Add a PlainLocal to the local table. - def plain(name) - locals << PlainLocal.new(name) unless has?(name) - end - - # This is the offset from the top of the stack where this local variable - # lives. - def offset(index) - size - (index - 3) - 1 - end - end - - # This class is meant to mirror RubyVM::InstructionSequence. It contains a - # list of instructions along with the metadata pertaining to them. It also - # functions as a builder for the instruction sequence. - class InstructionSequence - MAGIC = "YARVInstructionSequence/SimpleDataFormat" - - # This provides a handle to the rb_iseq_load function, which allows you to - # pass a serialized iseq to Ruby and have it return a - # RubyVM::InstructionSequence object. - ISEQ_LOAD = - Fiddle::Function.new( - Fiddle::Handle::DEFAULT["rb_iseq_load"], - [Fiddle::TYPE_VOIDP] * 3, - Fiddle::TYPE_VOIDP - ) - - # The type of the instruction sequence. - attr_reader :type - - # The name of the instruction sequence. - attr_reader :name - - # The parent instruction sequence, if there is one. - attr_reader :parent_iseq - - # The location of the root node of this instruction sequence. - attr_reader :location - - # This is the list of information about the arguments to this - # instruction sequence. - attr_accessor :argument_size - attr_reader :argument_options - - # The list of instructions for this instruction sequence. - attr_reader :insns - - # The table of local variables. - attr_reader :local_table - - # The hash of names of instance and class variables pointing to the - # index of their associated inline storage. - attr_reader :inline_storages - - # The index of the next inline storage that will be created. - attr_reader :storage_index - - # An object that will track the current size of the stack and the - # maximum size of the stack for this instruction sequence. - attr_reader :stack - - # These are various compilation options provided. 
- attr_reader :frozen_string_literal, - :operands_unification, - :specialized_instruction - - def initialize( - type, - name, - parent_iseq, - location, - frozen_string_literal: false, - operands_unification: true, - specialized_instruction: true - ) - @type = type - @name = name - @parent_iseq = parent_iseq - @location = location - - @argument_size = 0 - @argument_options = {} - - @local_table = LocalTable.new - @inline_storages = {} - @insns = [] - @storage_index = 0 - @stack = Stack.new - - @frozen_string_literal = frozen_string_literal - @operands_unification = operands_unification - @specialized_instruction = specialized_instruction - end - - ########################################################################## - # Query methods - ########################################################################## - - def local_variable(name, level = 0) - if (lookup = local_table.find(name, level)) - lookup - elsif parent_iseq - parent_iseq.local_variable(name, level + 1) - end - end - - def inline_storage - storage = storage_index - @storage_index += 1 - storage - end - - def inline_storage_for(name) - inline_storages[name] = inline_storage unless inline_storages.key?(name) - - inline_storages[name] - end - - def length - insns.inject(0) do |sum, insn| - case insn - when Integer, Symbol - sum - else - sum + insn.length - end - end - end - - def eval - compiled = to_a - - # Temporary hack until we get these working. 
- compiled[4][:node_id] = 11 - compiled[4][:node_ids] = [1, 0, 3, 2, 6, 7, 9, -1] - - Fiddle.dlunwrap(ISEQ_LOAD.call(Fiddle.dlwrap(compiled), 0, nil)).eval - end - - def to_a - versions = RUBY_VERSION.split(".").map(&:to_i) - - [ - MAGIC, - versions[0], - versions[1], - 1, - { - arg_size: argument_size, - local_size: local_table.size, - stack_max: stack.maximum_size - }, - name, - "", - "", - location.start_line, - type, - local_table.names, - argument_options, - [], - insns.map do |insn| - case insn - when Integer, Symbol - insn - when Array - case insn[0] - when :setlocal_WC_0, :setlocal_WC_1, :setlocal, :setblockparam - iseq = self - - case insn[0] - when :setlocal_WC_1 - iseq = iseq.parent_iseq - when :setlocal, :setblockparam - insn[2].times { iseq = iseq.parent_iseq } - end - - # Here we need to map the local variable index to the offset - # from the top of the stack where it will be stored. - [insn[0], iseq.local_table.offset(insn[1]), *insn[2..]] - when :send - # For any instructions that push instruction sequences onto the - # stack, we need to call #to_a on them as well. 
- [insn[0], insn[1], (insn[2].to_a if insn[2])] - when :once - [insn[0], insn[1].to_a, insn[2]] - else - insn - end - else - insn.to_a(self) - end - end - ] - end - - ########################################################################## - # Child instruction sequence methods - ########################################################################## - - def child_iseq(type, name, location) - InstructionSequence.new( - type, - name, - self, - location, - frozen_string_literal: frozen_string_literal, - operands_unification: operands_unification, - specialized_instruction: specialized_instruction - ) - end - - def block_child_iseq(location) - current = self - current = current.parent_iseq while current.type == :block - child_iseq(:block, "block in #{current.name}", location) - end - - def class_child_iseq(name, location) - child_iseq(:class, "", location) - end - - def method_child_iseq(name, location) - child_iseq(:method, name, location) - end - - def module_child_iseq(name, location) - child_iseq(:class, "", location) - end - - def singleton_class_child_iseq(location) - child_iseq(:class, "singleton class", location) - end - - ########################################################################## - # Instruction push methods - ########################################################################## - - def push(insn) - insns << insn - - case insn - when Integer, Symbol, Array - insn - else - stack.change_by(-insn.pops + insn.pushes) - insn - end - end - - # This creates a new label at the current length of the instruction - # sequence. It is used as the operand for jump instructions. - def label - name = :"label_#{length}" - insns.last == name ? 
name : event(name) - end - - def event(name) - push(name) - end - - def adjuststack(number) - push(AdjustStack.new(number)) - end - - def anytostring - push(AnyToString.new) - end - - def branchif(label) - push(BranchIf.new(label)) - end - - def branchnil(label) - push(BranchNil.new(label)) - end - - def branchunless(label) - push(BranchUnless.new(label)) - end - - def checkkeyword(keyword_bits_index, keyword_index) - push(CheckKeyword.new(keyword_bits_index, keyword_index)) - end - - def checkmatch(flag) - stack.change_by(-2 + 1) - push([:checkmatch, flag]) - end - - def checktype(type) - stack.change_by(-1 + 2) - push([:checktype, type]) - end - - def concatarray - push(ConcatArray.new) - end - - def concatstrings(number) - push(ConcatStrings.new(number)) - end - - def defined(type, name, message) - push(Defined.new(type, name, message)) - end - - def defineclass(name, class_iseq, flags) - push(DefineClass.new(name, class_iseq, flags)) - end - - def definemethod(name, method_iseq) - push(DefineMethod.new(name, method_iseq)) - end - - def definesmethod(name, method_iseq) - push(DefineSMethod.new(name, method_iseq)) - end - - def dup - push(Dup.new) - end - - def duparray(object) - push(DupArray.new(object)) - end - - def duphash(object) - push(DupHash.new(object)) - end - - def dupn(number) - push(DupN.new(number)) - end - - def expandarray(length, flags) - push(ExpandArray.new(length, flags)) - end - - def getblockparam(index, level) - push(GetBlockParam.new(index, level)) - end - - def getblockparamproxy(index, level) - push(GetBlockParamProxy.new(index, level)) - end - - def getclassvariable(name) - if RUBY_VERSION < "3.0" - push(Legacy::GetClassVariable.new(name)) - else - push(GetClassVariable.new(name, inline_storage_for(name))) - end - end - - def getconstant(name) - push(Legacy::GetConstant.new(name)) - end - - def getglobal(name) - push(GetGlobal.new(name)) - end - - def getinstancevariable(name) - if RUBY_VERSION < "3.2" - 
push(GetInstanceVariable.new(name, inline_storage_for(name))) - else - push(GetInstanceVariable.new(name, inline_storage)) - end - end - - def getlocal(index, level) - if operands_unification - # Specialize the getlocal instruction based on the level of the - # local variable. If it's 0 or 1, then there's a specialized - # instruction that will look at the current scope or the parent - # scope, respectively, and requires fewer operands. - case level - when 0 - push(GetLocalWC0.new(index)) - when 1 - push(GetLocalWC1.new(index)) - else - push(GetLocal.new(index, level)) - end - else - push(GetLocal.new(index, level)) - end - end - - def getspecial(key, type) - stack.change_by(-0 + 1) - push([:getspecial, key, type]) - end - - def intern - stack.change_by(-1 + 1) - push([:intern]) - end - - def invokeblock(method_id, argc, flag = VM_CALL_ARGS_SIMPLE) - stack.change_by(-argc + 1) - push([:invokeblock, call_data(method_id, argc, flag)]) - end - - def invokesuper(method_id, argc, flag, block_iseq) - stack.change_by(-(argc + 1) + 1) - - cdata = call_data(method_id, argc, flag) - push([:invokesuper, cdata, block_iseq]) - end - - def jump(index) - stack.change_by(0) - push([:jump, index]) - end - - def leave - stack.change_by(-1) - push([:leave]) - end - - def newarray(length) - stack.change_by(-length + 1) - push([:newarray, length]) - end - - def newarraykwsplat(length) - stack.change_by(-length + 1) - push([:newarraykwsplat, length]) - end - - def newhash(length) - stack.change_by(-length + 1) - push([:newhash, length]) - end - - def newrange(flag) - stack.change_by(-2 + 1) - push([:newrange, flag]) - end - - def nop - stack.change_by(0) - push([:nop]) - end - - def objtostring(method_id, argc, flag) - stack.change_by(-1 + 1) - push([:objtostring, call_data(method_id, argc, flag)]) - end - - def once(postexe_iseq, inline_storage) - stack.change_by(+1) - push([:once, postexe_iseq, inline_storage]) - end - - def opt_aref_with(object, method_id, argc, flag = 
VM_CALL_ARGS_SIMPLE) - stack.change_by(-1 + 1) - push([:opt_aref_with, object, call_data(method_id, argc, flag)]) - end - - def opt_aset_with(object, method_id, argc, flag = VM_CALL_ARGS_SIMPLE) - stack.change_by(-2 + 1) - push([:opt_aset_with, object, call_data(method_id, argc, flag)]) - end - - def opt_getconstant_path(names) - if RUBY_VERSION >= "3.2" - stack.change_by(+1) - push([:opt_getconstant_path, names]) - else - const_inline_storage = inline_storage - getinlinecache = opt_getinlinecache(-1, const_inline_storage) - - if names[0] == :"" - names.shift - pop - putobject(Object) - end - - names.each_with_index do |name, index| - putobject(index == 0) - getconstant(name) - end - - opt_setinlinecache(const_inline_storage) - getinlinecache[1] = label - end - end - - def opt_getinlinecache(offset, inline_storage) - stack.change_by(+1) - push([:opt_getinlinecache, offset, inline_storage]) - end - - def opt_newarray_max(length) - if specialized_instruction - stack.change_by(-length + 1) - push([:opt_newarray_max, length]) - else - newarray(length) - send(:max, 0) - end - end - - def opt_newarray_min(length) - if specialized_instruction - stack.change_by(-length + 1) - push([:opt_newarray_min, length]) - else - newarray(length) - send(:min, 0) - end - end - - def opt_setinlinecache(inline_storage) - stack.change_by(-1 + 1) - push([:opt_setinlinecache, inline_storage]) - end - - def opt_str_freeze(value) - if specialized_instruction - stack.change_by(+1) - push([:opt_str_freeze, value, call_data(:freeze, 0)]) - else - putstring(value) - send(:freeze, 0) - end - end - - def opt_str_uminus(value) - if specialized_instruction - stack.change_by(+1) - push([:opt_str_uminus, value, call_data(:-@, 0)]) - else - putstring(value) - send(:-@, 0) - end - end - - def pop - stack.change_by(-1) - push([:pop]) - end - - def putnil - stack.change_by(+1) - push([:putnil]) - end - - def putobject(object) - stack.change_by(+1) - - if operands_unification - # Specialize the putobject 
instruction based on the value of the - # object. If it's 0 or 1, then there's a specialized instruction - # that will push the object onto the stack and requires fewer - # operands. - if object.eql?(0) - push([:putobject_INT2FIX_0_]) - elsif object.eql?(1) - push([:putobject_INT2FIX_1_]) - else - push([:putobject, object]) - end - else - push([:putobject, object]) - end - end - - def putself - stack.change_by(+1) - push([:putself]) - end - - def putspecialobject(object) - stack.change_by(+1) - push([:putspecialobject, object]) - end - - def putstring(object) - stack.change_by(+1) - push([:putstring, object]) - end - - def send(method_id, argc, flag = VM_CALL_ARGS_SIMPLE, block_iseq = nil) - stack.change_by(-(argc + 1) + 1) - cdata = call_data(method_id, argc, flag) - - if specialized_instruction - # Specialize the send instruction. If it doesn't have a block - # attached, then we will replace it with an opt_send_without_block - # and do further specializations based on the called method and the - # number of arguments. 
- - # stree-ignore - if !block_iseq && (flag & VM_CALL_ARGS_BLOCKARG) == 0 - case [method_id, argc] - when [:length, 0] then push([:opt_length, cdata]) - when [:size, 0] then push([:opt_size, cdata]) - when [:empty?, 0] then push([:opt_empty_p, cdata]) - when [:nil?, 0] then push([:opt_nil_p, cdata]) - when [:succ, 0] then push([:opt_succ, cdata]) - when [:!, 0] then push([:opt_not, cdata]) - when [:+, 1] then push([:opt_plus, cdata]) - when [:-, 1] then push([:opt_minus, cdata]) - when [:*, 1] then push([:opt_mult, cdata]) - when [:/, 1] then push([:opt_div, cdata]) - when [:%, 1] then push([:opt_mod, cdata]) - when [:==, 1] then push([:opt_eq, cdata]) - when [:=~, 1] then push([:opt_regexpmatch2, cdata]) - when [:<, 1] then push([:opt_lt, cdata]) - when [:<=, 1] then push([:opt_le, cdata]) - when [:>, 1] then push([:opt_gt, cdata]) - when [:>=, 1] then push([:opt_ge, cdata]) - when [:<<, 1] then push([:opt_ltlt, cdata]) - when [:[], 1] then push([:opt_aref, cdata]) - when [:&, 1] then push([:opt_and, cdata]) - when [:|, 1] then push([:opt_or, cdata]) - when [:[]=, 2] then push([:opt_aset, cdata]) - when [:!=, 1] - push([:opt_neq, call_data(:==, 1), cdata]) - else - push([:opt_send_without_block, cdata]) - end - else - push([:send, cdata, block_iseq]) - end - else - push([:send, cdata, block_iseq]) - end - end - - def setblockparam(index, level) - stack.change_by(-1) - push([:setblockparam, index, level]) - end - - def setclassvariable(name) - stack.change_by(-1) - - if RUBY_VERSION >= "3.0" - push([:setclassvariable, name, inline_storage_for(name)]) - else - push([:setclassvariable, name]) - end - end - - def setconstant(name) - stack.change_by(-2) - push([:setconstant, name]) - end - - def setglobal(name) - stack.change_by(-1) - push([:setglobal, name]) - end - - def setinstancevariable(name) - stack.change_by(-1) - - if RUBY_VERSION >= "3.2" - push([:setinstancevariable, name, inline_storage]) - else - push([:setinstancevariable, name, 
inline_storage_for(name)]) - end - end - - def setlocal(index, level) - stack.change_by(-1) - - if operands_unification - # Specialize the setlocal instruction based on the level of the - # local variable. If it's 0 or 1, then there's a specialized - # instruction that will write to the current scope or the parent - # scope, respectively, and requires fewer operands. - case level - when 0 - push([:setlocal_WC_0, index]) - when 1 - push([:setlocal_WC_1, index]) - else - push([:setlocal, index, level]) - end - else - push([:setlocal, index, level]) - end - end - - def setn(number) - stack.change_by(-1 + 1) - push([:setn, number]) - end - - def setspecial(key) - stack.change_by(-1) - push([:setspecial, key]) - end - - def splatarray(flag) - stack.change_by(-1 + 1) - push([:splatarray, flag]) - end - - def swap - stack.change_by(-2 + 2) - push([:swap]) - end - - def topn(number) - stack.change_by(+1) - push([:topn, number]) - end - - def toregexp(options, length) - stack.change_by(-length + 1) - push([:toregexp, options, length]) - end - - private - - # This creates a call data object that is used as the operand for the - # send, invokesuper, and objtostring instructions. - def call_data(method_id, argc, flag = VM_CALL_ARGS_SIMPLE) - { mid: method_id, flag: flag, orig_argc: argc } - end - end - - # These constants correspond to the putspecialobject instruction. They are - # used to represent special objects that are pushed onto the stack. - VM_SPECIAL_OBJECT_VMCORE = 1 - VM_SPECIAL_OBJECT_CBASE = 2 - VM_SPECIAL_OBJECT_CONST_BASE = 3 - - # These constants correspond to the flag passed as part of the call data - # structure on the send instruction. They are used to represent various - # metadata about the callsite (e.g., were keyword arguments used?, was a - # block given?, etc.). 
- VM_CALL_ARGS_SPLAT = 1 << 0 - VM_CALL_ARGS_BLOCKARG = 1 << 1 - VM_CALL_FCALL = 1 << 2 - VM_CALL_VCALL = 1 << 3 - VM_CALL_ARGS_SIMPLE = 1 << 4 - VM_CALL_BLOCKISEQ = 1 << 5 - VM_CALL_KWARG = 1 << 6 - VM_CALL_KW_SPLAT = 1 << 7 - VM_CALL_TAILCALL = 1 << 8 - VM_CALL_SUPER = 1 << 9 - VM_CALL_ZSUPER = 1 << 10 - VM_CALL_OPT_SEND = 1 << 11 - VM_CALL_KW_SPLAT_MUT = 1 << 12 - - # These constants correspond to the setspecial instruction. - VM_SVAR_LASTLINE = 0 # $_ - VM_SVAR_BACKREF = 1 # $~ - VM_SVAR_FLIPFLOP_START = 2 # flipflop - - # These constants correspond to the checktype instruction. - VM_CHECKTYPE_ARRAY = 7 - - # These constants correspond to the checkmatch instruction. - VM_CHECKMATCH_TYPE_WHEN = 1 - VM_CHECKMATCH_TYPE_CASE = 2 - VM_CHECKMATCH_TYPE_RESCUE = 3 end end diff --git a/lib/syntax_tree/yarv/bf.rb b/lib/syntax_tree/yarv/bf.rb index 05c05705..0fb27f7e 100644 --- a/lib/syntax_tree/yarv/bf.rb +++ b/lib/syntax_tree/yarv/bf.rb @@ -20,7 +20,7 @@ def compile iseq.setglobal(:$tape) iseq.getglobal(:$tape) iseq.putobject(0) - iseq.send(:default=, 1) + iseq.send(YARV.calldata(:default=, 1)) # Set up the $cursor global variable that will hold the current position # in the tape. 
@@ -99,17 +99,17 @@ def change_by(iseq, value) iseq.getglobal(:$tape) iseq.getglobal(:$cursor) - iseq.send(:[], 1) + iseq.send(YARV.calldata(:[], 1)) if value < 0 iseq.putobject(-value) - iseq.send(:-, 1) + iseq.send(YARV.calldata(:-, 1)) else iseq.putobject(value) - iseq.send(:+, 1) + iseq.send(YARV.calldata(:+, 1)) end - iseq.send(:[]=, 2) + iseq.send(YARV.calldata(:[]=, 2)) end # $cursor += value @@ -118,10 +118,10 @@ def shift_by(iseq, value) if value < 0 iseq.putobject(-value) - iseq.send(:-, 1) + iseq.send(YARV.calldata(:-, 1)) else iseq.putobject(value) - iseq.send(:+, 1) + iseq.send(YARV.calldata(:+, 1)) end iseq.setglobal(:$cursor) @@ -133,10 +133,10 @@ def output_char(iseq) iseq.getglobal(:$tape) iseq.getglobal(:$cursor) - iseq.send(:[], 1) - iseq.send(:chr, 0) + iseq.send(YARV.calldata(:[], 1)) + iseq.send(YARV.calldata(:chr)) - iseq.send(:putc, 1) + iseq.send(YARV.calldata(:putc, 1)) end # $tape[$cursor] = $stdin.getc.ord @@ -145,10 +145,10 @@ def input_char(iseq) iseq.getglobal(:$cursor) iseq.getglobal(:$stdin) - iseq.send(:getc, 0) - iseq.send(:ord, 0) + iseq.send(YARV.calldata(:getc)) + iseq.send(YARV.calldata(:ord)) - iseq.send(:[]=, 2) + iseq.send(YARV.calldata(:[]=, 2)) end # unless $tape[$cursor] == 0 @@ -157,10 +157,10 @@ def loop_start(iseq) iseq.getglobal(:$tape) iseq.getglobal(:$cursor) - iseq.send(:[], 1) + iseq.send(YARV.calldata(:[], 1)) iseq.putobject(0) - iseq.send(:==, 1) + iseq.send(YARV.calldata(:==, 1)) branchunless = iseq.branchunless(-1) [start_label, branchunless] diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb new file mode 100644 index 00000000..45f2bb59 --- /dev/null +++ b/lib/syntax_tree/yarv/compiler.rb @@ -0,0 +1,2164 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # This class is an experiment in transforming Syntax Tree nodes into their + # corresponding YARV instruction sequences. It attempts to mirror the + # behavior of RubyVM::InstructionSequence.compile. 
+ # + # You use this as with any other visitor. First you parse code into a tree, + # then you visit it with this compiler. Visiting the root node of the tree + # will return a SyntaxTree::Visitor::Compiler::InstructionSequence object. + # With that object you can call #to_a on it, which will return a serialized + # form of the instruction sequence as an array. This array _should_ mirror + # the array given by RubyVM::InstructionSequence#to_a. + # + # As an example, here is how you would compile a single expression: + # + # program = SyntaxTree.parse("1 + 2") + # program.accept(SyntaxTree::YARV::Compiler.new).to_a + # + # [ + # "YARVInstructionSequence/SimpleDataFormat", + # 3, + # 1, + # 1, + # {:arg_size=>0, :local_size=>0, :stack_max=>2}, + # "", + # "", + # "", + # 1, + # :top, + # [], + # {}, + # [], + # [ + # [:putobject_INT2FIX_1_], + # [:putobject, 2], + # [:opt_plus, {:mid=>:+, :flag=>16, :orig_argc=>1}], + # [:leave] + # ] + # ] + # + # Note that this is the same output as calling: + # + # RubyVM::InstructionSequence.compile("1 + 2").to_a + # + class Compiler < BasicVisitor + # This visitor is responsible for converting Syntax Tree nodes into their + # corresponding Ruby structures. This is used to convert the operands of + # some instructions like putobject that push a Ruby object directly onto + # the stack. It is only used when the entire structure can be represented + # at compile-time, as opposed to constructed at run-time. + class RubyVisitor < BasicVisitor + # This error is raised whenever a node cannot be converted into a Ruby + # object at compile-time. + class CompilationError < StandardError + end + + # This will attempt to compile the given node. If it's possible, then + # it will return the compiled object. Otherwise it will return nil. 
+ def self.compile(node) + node.accept(new) + rescue CompilationError + end + + def visit_array(node) + visit_all(node.contents.parts) + end + + def visit_bare_assoc_hash(node) + node.assocs.to_h do |assoc| + # We can only convert regular key-value pairs. A double splat ** + # operator means it has to be converted at run-time. + raise CompilationError unless assoc.is_a?(Assoc) + [visit(assoc.key), visit(assoc.value)] + end + end + + def visit_float(node) + node.value.to_f + end + + alias visit_hash visit_bare_assoc_hash + + def visit_imaginary(node) + node.value.to_c + end + + def visit_int(node) + node.value.to_i + end + + def visit_label(node) + node.value.chomp(":").to_sym + end + + def visit_mrhs(node) + visit_all(node.parts) + end + + def visit_qsymbols(node) + node.elements.map { |element| visit(element).to_sym } + end + + def visit_qwords(node) + visit_all(node.elements) + end + + def visit_range(node) + left, right = [visit(node.left), visit(node.right)] + node.operator.value === ".." ? left..right : left...right + end + + def visit_rational(node) + node.value.to_r + end + + def visit_regexp_literal(node) + if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + Regexp.new(node.parts.first.value, visit_regexp_literal_flags(node)) + else + # Any interpolation of expressions or variables will result in the + # regular expression being constructed at run-time. + raise CompilationError + end + end + + # This isn't actually a visit method, though maybe it should be. It is + # responsible for converting the set of string options on a regular + # expression into its equivalent integer. 
+ def visit_regexp_literal_flags(node) + node + .options + .chars + .inject(0) do |accum, option| + accum | + case option + when "i" + Regexp::IGNORECASE + when "x" + Regexp::EXTENDED + when "m" + Regexp::MULTILINE + else + raise "Unknown regexp option: #{option}" + end + end + end + + def visit_symbol_literal(node) + node.value.value.to_sym + end + + def visit_symbols(node) + node.elements.map { |element| visit(element).to_sym } + end + + def visit_tstring_content(node) + node.value + end + + def visit_var_ref(node) + raise CompilationError unless node.value.is_a?(Kw) + + case node.value.value + when "nil" + nil + when "true" + true + when "false" + false + else + raise CompilationError + end + end + + def visit_word(node) + if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + node.parts.first.value + else + # Any interpolation of expressions or variables will result in the + # string being constructed at run-time. + raise CompilationError + end + end + + def visit_words(node) + visit_all(node.elements) + end + + def visit_unsupported(_node) + raise CompilationError + end + + # Please forgive the metaprogramming here. This is used to create visit + # methods for every node that we did not explicitly handle. By default + # each of these methods will raise a CompilationError. + handled = instance_methods(false) + (Visitor.instance_methods(false) - handled).each do |method| + alias_method method, :visit_unsupported + end + end + + # These options mirror the compilation options that we currently support + # that can be also passed to RubyVM::InstructionSequence.compile. + attr_reader :frozen_string_literal, + :operands_unification, + :specialized_instruction + + # The current instruction sequence that is being compiled. + attr_reader :iseq + + # A boolean to track if we're currently compiling the last statement + # within a set of statements. This information is necessary to determine + # if we need to return the value of the last statement. 
+ attr_reader :last_statement + + def initialize( + frozen_string_literal: false, + operands_unification: true, + specialized_instruction: true + ) + @frozen_string_literal = frozen_string_literal + @operands_unification = operands_unification + @specialized_instruction = specialized_instruction + + @iseq = nil + @last_statement = false + end + + def visit_BEGIN(node) + visit(node.statements) + end + + def visit_CHAR(node) + if frozen_string_literal + iseq.putobject(node.value[1..]) + else + iseq.putstring(node.value[1..]) + end + end + + def visit_END(node) + once_iseq = + with_child_iseq(iseq.block_child_iseq(node.location)) do + postexe_iseq = + with_child_iseq(iseq.block_child_iseq(node.location)) do + iseq.event(:RUBY_EVENT_B_CALL) + + *statements, last_statement = node.statements.body + visit_all(statements) + with_last_statement { visit(last_statement) } + + iseq.event(:RUBY_EVENT_B_RETURN) + iseq.leave + end + + iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) + iseq.send( + YARV.calldata(:"core#set_postexe", 0, CallData::CALL_FCALL), + postexe_iseq + ) + iseq.leave + end + + iseq.once(once_iseq, iseq.inline_storage) + iseq.pop + end + + def visit_alias(node) + iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) + iseq.putspecialobject(PutSpecialObject::OBJECT_CBASE) + visit(node.left) + visit(node.right) + iseq.send(YARV.calldata(:"core#set_method_alias", 3)) + end + + def visit_aref(node) + calldata = YARV.calldata(:[], 1) + visit(node.collection) + + if !frozen_string_literal && specialized_instruction && + (node.index.parts.length == 1) + arg = node.index.parts.first + + if arg.is_a?(StringLiteral) && (arg.parts.length == 1) + string_part = arg.parts.first + + if string_part.is_a?(TStringContent) + iseq.opt_aref_with(string_part.value, calldata) + return + end + end + end + + visit(node.index) + iseq.send(calldata) + end + + def visit_arg_block(node) + visit(node.value) + end + + def visit_arg_paren(node) + visit(node.arguments) + end + + def 
visit_arg_star(node) + visit(node.value) + iseq.splatarray(false) + end + + def visit_args(node) + visit_all(node.parts) + end + + def visit_array(node) + if (compiled = RubyVisitor.compile(node)) + iseq.duparray(compiled) + elsif node.contents && node.contents.parts.length == 1 && + node.contents.parts.first.is_a?(BareAssocHash) && + node.contents.parts.first.assocs.length == 1 && + node.contents.parts.first.assocs.first.is_a?(AssocSplat) + iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) + iseq.newhash(0) + visit(node.contents.parts.first) + iseq.send(YARV.calldata(:"core#hash_merge_kwd", 2)) + iseq.newarraykwsplat(1) + else + length = 0 + + node.contents.parts.each do |part| + if part.is_a?(ArgStar) + if length > 0 + iseq.newarray(length) + length = 0 + end + + visit(part.value) + iseq.concatarray + else + visit(part) + length += 1 + end + end + + iseq.newarray(length) if length > 0 + iseq.concatarray if length > 0 && length != node.contents.parts.length + end + end + + def visit_aryptn(node) + match_failures = [] + jumps_to_exit = [] + + # If there's a constant, then check if we match against that constant or + # not first. Branch to failure if we don't. + if node.constant + iseq.dup + visit(node.constant) + iseq.checkmatch(CheckMatch::TYPE_CASE) + match_failures << iseq.branchunless(-1) + end + + # First, check if the #deconstruct cache is nil. If it is, we're going + # to call #deconstruct on the object and cache the result. + iseq.topn(2) + branchnil = iseq.branchnil(-1) + + # Next, ensure that the cached value was cached correctly, otherwise + # fail the match. + iseq.topn(2) + match_failures << iseq.branchunless(-1) + + # Since we have a valid cached value, we can skip past the part where we + # call #deconstruct on the object. + iseq.pop + iseq.topn(1) + jump = iseq.jump(-1) + + # Check if the object responds to #deconstruct, fail the match + # otherwise. 
+ branchnil.patch!(iseq) + iseq.dup + iseq.putobject(:deconstruct) + iseq.send(YARV.calldata(:respond_to?, 1)) + iseq.setn(3) + match_failures << iseq.branchunless(-1) + + # Call #deconstruct and ensure that it's an array, raise an error + # otherwise. + iseq.send(YARV.calldata(:deconstruct)) + iseq.setn(2) + iseq.dup + iseq.checktype(CheckType::TYPE_ARRAY) + match_error = iseq.branchunless(-1) + + # Ensure that the deconstructed array has the correct size, fail the + # match otherwise. + jump.patch!(iseq) + iseq.dup + iseq.send(YARV.calldata(:length)) + iseq.putobject(node.requireds.length) + iseq.send(YARV.calldata(:==, 1)) + match_failures << iseq.branchunless(-1) + + # For each required element, check if the deconstructed array contains + # the element, otherwise jump out to the top-level match failure. + iseq.dup + node.requireds.each_with_index do |required, index| + iseq.putobject(index) + iseq.send(YARV.calldata(:[], 1)) + + case required + when VarField + lookup = visit(required) + iseq.setlocal(lookup.index, lookup.level) + else + visit(required) + iseq.checkmatch(CheckMatch::TYPE_CASE) + match_failures << iseq.branchunless(-1) + end + + if index < node.requireds.length - 1 + iseq.dup + else + iseq.pop + jumps_to_exit << iseq.jump(-1) + end + end + + # Set up the routine here to raise an error to indicate that the type of + # the deconstructed array was incorrect. + match_error.patch!(iseq) + iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) + iseq.putobject(TypeError) + iseq.putobject("deconstruct must return Array") + iseq.send(YARV.calldata(:"core#raise", 2)) + iseq.pop + + # Patch all of the match failures to jump here so that we pop a final + # value before returning to the parent node. 
+ match_failures.each { |match_failure| match_failure.patch!(iseq) } + iseq.pop + jumps_to_exit + end + + def visit_assign(node) + case node.target + when ARefField + calldata = YARV.calldata(:[]=, 2) + + if !frozen_string_literal && specialized_instruction && + (node.target.index.parts.length == 1) + arg = node.target.index.parts.first + + if arg.is_a?(StringLiteral) && (arg.parts.length == 1) + string_part = arg.parts.first + + if string_part.is_a?(TStringContent) + visit(node.target.collection) + visit(node.value) + iseq.swap + iseq.topn(1) + iseq.opt_aset_with(string_part.value, calldata) + iseq.pop + return + end + end + end + + iseq.putnil + visit(node.target.collection) + visit(node.target.index) + visit(node.value) + iseq.setn(3) + iseq.send(calldata) + iseq.pop + when ConstPathField + names = constant_names(node.target) + name = names.pop + + if RUBY_VERSION >= "3.2" + iseq.opt_getconstant_path(names) + visit(node.value) + iseq.swap + iseq.topn(1) + iseq.swap + iseq.setconstant(name) + else + visit(node.value) + iseq.dup if last_statement? + iseq.opt_getconstant_path(names) + iseq.setconstant(name) + end + when Field + iseq.putnil + visit(node.target) + visit(node.value) + iseq.setn(2) + iseq.send(YARV.calldata(:"#{node.target.name.value}=", 1)) + iseq.pop + when TopConstField + name = node.target.constant.value.to_sym + + if RUBY_VERSION >= "3.2" + iseq.putobject(Object) + visit(node.value) + iseq.swap + iseq.topn(1) + iseq.swap + iseq.setconstant(name) + else + visit(node.value) + iseq.dup if last_statement? + iseq.putobject(Object) + iseq.setconstant(name) + end + when VarField + visit(node.value) + iseq.dup if last_statement? 
+ + case node.target.value + when Const + iseq.putspecialobject(PutSpecialObject::OBJECT_CONST_BASE) + iseq.setconstant(node.target.value.value.to_sym) + when CVar + iseq.setclassvariable(node.target.value.value.to_sym) + when GVar + iseq.setglobal(node.target.value.value.to_sym) + when Ident + lookup = visit(node.target) + + if lookup.local.is_a?(LocalTable::BlockLocal) + iseq.setblockparam(lookup.index, lookup.level) + else + iseq.setlocal(lookup.index, lookup.level) + end + when IVar + iseq.setinstancevariable(node.target.value.value.to_sym) + end + end + end + + def visit_assoc(node) + visit(node.key) + visit(node.value) + end + + def visit_assoc_splat(node) + visit(node.value) + end + + def visit_backref(node) + iseq.getspecial(GetSpecial::SVAR_BACKREF, node.value[1..].to_i << 1) + end + + def visit_bare_assoc_hash(node) + if (compiled = RubyVisitor.compile(node)) + iseq.duphash(compiled) + else + visit_all(node.assocs) + end + end + + def visit_binary(node) + case node.operator + when :"&&" + visit(node.left) + iseq.dup + + branchunless = iseq.branchunless(-1) + iseq.pop + + visit(node.right) + branchunless.patch!(iseq) + when :"||" + visit(node.left) + iseq.dup + + branchif = iseq.branchif(-1) + iseq.pop + + visit(node.right) + branchif.patch!(iseq) + else + visit(node.left) + visit(node.right) + iseq.send(YARV.calldata(node.operator, 1)) + end + end + + def visit_block(node) + with_child_iseq(iseq.block_child_iseq(node.location)) do + iseq.event(:RUBY_EVENT_B_CALL) + visit(node.block_var) + visit(node.bodystmt) + iseq.event(:RUBY_EVENT_B_RETURN) + iseq.leave + end + end + + def visit_block_var(node) + params = node.params + + if params.requireds.length == 1 && params.optionals.empty? && + !params.rest && params.posts.empty? && params.keywords.empty? 
&& + !params.keyword_rest && !params.block + iseq.argument_options[:ambiguous_param0] = true + end + + visit(node.params) + + node.locals.each { |local| iseq.local_table.plain(local.value.to_sym) } + end + + def visit_blockarg(node) + iseq.argument_options[:block_start] = iseq.argument_size + iseq.local_table.block(node.name.value.to_sym) + iseq.argument_size += 1 + end + + def visit_bodystmt(node) + visit(node.statements) + end + + def visit_call(node) + if node.is_a?(CallNode) + return( + visit_call( + CommandCall.new( + receiver: node.receiver, + operator: node.operator, + message: node.message, + arguments: node.arguments, + block: nil, + location: node.location + ) + ) + ) + end + + arg_parts = argument_parts(node.arguments) + argc = arg_parts.length + + # First we're going to check if we're calling a method on an array + # literal without any arguments. In that case there are some + # specializations we might be able to perform. + if argc == 0 && (node.message.is_a?(Ident) || node.message.is_a?(Op)) + case node.receiver + when ArrayLiteral + parts = node.receiver.contents&.parts || [] + + if parts.none? { |part| part.is_a?(ArgStar) } && + RubyVisitor.compile(node.receiver).nil? + case node.message.value + when "max" + visit(node.receiver.contents) + iseq.opt_newarray_max(parts.length) + return + when "min" + visit(node.receiver.contents) + iseq.opt_newarray_min(parts.length) + return + end + end + when StringLiteral + if RubyVisitor.compile(node.receiver).nil? 
+ case node.message.value + when "-@" + iseq.opt_str_uminus(node.receiver.parts.first.value) + return + when "freeze" + iseq.opt_str_freeze(node.receiver.parts.first.value) + return + end + end + end + end + + if node.receiver + if node.receiver.is_a?(VarRef) + lookup = iseq.local_variable(node.receiver.value.value.to_sym) + + if lookup.local.is_a?(LocalTable::BlockLocal) + iseq.getblockparamproxy(lookup.index, lookup.level) + else + visit(node.receiver) + end + else + visit(node.receiver) + end + else + iseq.putself + end + + branchnil = + if node.operator&.value == "&." + iseq.dup + iseq.branchnil(-1) + end + + flag = 0 + + arg_parts.each do |arg_part| + case arg_part + when ArgBlock + argc -= 1 + flag |= CallData::CALL_ARGS_BLOCKARG + visit(arg_part) + when ArgStar + flag |= CallData::CALL_ARGS_SPLAT + visit(arg_part) + when ArgsForward + flag |= CallData::CALL_ARGS_SPLAT + flag |= CallData::CALL_ARGS_BLOCKARG + + lookup = iseq.local_table.find(:*) + iseq.getlocal(lookup.index, lookup.level) + iseq.splatarray(arg_parts.length != 1) + + lookup = iseq.local_table.find(:&) + iseq.getblockparamproxy(lookup.index, lookup.level) + when BareAssocHash + flag |= CallData::CALL_KW_SPLAT + visit(arg_part) + else + visit(arg_part) + end + end + + block_iseq = visit(node.block) if node.block + flag |= CallData::CALL_ARGS_SIMPLE if block_iseq.nil? && flag == 0 + flag |= CallData::CALL_FCALL if node.receiver.nil? 
+ + iseq.send( + YARV.calldata(node.message.value.to_sym, argc, flag), + block_iseq + ) + branchnil.patch!(iseq) if branchnil + end + + def visit_case(node) + visit(node.value) if node.value + + clauses = [] + else_clause = nil + current = node.consequent + + while current + clauses << current + + if (current = current.consequent).is_a?(Else) + else_clause = current + break + end + end + + branches = + clauses.map do |clause| + visit(clause.arguments) + iseq.topn(1) + iseq.send( + YARV.calldata( + :===, + 1, + CallData::CALL_FCALL | CallData::CALL_ARGS_SIMPLE + ) + ) + [clause, iseq.branchif(:label_00)] + end + + iseq.pop + else_clause ? visit(else_clause) : iseq.putnil + iseq.leave + + branches.each_with_index do |(clause, branchif), index| + iseq.leave if index != 0 + branchif.patch!(iseq) + iseq.pop + visit(clause) + end + end + + def visit_class(node) + name = node.constant.constant.value.to_sym + class_iseq = + with_child_iseq(iseq.class_child_iseq(name, node.location)) do + iseq.event(:RUBY_EVENT_CLASS) + visit(node.bodystmt) + iseq.event(:RUBY_EVENT_END) + iseq.leave + end + + flags = DefineClass::TYPE_CLASS + + case node.constant + when ConstPathRef + flags |= DefineClass::FLAG_SCOPED + visit(node.constant.parent) + when ConstRef + iseq.putspecialobject(PutSpecialObject::OBJECT_CONST_BASE) + when TopConstRef + flags |= DefineClass::FLAG_SCOPED + iseq.putobject(Object) + end + + if node.superclass + flags |= DefineClass::FLAG_HAS_SUPERCLASS + visit(node.superclass) + else + iseq.putnil + end + + iseq.defineclass(name, class_iseq, flags) + end + + def visit_command(node) + visit_call( + CommandCall.new( + receiver: nil, + operator: nil, + message: node.message, + arguments: node.arguments, + block: node.block, + location: node.location + ) + ) + end + + def visit_command_call(node) + visit_call( + CommandCall.new( + receiver: node.receiver, + operator: node.operator, + message: node.message, + arguments: node.arguments, + block: node.block, + location: 
node.location + ) + ) + end + + def visit_const_path_field(node) + visit(node.parent) + end + + def visit_const_path_ref(node) + names = constant_names(node) + iseq.opt_getconstant_path(names) + end + + def visit_def(node) + name = node.name.value.to_sym + method_iseq = iseq.method_child_iseq(name.to_s, node.location) + + with_child_iseq(method_iseq) do + visit(node.params) if node.params + iseq.event(:RUBY_EVENT_CALL) + visit(node.bodystmt) + iseq.event(:RUBY_EVENT_RETURN) + iseq.leave + end + + if node.target + visit(node.target) + iseq.definesmethod(name, method_iseq) + else + iseq.definemethod(name, method_iseq) + end + + iseq.putobject(name) + end + + def visit_defined(node) + case node.value + when Assign + # If we're assigning to a local variable, then we need to make sure + # that we put it into the local table. + if node.value.target.is_a?(VarField) && + node.value.target.value.is_a?(Ident) + iseq.local_table.plain(node.value.target.value.value.to_sym) + end + + iseq.putobject("assignment") + when VarRef + value = node.value.value + name = value.value.to_sym + + case value + when Const + iseq.putnil + iseq.defined(Defined::TYPE_CONST, name, "constant") + when CVar + iseq.putnil + iseq.defined(Defined::TYPE_CVAR, name, "class variable") + when GVar + iseq.putnil + iseq.defined(Defined::TYPE_GVAR, name, "global-variable") + when Ident + iseq.putobject("local-variable") + when IVar + iseq.putnil + iseq.defined(Defined::TYPE_IVAR, name, "instance-variable") + when Kw + case name + when :false + iseq.putobject("false") + when :nil + iseq.putobject("nil") + when :self + iseq.putobject("self") + when :true + iseq.putobject("true") + end + end + when VCall + iseq.putself + + name = node.value.value.value.to_sym + iseq.defined(Defined::TYPE_FUNC, name, "method") + when YieldNode + iseq.putnil + iseq.defined(Defined::TYPE_YIELD, false, "yield") + when ZSuper + iseq.putnil + iseq.defined(Defined::TYPE_ZSUPER, false, "super") + else + iseq.putobject("expression") + 
end + end + + def visit_dyna_symbol(node) + if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + iseq.putobject(node.parts.first.value.to_sym) + end + end + + def visit_else(node) + visit(node.statements) + iseq.pop unless last_statement? + end + + def visit_elsif(node) + visit_if( + IfNode.new( + predicate: node.predicate, + statements: node.statements, + consequent: node.consequent, + location: node.location + ) + ) + end + + def visit_field(node) + visit(node.parent) + end + + def visit_float(node) + iseq.putobject(node.accept(RubyVisitor.new)) + end + + def visit_for(node) + visit(node.collection) + + name = node.index.value.value.to_sym + iseq.local_table.plain(name) + + block_iseq = + with_child_iseq(iseq.block_child_iseq(node.statements.location)) do + iseq.argument_options[:lead_num] ||= 0 + iseq.argument_options[:lead_num] += 1 + iseq.argument_options[:ambiguous_param0] = true + + iseq.argument_size += 1 + iseq.local_table.plain(2) + + iseq.getlocal(0, 0) + + local_variable = iseq.local_variable(name) + iseq.setlocal(local_variable.index, local_variable.level) + + iseq.event(:RUBY_EVENT_B_CALL) + iseq.nop + + visit(node.statements) + iseq.event(:RUBY_EVENT_B_RETURN) + iseq.leave + end + + iseq.send(YARV.calldata(:each, 0, 0), block_iseq) + end + + def visit_hash(node) + if (compiled = RubyVisitor.compile(node)) + iseq.duphash(compiled) + else + visit_all(node.assocs) + iseq.newhash(node.assocs.length * 2) + end + end + + def visit_heredoc(node) + if node.beginning.value.end_with?("`") + visit_xstring_literal(node) + elsif node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + visit(node.parts.first) + else + length = visit_string_parts(node) + iseq.concatstrings(length) + end + end + + def visit_if(node) + if node.predicate.is_a?(RangeNode) + iseq.getspecial(GetSpecial::SVAR_FLIPFLOP_START, 0) + branchif = iseq.branchif(-1) + + visit(node.predicate.left) + branchunless_true = iseq.branchunless(-1) + + iseq.putobject(true) + 
iseq.setspecial(GetSpecial::SVAR_FLIPFLOP_START) + branchif.patch!(iseq) + + visit(node.predicate.right) + branchunless_false = iseq.branchunless(-1) + + iseq.putobject(false) + iseq.setspecial(GetSpecial::SVAR_FLIPFLOP_START) + branchunless_false.patch!(iseq) + + visit(node.statements) + iseq.leave + branchunless_true.patch!(iseq) + iseq.putnil + else + visit(node.predicate) + branchunless = iseq.branchunless(-1) + visit(node.statements) + + if last_statement? + iseq.leave + branchunless.patch!(iseq) + + node.consequent ? visit(node.consequent) : iseq.putnil + else + iseq.pop + + if node.consequent + jump = iseq.jump(-1) + branchunless.patch!(iseq) + visit(node.consequent) + jump.patch!(iseq) + else + branchunless.patch!(iseq) + end + end + end + end + + def visit_if_op(node) + visit_if( + IfNode.new( + predicate: node.predicate, + statements: node.truthy, + consequent: + Else.new( + keyword: Kw.new(value: "else", location: Location.default), + statements: node.falsy, + location: Location.default + ), + location: Location.default + ) + ) + end + + def visit_imaginary(node) + iseq.putobject(node.accept(RubyVisitor.new)) + end + + def visit_int(node) + iseq.putobject(node.accept(RubyVisitor.new)) + end + + def visit_kwrest_param(node) + iseq.argument_options[:kwrest] = iseq.argument_size + iseq.argument_size += 1 + iseq.local_table.plain(node.name.value.to_sym) + end + + def visit_label(node) + iseq.putobject(node.accept(RubyVisitor.new)) + end + + def visit_lambda(node) + lambda_iseq = + with_child_iseq(iseq.block_child_iseq(node.location)) do + iseq.event(:RUBY_EVENT_B_CALL) + visit(node.params) + visit(node.statements) + iseq.event(:RUBY_EVENT_B_RETURN) + iseq.leave + end + + iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) + iseq.send(YARV.calldata(:lambda, 0, CallData::CALL_FCALL), lambda_iseq) + end + + def visit_lambda_var(node) + visit_block_var(node) + end + + def visit_massign(node) + visit(node.value) + iseq.dup + visit(node.target) + end + + def 
visit_method_add_block(node) + visit_call( + CommandCall.new( + receiver: node.call.receiver, + operator: node.call.operator, + message: node.call.message, + arguments: node.call.arguments, + block: node.block, + location: node.location + ) + ) + end + + def visit_mlhs(node) + lookups = [] + node.parts.each do |part| + case part + when VarField + lookups << visit(part) + end + end + + iseq.expandarray(lookups.length, 0) + lookups.each { |lookup| iseq.setlocal(lookup.index, lookup.level) } + end + + def visit_module(node) + name = node.constant.constant.value.to_sym + module_iseq = + with_child_iseq(iseq.module_child_iseq(name, node.location)) do + iseq.event(:RUBY_EVENT_CLASS) + visit(node.bodystmt) + iseq.event(:RUBY_EVENT_END) + iseq.leave + end + + flags = DefineClass::TYPE_MODULE + + case node.constant + when ConstPathRef + flags |= DefineClass::FLAG_SCOPED + visit(node.constant.parent) + when ConstRef + iseq.putspecialobject(PutSpecialObject::OBJECT_CONST_BASE) + when TopConstRef + flags |= DefineClass::FLAG_SCOPED + iseq.putobject(Object) + end + + iseq.putnil + iseq.defineclass(name, module_iseq, flags) + end + + def visit_mrhs(node) + if (compiled = RubyVisitor.compile(node)) + iseq.duparray(compiled) + else + visit_all(node.parts) + iseq.newarray(node.parts.length) + end + end + + def visit_not(node) + visit(node.statement) + iseq.send(YARV.calldata(:!)) + end + + def visit_opassign(node) + flag = CallData::CALL_ARGS_SIMPLE + if node.target.is_a?(ConstPathField) || node.target.is_a?(TopConstField) + flag |= CallData::CALL_FCALL + end + + case (operator = node.operator.value.chomp("=").to_sym) + when :"&&" + branchunless = nil + + with_opassign(node) do + iseq.dup + branchunless = iseq.branchunless(-1) + iseq.pop + visit(node.value) + end + + case node.target + when ARefField + iseq.leave + branchunless.patch!(iseq) + iseq.setn(3) + iseq.adjuststack(3) + when ConstPathField, TopConstField + branchunless.patch!(iseq) + iseq.swap + iseq.pop + else + 
branchunless.patch!(iseq) + end + when :"||" + if node.target.is_a?(ConstPathField) || + node.target.is_a?(TopConstField) + opassign_defined(node) + iseq.swap + iseq.pop + elsif node.target.is_a?(VarField) && + [Const, CVar, GVar].include?(node.target.value.class) + opassign_defined(node) + else + branchif = nil + + with_opassign(node) do + iseq.dup + branchif = iseq.branchif(-1) + iseq.pop + visit(node.value) + end + + if node.target.is_a?(ARefField) + iseq.leave + branchif.patch!(iseq) + iseq.setn(3) + iseq.adjuststack(3) + else + branchif.patch!(iseq) + end + end + else + with_opassign(node) do + visit(node.value) + iseq.send(YARV.calldata(operator, 1, flag)) + end + end + end + + def visit_params(node) + argument_options = iseq.argument_options + + if node.requireds.any? + argument_options[:lead_num] = 0 + + node.requireds.each do |required| + iseq.local_table.plain(required.value.to_sym) + iseq.argument_size += 1 + argument_options[:lead_num] += 1 + end + end + + node.optionals.each do |(optional, value)| + index = iseq.local_table.size + name = optional.value.to_sym + + iseq.local_table.plain(name) + iseq.argument_size += 1 + + argument_options[:opt] = [iseq.label] unless argument_options.key?( + :opt + ) + + visit(value) + iseq.setlocal(index, 0) + iseq.argument_options[:opt] << iseq.label + end + + visit(node.rest) if node.rest + + if node.posts.any? + argument_options[:post_start] = iseq.argument_size + argument_options[:post_num] = 0 + + node.posts.each do |post| + iseq.local_table.plain(post.value.to_sym) + iseq.argument_size += 1 + argument_options[:post_num] += 1 + end + end + + if node.keywords.any? + argument_options[:kwbits] = 0 + argument_options[:keyword] = [] + + keyword_bits_name = node.keyword_rest ? 
3 : 2 + iseq.argument_size += 1 + keyword_bits_index = iseq.local_table.locals.size + node.keywords.size + + node.keywords.each_with_index do |(keyword, value), keyword_index| + name = keyword.value.chomp(":").to_sym + index = iseq.local_table.size + + iseq.local_table.plain(name) + iseq.argument_size += 1 + argument_options[:kwbits] += 1 + + if value.nil? + argument_options[:keyword] << name + elsif (compiled = RubyVisitor.compile(value)) + argument_options[:keyword] << [name, compiled] + else + argument_options[:keyword] << [name] + iseq.checkkeyword(keyword_bits_index, keyword_index) + branchif = iseq.branchif(-1) + visit(value) + iseq.setlocal(index, 0) + branchif.patch!(iseq) + end + end + + iseq.local_table.plain(keyword_bits_name) + end + + if node.keyword_rest.is_a?(ArgsForward) + iseq.local_table.plain(:*) + iseq.local_table.plain(:&) + + iseq.argument_options[:rest_start] = iseq.argument_size + iseq.argument_options[:block_start] = iseq.argument_size + 1 + + iseq.argument_size += 2 + elsif node.keyword_rest + visit(node.keyword_rest) + end + + visit(node.block) if node.block + end + + def visit_paren(node) + visit(node.contents) + end + + def visit_program(node) + node.statements.body.each do |statement| + break unless statement.is_a?(Comment) + + if statement.value == "# frozen_string_literal: true" + @frozen_string_literal = true + end + end + + preexes = [] + statements = [] + + node.statements.body.each do |statement| + case statement + when Comment, EmbDoc, EndContent, VoidStmt + # ignore + when BEGINBlock + preexes << statement + else + statements << statement + end + end + + top_iseq = + InstructionSequence.new( + :top, + "", + nil, + node.location, + frozen_string_literal: frozen_string_literal, + operands_unification: operands_unification, + specialized_instruction: specialized_instruction + ) + + with_child_iseq(top_iseq) do + visit_all(preexes) + + if statements.empty? 
+ iseq.putnil + else + *statements, last_statement = statements + visit_all(statements) + with_last_statement { visit(last_statement) } + end + + iseq.leave + end + end + + def visit_qsymbols(node) + iseq.duparray(node.accept(RubyVisitor.new)) + end + + def visit_qwords(node) + if frozen_string_literal + iseq.duparray(node.accept(RubyVisitor.new)) + else + visit_all(node.elements) + iseq.newarray(node.elements.length) + end + end + + def visit_range(node) + if (compiled = RubyVisitor.compile(node)) + iseq.putobject(compiled) + else + visit(node.left) + visit(node.right) + iseq.newrange(node.operator.value == ".." ? 0 : 1) + end + end + + def visit_rassign(node) + iseq.putnil + + if node.operator.is_a?(Kw) + jumps = [] + + visit(node.value) + iseq.dup + + case node.pattern + when VarField + lookup = visit(node.pattern) + iseq.setlocal(lookup.index, lookup.level) + jumps << iseq.jump(-1) + else + jumps.concat(visit(node.pattern)) + end + + iseq.pop + iseq.pop + iseq.putobject(false) + iseq.leave + + jumps.each { |jump| jump.patch!(iseq) } + iseq.adjuststack(2) + iseq.putobject(true) + else + jumps_to_match = [] + + iseq.putnil + iseq.putobject(false) + iseq.putnil + iseq.putnil + visit(node.value) + iseq.dup + + # Visit the pattern. If it matches, control jumps past the + # error-raising code below to the success path; the jumps collected + # here are patched once that code has been emitted. + case node.pattern + when VarField + lookup = visit(node.pattern) + iseq.setlocal(lookup.index, lookup.level) + jumps_to_match << iseq.jump(-1) + else + jumps_to_match.concat(visit(node.pattern)) + end + + # First we're going to push the core onto the stack, then we'll check + # if the value to match is truthy. If it is, we'll jump down to raise + # NoMatchingPatternKeyError. Otherwise we'll raise + # NoMatchingPatternError. + iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) + iseq.topn(4) + branchif_no_key = iseq.branchif(-1) + + # Here we're going to raise NoMatchingPatternError.
+ iseq.putobject(NoMatchingPatternError) + iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) + iseq.putobject("%p: %s") + iseq.topn(4) + iseq.topn(7) + iseq.send(YARV.calldata(:"core#sprintf", 3)) + iseq.send(YARV.calldata(:"core#raise", 2)) + jump_to_exit = iseq.jump(-1) + + # Here we're going to raise NoMatchingPatternKeyError. + branchif_no_key.patch!(iseq) + iseq.putobject(NoMatchingPatternKeyError) + iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) + iseq.putobject("%p: %s") + iseq.topn(4) + iseq.topn(7) + iseq.send(YARV.calldata(:"core#sprintf", 3)) + iseq.topn(7) + iseq.topn(9) + iseq.send( + YARV.calldata(:new, 1, CallData::CALL_KWARG, %i[matchee key]) + ) + iseq.send(YARV.calldata(:"core#raise", 1)) + + # This runs when the pattern fails to match. + jump_to_exit.patch!(iseq) + iseq.adjuststack(7) + iseq.putnil + iseq.leave + + # This runs when the pattern matches successfully. + jumps_to_match.each { |jump| jump.patch!(iseq) } + iseq.adjuststack(6) + iseq.putnil + end + end + + def visit_rational(node) + iseq.putobject(node.accept(RubyVisitor.new)) + end + + def visit_regexp_literal(node) + if (compiled = RubyVisitor.compile(node)) + iseq.putobject(compiled) + else + flags = RubyVisitor.new.visit_regexp_literal_flags(node) + length = visit_string_parts(node) + iseq.toregexp(flags, length) + end + end + + def visit_rest_param(node) + iseq.local_table.plain(node.name.value.to_sym) + iseq.argument_options[:rest_start] = iseq.argument_size + iseq.argument_size += 1 + end + + def visit_sclass(node) + visit(node.target) + iseq.putnil + + singleton_iseq = + with_child_iseq(iseq.singleton_class_child_iseq(node.location)) do + iseq.event(:RUBY_EVENT_CLASS) + visit(node.bodystmt) + iseq.event(:RUBY_EVENT_END) + iseq.leave + end + + iseq.defineclass( + :singletonclass, + singleton_iseq, + DefineClass::TYPE_SINGLETON_CLASS + ) + end + + def visit_statements(node) + statements = + node.body.select do |statement| + case statement + when Comment, EmbDoc, 
EndContent, VoidStmt + false + else + true + end + end + + statements.empty? ? iseq.putnil : visit_all(statements) + end + + def visit_string_concat(node) + value = node.left.parts.first.value + node.right.parts.first.value + + visit_string_literal( + StringLiteral.new( + parts: [TStringContent.new(value: value, location: node.location)], + quote: node.left.quote, + location: node.location + ) + ) + end + + def visit_string_embexpr(node) + visit(node.statements) + end + + def visit_string_literal(node) + if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + visit(node.parts.first) + else + length = visit_string_parts(node) + iseq.concatstrings(length) + end + end + + def visit_super(node) + iseq.putself + visit(node.arguments) + iseq.invokesuper( + YARV.calldata( + nil, + argument_parts(node.arguments).length, + CallData::CALL_FCALL | CallData::CALL_ARGS_SIMPLE | + CallData::CALL_SUPER + ), + nil + ) + end + + def visit_symbol_literal(node) + iseq.putobject(node.accept(RubyVisitor.new)) + end + + def visit_symbols(node) + if (compiled = RubyVisitor.compile(node)) + iseq.duparray(compiled) + else + node.elements.each do |element| + if element.parts.length == 1 && + element.parts.first.is_a?(TStringContent) + iseq.putobject(element.parts.first.value.to_sym) + else + length = visit_string_parts(element) + iseq.concatstrings(length) + iseq.intern + end + end + + iseq.newarray(node.elements.length) + end + end + + def visit_top_const_ref(node) + iseq.opt_getconstant_path(constant_names(node)) + end + + def visit_tstring_content(node) + if frozen_string_literal + iseq.putobject(node.accept(RubyVisitor.new)) + else + iseq.putstring(node.accept(RubyVisitor.new)) + end + end + + def visit_unary(node) + method_id = + case node.operator + when "+", "-" + "#{node.operator}@" + else + node.operator + end + + visit_call( + CommandCall.new( + receiver: node.statement, + operator: nil, + message: Ident.new(value: method_id, location: Location.default), + arguments: 
nil, + block: nil, + location: Location.default + ) + ) + end + + def visit_undef(node) + node.symbols.each_with_index do |symbol, index| + iseq.pop if index != 0 + iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) + iseq.putspecialobject(PutSpecialObject::OBJECT_CBASE) + visit(symbol) + iseq.send(YARV.calldata(:"core#undef_method", 2)) + end + end + + def visit_unless(node) + visit(node.predicate) + branchunless = iseq.branchunless(-1) + node.consequent ? visit(node.consequent) : iseq.putnil + + if last_statement? + iseq.leave + branchunless.patch!(iseq) + + visit(node.statements) + else + iseq.pop + + if node.consequent + jump = iseq.jump(-1) + branchunless.patch!(iseq) + visit(node.consequent) + jump.patch!(iseq.label) + else + branchunless.patch!(iseq) + end + end + end + + def visit_until(node) + jumps = [] + + jumps << iseq.jump(-1) + iseq.putnil + iseq.pop + jumps << iseq.jump(-1) + + label = iseq.label + visit(node.statements) + iseq.pop + jumps.each { |jump| jump.patch!(iseq) } + + visit(node.predicate) + iseq.branchunless(label) + iseq.putnil if last_statement? 
+ end + + def visit_var_field(node) + case node.value + when CVar, IVar + name = node.value.value.to_sym + iseq.inline_storage_for(name) + when Ident + name = node.value.value.to_sym + + if (local_variable = iseq.local_variable(name)) + local_variable + else + iseq.local_table.plain(name) + iseq.local_variable(name) + end + end + end + + def visit_var_ref(node) + case node.value + when Const + iseq.opt_getconstant_path(constant_names(node)) + when CVar + name = node.value.value.to_sym + iseq.getclassvariable(name) + when GVar + iseq.getglobal(node.value.value.to_sym) + when Ident + lookup = iseq.local_variable(node.value.value.to_sym) + + case lookup.local + when LocalTable::BlockLocal + iseq.getblockparam(lookup.index, lookup.level) + when LocalTable::PlainLocal + iseq.getlocal(lookup.index, lookup.level) + end + when IVar + name = node.value.value.to_sym + iseq.getinstancevariable(name) + when Kw + case node.value.value + when "false" + iseq.putobject(false) + when "nil" + iseq.putnil + when "self" + iseq.putself + when "true" + iseq.putobject(true) + end + end + end + + def visit_vcall(node) + iseq.putself + iseq.send( + YARV.calldata( + node.value.value.to_sym, + 0, + CallData::CALL_FCALL | CallData::CALL_VCALL | + CallData::CALL_ARGS_SIMPLE + ) + ) + end + + def visit_when(node) + visit(node.statements) + end + + def visit_while(node) + jumps = [] + + jumps << iseq.jump(-1) + iseq.putnil + iseq.pop + jumps << iseq.jump(-1) + + label = iseq.label + visit(node.statements) + iseq.pop + jumps.each { |jump| jump.patch!(iseq) } + + visit(node.predicate) + iseq.branchif(label) + iseq.putnil if last_statement? 
+ end + + def visit_word(node) + if node.parts.length == 1 && node.parts.first.is_a?(TStringContent) + visit(node.parts.first) + else + length = visit_string_parts(node) + iseq.concatstrings(length) + end + end + + def visit_words(node) + if frozen_string_literal && (compiled = RubyVisitor.compile(node)) + iseq.duparray(compiled) + else + visit_all(node.elements) + iseq.newarray(node.elements.length) + end + end + + def visit_xstring_literal(node) + iseq.putself + length = visit_string_parts(node) + iseq.concatstrings(node.parts.length) if length > 1 + iseq.send( + YARV.calldata( + :`, + 1, + CallData::CALL_FCALL | CallData::CALL_ARGS_SIMPLE + ) + ) + end + + def visit_yield(node) + parts = argument_parts(node.arguments) + visit_all(parts) + iseq.invokeblock(YARV.calldata(nil, parts.length)) + end + + def visit_zsuper(_node) + iseq.putself + iseq.invokesuper( + YARV.calldata( + nil, + 0, + CallData::CALL_FCALL | CallData::CALL_ARGS_SIMPLE | + CallData::CALL_SUPER | CallData::CALL_ZSUPER + ), + nil + ) + end + + private + + # This is a helper that is used in places where arguments may be present + # or they may be wrapped in parentheses. It's meant to descend down the + # tree and return an array of argument nodes. + def argument_parts(node) + case node + when nil + [] + when Args + node.parts + when ArgParen + if node.arguments.is_a?(ArgsForward) + [node.arguments] + else + node.arguments.parts + end + when Paren + node.contents.parts + end + end + + # Constant names when they are being assigned or referenced come in as a + # tree, but it's more convenient to work with them as an array. This + # method converts them into that array. This is nice because it's the + # operand that goes to opt_getconstant_path in Ruby 3.2. 
+ def constant_names(node) + current = node + names = [] + + while current.is_a?(ConstPathField) || current.is_a?(ConstPathRef) + names.unshift(current.constant.value.to_sym) + current = current.parent + end + + case current + when VarField, VarRef + names.unshift(current.value.value.to_sym) + when TopConstRef + names.unshift(current.constant.value.to_sym) + names.unshift(:"") + end + + names + end + + # For the most part when an OpAssign (operator assignment) node with a ||= + # operator is being compiled it's a matter of reading the target, checking + # if the value should be evaluated, evaluating it if so, and then writing + # the result back to the target. + # + # However, in certain kinds of assignments (X, ::X, X::Y, @@x, and $x) we + # first check if the value is defined using the defined instruction. I + # don't know why it is necessary, and suspect that it isn't. + def opassign_defined(node) + case node.target + when ConstPathField + visit(node.target.parent) + name = node.target.constant.value.to_sym + + iseq.dup + iseq.defined(Defined::TYPE_CONST_FROM, name, true) + when TopConstField + name = node.target.constant.value.to_sym + + iseq.putobject(Object) + iseq.dup + iseq.defined(Defined::TYPE_CONST_FROM, name, true) + when VarField + name = node.target.value.value.to_sym + iseq.putnil + + case node.target.value + when Const + iseq.defined(Defined::TYPE_CONST, name, true) + when CVar + iseq.defined(Defined::TYPE_CVAR, name, true) + when GVar + iseq.defined(Defined::TYPE_GVAR, name, true) + end + end + + branchunless = iseq.branchunless(-1) + + case node.target + when ConstPathField, TopConstField + iseq.dup + iseq.putobject(true) + iseq.getconstant(name) + when VarField + case node.target.value + when Const + iseq.opt_getconstant_path(constant_names(node.target)) + when CVar + iseq.getclassvariable(name) + when GVar + iseq.getglobal(name) + end + end + + iseq.dup + branchif = iseq.branchif(-1) + iseq.pop + + branchunless.patch!(iseq) + visit(node.value) + 
+ case node.target + when ConstPathField, TopConstField + iseq.dupn(2) + iseq.swap + iseq.setconstant(name) + when VarField + iseq.dup + + case node.target.value + when Const + iseq.putspecialobject(PutSpecialObject::OBJECT_CONST_BASE) + iseq.setconstant(name) + when CVar + iseq.setclassvariable(name) + when GVar + iseq.setglobal(name) + end + end + + branchif.patch!(iseq) + end + + # Whenever a value is interpolated into a string-like structure, these + # three instructions are pushed. + def push_interpolate + iseq.dup + iseq.objtostring( + YARV.calldata( + :to_s, + 0, + CallData::CALL_FCALL | CallData::CALL_ARGS_SIMPLE + ) + ) + iseq.anytostring + end + + # There are a lot of nodes in the AST that act as containers of parts of + # strings. This includes things like string literals, regular expressions, + # heredocs, etc. This method will visit all the parts of a string within + # those containers. It returns the number of values it pushed, which + # includes a leading empty string when the first part is not plain string + # content. + def visit_string_parts(node) + length = 0 + + unless node.parts.first.is_a?(TStringContent) + iseq.putobject("") + length += 1 + end + + node.parts.each do |part| + case part + when StringDVar + visit(part.variable) + push_interpolate + when StringEmbExpr + visit(part) + push_interpolate + when TStringContent + iseq.putobject(part.accept(RubyVisitor.new)) + end + + length += 1 + end + + length + end + + # The current instruction sequence that we're compiling is always stored + # on the compiler. When we descend into a node that has its own + # instruction sequence, this method can be called to temporarily set the + # new value of the instruction sequence, yield, and then set it back. The + # child instruction sequence is returned. + def with_child_iseq(child_iseq) + parent_iseq = iseq + + begin + @iseq = child_iseq + yield + child_iseq + ensure + @iseq = parent_iseq + end + end + + # When we're compiling the last statement of a set of statements within a + # scope, the instructions sometimes change from pops to leaves. These + # kinds of peephole optimizations can reduce the overall number of + # instructions.
Therefore, we keep track of whether we're compiling the + # last statement of a scope and allow visit methods to query that + # information. + def with_last_statement + previous = @last_statement + @last_statement = true + + begin + yield + ensure + @last_statement = previous + end + end + + def last_statement? + @last_statement + end + + # OpAssign nodes can have a number of different kinds of nodes as their + # "target" (i.e., the left-hand side of the assignment). When compiling + # these nodes we typically need to first fetch the current value of the + # variable, then perform some kind of action, then store the result back + # into the variable. This method handles that by first fetching the value, + # then yielding to the block, then storing the result. + def with_opassign(node) + case node.target + when ARefField + iseq.putnil + visit(node.target.collection) + visit(node.target.index) + + iseq.dupn(2) + iseq.send(YARV.calldata(:[], 1)) + + yield + + iseq.setn(3) + iseq.send(YARV.calldata(:[]=, 2)) + iseq.pop + when ConstPathField + name = node.target.constant.value.to_sym + + visit(node.target.parent) + iseq.dup + iseq.putobject(true) + iseq.getconstant(name) + + yield + + if node.operator.value == "&&=" + iseq.dupn(2) + else + iseq.swap + iseq.topn(1) + end + + iseq.swap + iseq.setconstant(name) + when TopConstField + name = node.target.constant.value.to_sym + + iseq.putobject(Object) + iseq.dup + iseq.putobject(true) + iseq.getconstant(name) + + yield + + if node.operator.value == "&&=" + iseq.dupn(2) + else + iseq.swap + iseq.topn(1) + end + + iseq.swap + iseq.setconstant(name) + when VarField + case node.target.value + when Const + names = constant_names(node.target) + iseq.opt_getconstant_path(names) + + yield + + iseq.dup + iseq.putspecialobject(PutSpecialObject::OBJECT_CONST_BASE) + iseq.setconstant(names.last) + when CVar + name = node.target.value.value.to_sym + iseq.getclassvariable(name) + + yield + + iseq.dup + iseq.setclassvariable(name) + when 
GVar + name = node.target.value.value.to_sym + iseq.getglobal(name) + + yield + + iseq.dup + iseq.setglobal(name) + when Ident + local_variable = visit(node.target) + iseq.getlocal(local_variable.index, local_variable.level) + + yield + + iseq.dup + iseq.setlocal(local_variable.index, local_variable.level) + when IVar + name = node.target.value.value.to_sym + iseq.getinstancevariable(name) + + yield + + iseq.dup + iseq.setinstancevariable(name) + end + end + end + end + end +end diff --git a/lib/syntax_tree/yarv/disassembler.rb b/lib/syntax_tree/yarv/disassembler.rb index 7a6e8893..d606e3cc 100644 --- a/lib/syntax_tree/yarv/disassembler.rb +++ b/lib/syntax_tree/yarv/disassembler.rb @@ -87,166 +87,113 @@ def disassemble(iseq) when GetLocalWC0 local = iseq.local_table.locals[insn.index] clause << VarRef(Ident(local.name.to_s)) - when Array - case insn[0] - when :jump - clause << Assign(disasm_label.field, node_for(insn[1])) - clause << Next(Args([])) - when :leave - value = Args([clause.pop]) - clause << (iseq.type == :top ? Break(value) : ReturnNode(value)) - when :opt_and - left, right = clause.pop(2) - clause << Binary(left, :&, right) - when :opt_aref - collection, arg = clause.pop(2) - clause << ARef(collection, Args([arg])) - when :opt_aset - collection, arg, value = clause.pop(3) + when Jump + clause << Assign(disasm_label.field, node_for(insn.label)) + clause << Next(Args([])) + when Leave + value = Args([clause.pop]) + clause << (iseq.type == :top ? 
Break(value) : ReturnNode(value)) + when OptAnd, OptDiv, OptEq, OptGE, OptGT, OptLE, OptLT, OptLTLT, + OptMinus, OptMod, OptMult, OptOr, OptPlus + left, right = clause.pop(2) + clause << Binary(left, insn.calldata.method, right) + when OptAref + collection, arg = clause.pop(2) + clause << ARef(collection, Args([arg])) + when OptAset + collection, arg, value = clause.pop(3) - clause << if value.is_a?(Binary) && value.left.is_a?(ARef) && - collection === value.left.collection && - arg === value.left.index.parts[0] - OpAssign( - ARefField(collection, Args([arg])), - Op("#{value.operator}="), - value.right - ) - else - Assign(ARefField(collection, Args([arg])), value) - end - when :opt_div - left, right = clause.pop(2) - clause << Binary(left, :/, right) - when :opt_eq - left, right = clause.pop(2) - clause << Binary(left, :==, right) - when :opt_ge - left, right = clause.pop(2) - clause << Binary(left, :>=, right) - when :opt_gt - left, right = clause.pop(2) - clause << Binary(left, :>, right) - when :opt_le - left, right = clause.pop(2) - clause << Binary(left, :<=, right) - when :opt_lt - left, right = clause.pop(2) - clause << Binary(left, :<, right) - when :opt_ltlt - left, right = clause.pop(2) - clause << Binary(left, :<<, right) - when :opt_minus - left, right = clause.pop(2) - clause << Binary(left, :-, right) - when :opt_mod - left, right = clause.pop(2) - clause << Binary(left, :%, right) - when :opt_mult - left, right = clause.pop(2) - clause << Binary(left, :*, right) - when :opt_neq - left, right = clause.pop(2) - clause << Binary(left, :"!=", right) - when :opt_or - left, right = clause.pop(2) - clause << Binary(left, :|, right) - when :opt_plus - left, right = clause.pop(2) - clause << Binary(left, :+, right) - when :opt_send_without_block - if insn[1][:flag] & VM_CALL_FCALL > 0 - if insn[1][:orig_argc] == 0 - clause.pop - clause << CallNode(nil, nil, Ident(insn[1][:mid]), Args([])) - elsif insn[1][:orig_argc] == 1 && insn[1][:mid].end_with?("=") - 
_receiver, argument = clause.pop(2) - clause << Assign( - CallNode(nil, nil, Ident(insn[1][:mid][0..-2]), nil), - argument - ) - else - _receiver, *arguments = clause.pop(insn[1][:orig_argc] + 1) - clause << CallNode( - nil, - nil, - Ident(insn[1][:mid]), - ArgParen(Args(arguments)) - ) - end - else - if insn[1][:orig_argc] == 0 - clause << CallNode( - clause.pop, - Period("."), - Ident(insn[1][:mid]), - nil - ) - elsif insn[1][:orig_argc] == 1 && insn[1][:mid].end_with?("=") - receiver, argument = clause.pop(2) - clause << Assign( - CallNode( - receiver, - Period("."), - Ident(insn[1][:mid][0..-2]), - nil - ), - argument - ) - else - receiver, *arguments = clause.pop(insn[1][:orig_argc] + 1) - clause << CallNode( - receiver, - Period("."), - Ident(insn[1][:mid]), - ArgParen(Args(arguments)) - ) - end - end - when :putobject - case insn[1] - when Float - clause << FloatLiteral(insn[1].inspect) - when Integer - clause << Int(insn[1].inspect) - else - raise "Unknown object type: #{insn[1].class.name}" - end - when :putobject_INT2FIX_0_ - clause << Int("0") - when :putobject_INT2FIX_1_ - clause << Int("1") - when :putself - clause << VarRef(Kw("self")) - when :setglobal - target = GVar(insn[1].to_s) - value = clause.pop + clause << if value.is_a?(Binary) && value.left.is_a?(ARef) && + collection === value.left.collection && + arg === value.left.index.parts[0] + OpAssign( + ARefField(collection, Args([arg])), + Op("#{value.operator}="), + value.right + ) + else + Assign(ARefField(collection, Args([arg])), value) + end + when OptNEq + left, right = clause.pop(2) + clause << Binary(left, :"!=", right) + when OptSendWithoutBlock + method = insn.calldata.method.to_s + argc = insn.calldata.argc - clause << if value.is_a?(Binary) && VarRef(target) === value.left - OpAssign( - VarField(target), - Op("#{value.operator}="), - value.right + if insn.calldata.flag?(CallData::CALL_FCALL) + if argc == 0 + clause.pop + clause << CallNode(nil, nil, Ident(method), Args([])) + elsif 
argc == 1 && method.end_with?("=") + _receiver, argument = clause.pop(2) + clause << Assign( + CallNode(nil, nil, Ident(method[0..-2]), nil), + argument ) else - Assign(VarField(target), value) + _receiver, *arguments = clause.pop(argc + 1) + clause << CallNode( + nil, + nil, + Ident(method), + ArgParen(Args(arguments)) + ) end - when :setlocal_WC_0 - target = Ident(local_name(insn[1], 0)) - value = clause.pop - - clause << if value.is_a?(Binary) && VarRef(target) === value.left - OpAssign( - VarField(target), - Op("#{value.operator}="), - value.right + else + if argc == 0 + clause << CallNode(clause.pop, Period("."), Ident(method), nil) + elsif argc == 1 && method.end_with?("=") + receiver, argument = clause.pop(2) + clause << Assign( + CallNode(receiver, Period("."), Ident(method[0..-2]), nil), + argument ) else - Assign(VarField(target), value) + receiver, *arguments = clause.pop(argc + 1) + clause << CallNode( + receiver, + Period("."), + Ident(method), + ArgParen(Args(arguments)) + ) end + end + when PutObject + case insn.object + when Float + clause << FloatLiteral(insn.object.inspect) + when Integer + clause << Int(insn.object.inspect) + else + raise "Unknown object type: #{insn.object.class.name}" + end + when PutObjectInt2Fix0 + clause << Int("0") + when PutObjectInt2Fix1 + clause << Int("1") + when PutSelf + clause << VarRef(Kw("self")) + when SetGlobal + target = GVar(insn.name.to_s) + value = clause.pop + + clause << if value.is_a?(Binary) && VarRef(target) === value.left + OpAssign(VarField(target), Op("#{value.operator}="), value.right) + else + Assign(VarField(target), value) + end + when SetLocalWC0 + target = Ident(local_name(insn.index, 0)) + value = clause.pop + + clause << if value.is_a?(Binary) && VarRef(target) === value.left + OpAssign(VarField(target), Op("#{value.operator}="), value.right) else - raise "Unknown instruction #{insn[0]}" + Assign(VarField(target), value) end + else + raise "Unknown instruction #{insn[0]}" end end diff --git 
a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb new file mode 100644 index 00000000..c59d02c7 --- /dev/null +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -0,0 +1,671 @@ +# frozen_string_literal: true + +module SyntaxTree + # This module provides an object representation of the YARV bytecode. + module YARV + # This class is meant to mirror RubyVM::InstructionSequence. It contains a + # list of instructions along with the metadata pertaining to them. It also + # functions as a builder for the instruction sequence. + class InstructionSequence + MAGIC = "YARVInstructionSequence/SimpleDataFormat" + + # This provides a handle to the rb_iseq_load function, which allows you to + # pass a serialized iseq to Ruby and have it return a + # RubyVM::InstructionSequence object. + ISEQ_LOAD = + Fiddle::Function.new( + Fiddle::Handle::DEFAULT["rb_iseq_load"], + [Fiddle::TYPE_VOIDP] * 3, + Fiddle::TYPE_VOIDP + ) + + # This object is used to track the size of the stack at any given time. It + # is effectively a mini symbolic interpreter. It's necessary because when + # instruction sequences get serialized they include a :stack_max field on + # them. This field is used to determine how much stack space to allocate + # for the instruction sequence. + class Stack + attr_reader :current_size, :maximum_size + + def initialize + @current_size = 0 + @maximum_size = 0 + end + + def change_by(value) + @current_size += value + @maximum_size = @current_size if @current_size > @maximum_size + end + end + + # The type of the instruction sequence. + attr_reader :type + + # The name of the instruction sequence. + attr_reader :name + + # The parent instruction sequence, if there is one. + attr_reader :parent_iseq + + # The location of the root node of this instruction sequence. + attr_reader :location + + # This is the list of information about the arguments to this + # instruction sequence. 
+ attr_accessor :argument_size + attr_reader :argument_options + + # The list of instructions for this instruction sequence. + attr_reader :insns + + # The table of local variables. + attr_reader :local_table + + # The hash of names of instance and class variables pointing to the + # index of their associated inline storage. + attr_reader :inline_storages + + # The index of the next inline storage that will be created. + attr_reader :storage_index + + # An object that will track the current size of the stack and the + # maximum size of the stack for this instruction sequence. + attr_reader :stack + + # These are various compilation options provided. + attr_reader :frozen_string_literal, + :operands_unification, + :specialized_instruction + + def initialize( + type, + name, + parent_iseq, + location, + frozen_string_literal: false, + operands_unification: true, + specialized_instruction: true + ) + @type = type + @name = name + @parent_iseq = parent_iseq + @location = location + + @argument_size = 0 + @argument_options = {} + + @local_table = LocalTable.new + @inline_storages = {} + @insns = [] + @storage_index = 0 + @stack = Stack.new + + @frozen_string_literal = frozen_string_literal + @operands_unification = operands_unification + @specialized_instruction = specialized_instruction + end + + ########################################################################## + # Query methods + ########################################################################## + + def local_variable(name, level = 0) + if (lookup = local_table.find(name, level)) + lookup + elsif parent_iseq + parent_iseq.local_variable(name, level + 1) + end + end + + def inline_storage + storage = storage_index + @storage_index += 1 + storage + end + + def inline_storage_for(name) + inline_storages[name] = inline_storage unless inline_storages.key?(name) + + inline_storages[name] + end + + def length + insns.inject(0) do |sum, insn| + case insn + when Integer, Symbol + sum + else + sum + insn.length 
+ end + end + end + + def eval + compiled = to_a + + # Temporary hack until we get these working. + compiled[4][:node_id] = 11 + compiled[4][:node_ids] = [1, 0, 3, 2, 6, 7, 9, -1] + + Fiddle.dlunwrap(ISEQ_LOAD.call(Fiddle.dlwrap(compiled), 0, nil)).eval + end + + def to_a + versions = RUBY_VERSION.split(".").map(&:to_i) + + [ + MAGIC, + versions[0], + versions[1], + 1, + { + arg_size: argument_size, + local_size: local_table.size, + stack_max: stack.maximum_size + }, + name, + "", + "", + location.start_line, + type, + local_table.names, + argument_options, + [], + insns.map do |insn| + insn.is_a?(Integer) || insn.is_a?(Symbol) ? insn : insn.to_a(self) + end + ] + end + + ########################################################################## + # Child instruction sequence methods + ########################################################################## + + def child_iseq(type, name, location) + InstructionSequence.new( + type, + name, + self, + location, + frozen_string_literal: frozen_string_literal, + operands_unification: operands_unification, + specialized_instruction: specialized_instruction + ) + end + + def block_child_iseq(location) + current = self + current = current.parent_iseq while current.type == :block + child_iseq(:block, "block in #{current.name}", location) + end + + def class_child_iseq(name, location) + child_iseq(:class, "", location) + end + + def method_child_iseq(name, location) + child_iseq(:method, name, location) + end + + def module_child_iseq(name, location) + child_iseq(:class, "", location) + end + + def singleton_class_child_iseq(location) + child_iseq(:class, "singleton class", location) + end + + ########################################################################## + # Instruction push methods + ########################################################################## + + def push(insn) + insns << insn + + case insn + when Integer, Symbol, Array + insn + else + stack.change_by(-insn.pops + insn.pushes) + insn + end + 
end + + # This creates a new label at the current length of the instruction + # sequence. It is used as the operand for jump instructions. + def label + name = :"label_#{length}" + insns.last == name ? name : event(name) + end + + def event(name) + push(name) + end + + def adjuststack(number) + push(AdjustStack.new(number)) + end + + def anytostring + push(AnyToString.new) + end + + def branchif(label) + push(BranchIf.new(label)) + end + + def branchnil(label) + push(BranchNil.new(label)) + end + + def branchunless(label) + push(BranchUnless.new(label)) + end + + def checkkeyword(keyword_bits_index, keyword_index) + push(CheckKeyword.new(keyword_bits_index, keyword_index)) + end + + def checkmatch(type) + push(CheckMatch.new(type)) + end + + def checktype(type) + push(CheckType.new(type)) + end + + def concatarray + push(ConcatArray.new) + end + + def concatstrings(number) + push(ConcatStrings.new(number)) + end + + def defined(type, name, message) + push(Defined.new(type, name, message)) + end + + def defineclass(name, class_iseq, flags) + push(DefineClass.new(name, class_iseq, flags)) + end + + def definemethod(name, method_iseq) + push(DefineMethod.new(name, method_iseq)) + end + + def definesmethod(name, method_iseq) + push(DefineSMethod.new(name, method_iseq)) + end + + def dup + push(Dup.new) + end + + def duparray(object) + push(DupArray.new(object)) + end + + def duphash(object) + push(DupHash.new(object)) + end + + def dupn(number) + push(DupN.new(number)) + end + + def expandarray(length, flags) + push(ExpandArray.new(length, flags)) + end + + def getblockparam(index, level) + push(GetBlockParam.new(index, level)) + end + + def getblockparamproxy(index, level) + push(GetBlockParamProxy.new(index, level)) + end + + def getclassvariable(name) + if RUBY_VERSION < "3.0" + push(Legacy::GetClassVariable.new(name)) + else + push(GetClassVariable.new(name, inline_storage_for(name))) + end + end + + def getconstant(name) + push(GetConstant.new(name)) + end + + def 
getglobal(name) + push(GetGlobal.new(name)) + end + + def getinstancevariable(name) + if RUBY_VERSION < "3.2" + push(GetInstanceVariable.new(name, inline_storage_for(name))) + else + push(GetInstanceVariable.new(name, inline_storage)) + end + end + + def getlocal(index, level) + if operands_unification + # Specialize the getlocal instruction based on the level of the + # local variable. If it's 0 or 1, then there's a specialized + # instruction that will look at the current scope or the parent + # scope, respectively, and requires fewer operands. + case level + when 0 + push(GetLocalWC0.new(index)) + when 1 + push(GetLocalWC1.new(index)) + else + push(GetLocal.new(index, level)) + end + else + push(GetLocal.new(index, level)) + end + end + + def getspecial(key, type) + push(GetSpecial.new(key, type)) + end + + def intern + push(Intern.new) + end + + def invokeblock(calldata) + push(InvokeBlock.new(calldata)) + end + + def invokesuper(calldata, block_iseq) + push(InvokeSuper.new(calldata, block_iseq)) + end + + def jump(label) + push(Jump.new(label)) + end + + def leave + push(Leave.new) + end + + def newarray(number) + push(NewArray.new(number)) + end + + def newarraykwsplat(number) + push(NewArrayKwSplat.new(number)) + end + + def newhash(number) + push(NewHash.new(number)) + end + + def newrange(exclude_end) + push(NewRange.new(exclude_end)) + end + + def nop + push(Nop.new) + end + + def objtostring(calldata) + push(ObjToString.new(calldata)) + end + + def once(iseq, cache) + push(Once.new(iseq, cache)) + end + + def opt_aref_with(object, calldata) + push(OptArefWith.new(object, calldata)) + end + + def opt_aset_with(object, calldata) + push(OptAsetWith.new(object, calldata)) + end + + def opt_getconstant_path(names) + if RUBY_VERSION < "3.2" + cache = inline_storage + getinlinecache = opt_getinlinecache(-1, cache) + + if names[0] == :"" + names.shift + pop + putobject(Object) + end + + names.each_with_index do |name, index| + putobject(index == 0) + 
getconstant(name) + end + + opt_setinlinecache(cache) + getinlinecache.patch!(self) + else + push(OptGetConstantPath.new(names)) + end + end + + def opt_getinlinecache(label, cache) + push(Legacy::OptGetInlineCache.new(label, cache)) + end + + def opt_newarray_max(length) + if specialized_instruction + push(OptNewArrayMax.new(length)) + else + newarray(length) + send(YARV.calldata(:max)) + end + end + + def opt_newarray_min(length) + if specialized_instruction + push(OptNewArrayMin.new(length)) + else + newarray(length) + send(YARV.calldata(:min)) + end + end + + def opt_setinlinecache(cache) + push(Legacy::OptSetInlineCache.new(cache)) + end + + def opt_str_freeze(object) + if specialized_instruction + push(OptStrFreeze.new(object, YARV.calldata(:freeze))) + else + putstring(object) + send(YARV.calldata(:freeze)) + end + end + + def opt_str_uminus(object) + if specialized_instruction + push(OptStrUMinus.new(object, YARV.calldata(:-@))) + else + putstring(object) + send(YARV.calldata(:-@)) + end + end + + def pop + push(Pop.new) + end + + def putnil + push(PutNil.new) + end + + def putobject(object) + if operands_unification + # Specialize the putobject instruction based on the value of the + # object. If it's 0 or 1, then there's a specialized instruction + # that will push the object onto the stack and requires fewer + # operands. + if object.eql?(0) + push(PutObjectInt2Fix0.new) + elsif object.eql?(1) + push(PutObjectInt2Fix1.new) + else + push(PutObject.new(object)) + end + else + push(PutObject.new(object)) + end + end + + def putself + push(PutSelf.new) + end + + def putspecialobject(object) + push(PutSpecialObject.new(object)) + end + + def putstring(object) + push(PutString.new(object)) + end + + def send(calldata, block_iseq = nil) + if specialized_instruction && !block_iseq && + !calldata.flag?(CallData::CALL_ARGS_BLOCKARG) + # Specialize the send instruction. 
If it doesn't have a block + # attached, then we will replace it with an opt_send_without_block + # and do further specializations based on the called method and the + # number of arguments. + case [calldata.method, calldata.argc] + when [:length, 0] + push(OptLength.new(calldata)) + when [:size, 0] + push(OptSize.new(calldata)) + when [:empty?, 0] + push(OptEmptyP.new(calldata)) + when [:nil?, 0] + push(OptNilP.new(calldata)) + when [:succ, 0] + push(OptSucc.new(calldata)) + when [:!, 0] + push(OptNot.new(calldata)) + when [:+, 1] + push(OptPlus.new(calldata)) + when [:-, 1] + push(OptMinus.new(calldata)) + when [:*, 1] + push(OptMult.new(calldata)) + when [:/, 1] + push(OptDiv.new(calldata)) + when [:%, 1] + push(OptMod.new(calldata)) + when [:==, 1] + push(OptEq.new(calldata)) + when [:!=, 1] + push(OptNEq.new(YARV.calldata(:==, 1), calldata)) + when [:=~, 1] + push(OptRegExpMatch2.new(calldata)) + when [:<, 1] + push(OptLT.new(calldata)) + when [:<=, 1] + push(OptLE.new(calldata)) + when [:>, 1] + push(OptGT.new(calldata)) + when [:>=, 1] + push(OptGE.new(calldata)) + when [:<<, 1] + push(OptLTLT.new(calldata)) + when [:[], 1] + push(OptAref.new(calldata)) + when [:&, 1] + push(OptAnd.new(calldata)) + when [:|, 1] + push(OptOr.new(calldata)) + when [:[]=, 2] + push(OptAset.new(calldata)) + else + push(OptSendWithoutBlock.new(calldata)) + end + else + push(Send.new(calldata, block_iseq)) + end + end + + def setblockparam(index, level) + push(SetBlockParam.new(index, level)) + end + + def setclassvariable(name) + if RUBY_VERSION < "3.0" + push(Legacy::SetClassVariable.new(name)) + else + push(SetClassVariable.new(name, inline_storage_for(name))) + end + end + + def setconstant(name) + push(SetConstant.new(name)) + end + + def setglobal(name) + push(SetGlobal.new(name)) + end + + def setinstancevariable(name) + if RUBY_VERSION < "3.2" + push(SetInstanceVariable.new(name, inline_storage_for(name))) + else + push(SetInstanceVariable.new(name, inline_storage)) + end 
+ end + + def setlocal(index, level) + if operands_unification + # Specialize the setlocal instruction based on the level of the + # local variable. If it's 0 or 1, then there's a specialized + # instruction that will write to the current scope or the parent + # scope, respectively, and requires fewer operands. + case level + when 0 + push(SetLocalWC0.new(index)) + when 1 + push(SetLocalWC1.new(index)) + else + push(SetLocal.new(index, level)) + end + else + push(SetLocal.new(index, level)) + end + end + + def setn(number) + push(SetN.new(number)) + end + + def setspecial(key) + push(SetSpecial.new(key)) + end + + def splatarray(flag) + push(SplatArray.new(flag)) + end + + def swap + push(Swap.new) + end + + def topn(number) + push(TopN.new(number)) + end + + def toregexp(options, length) + push(ToRegExp.new(options, length)) + end + end + end +end diff --git a/lib/syntax_tree/yarv/instructions.rb b/lib/syntax_tree/yarv/instructions.rb index e6853a87..5a23bbf0 100644 --- a/lib/syntax_tree/yarv/instructions.rb +++ b/lib/syntax_tree/yarv/instructions.rb @@ -2,6 +2,58 @@ module SyntaxTree module YARV + # This is an operand to various YARV instructions that represents the + # information about a specific call site. 
+ class CallData + CALL_ARGS_SPLAT = 1 << 0 + CALL_ARGS_BLOCKARG = 1 << 1 + CALL_FCALL = 1 << 2 + CALL_VCALL = 1 << 3 + CALL_ARGS_SIMPLE = 1 << 4 + CALL_BLOCKISEQ = 1 << 5 + CALL_KWARG = 1 << 6 + CALL_KW_SPLAT = 1 << 7 + CALL_TAILCALL = 1 << 8 + CALL_SUPER = 1 << 9 + CALL_ZSUPER = 1 << 10 + CALL_OPT_SEND = 1 << 11 + CALL_KW_SPLAT_MUT = 1 << 12 + + attr_reader :method, :argc, :flags, :kw_arg + + def initialize( + method, + argc = 0, + flags = CallData::CALL_ARGS_SIMPLE, + kw_arg = nil + ) + @method = method + @argc = argc + @flags = flags + @kw_arg = kw_arg + end + + def flag?(mask) + (flags & mask) > 0 + end + + def to_h + result = { mid: method, flag: flags, orig_argc: argc } + result[:kw_arg] = kw_arg if kw_arg + result + end + end + + # A convenience method for creating a CallData object. + def self.calldata( + method, + argc = 0, + flags = CallData::CALL_ARGS_SIMPLE, + kw_arg = nil + ) + CallData.new(method, argc, flags, kw_arg) + end + # ### Summary # # `adjuststack` accepts a single integer argument and removes that many @@ -260,6 +312,109 @@ def pushes end end + # ### Summary + # + # `checkmatch` checks if the current pattern matches the current value. It + # pops the target and the pattern off the stack and pushes a boolean onto + # the stack if it matches or not. + # + # ### Usage + # + # ~~~ruby + # foo in Foo + # ~~~ + # + class CheckMatch + TYPE_WHEN = 1 + TYPE_CASE = 2 + TYPE_RESCUE = 3 + + attr_reader :type + + def initialize(type) + @type = type + end + + def to_a(_iseq) + [:checkmatch, type] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `checktype` checks if the value on top of the stack is of a certain type. + # The type is the only argument. It pops the value off the stack and pushes + # a boolean onto the stack indicating whether or not the value is of the + # given type. 
+ # + # ### Usage + # + # ~~~ruby + # foo in [bar] + # ~~~ + # + class CheckType + TYPE_OBJECT = 0x01 + TYPE_CLASS = 0x02 + TYPE_MODULE = 0x03 + TYPE_FLOAT = 0x04 + TYPE_STRING = 0x05 + TYPE_REGEXP = 0x06 + TYPE_ARRAY = 0x07 + TYPE_HASH = 0x08 + TYPE_STRUCT = 0x09 + TYPE_BIGNUM = 0x0a + TYPE_FILE = 0x0b + TYPE_DATA = 0x0c + TYPE_MATCH = 0x0d + TYPE_COMPLEX = 0x0e + TYPE_RATIONAL = 0x0f + TYPE_NIL = 0x11 + TYPE_TRUE = 0x12 + TYPE_FALSE = 0x13 + TYPE_SYMBOL = 0x14 + TYPE_FIXNUM = 0x15 + TYPE_UNDEF = 0x16 + + attr_reader :type + + def initialize(type) + @type = type + end + + def to_a(_iseq) + [:checktype, type] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + # TODO: This is incorrect. The instruction only pushes a single value + # onto the stack. However, if this is set to 1, we no longer match the + # output of RubyVM::InstructionSequence. So leaving this here until we + # can investigate further. + 2 + end + end + # ### Summary # # `concatarray` concatenates the two Arrays on top of the stack. @@ -800,6 +955,42 @@ def pushes end end + # ### Summary + # + # `getconstant` performs a constant lookup and pushes the value of the + # constant onto the stack. It pops both the class it should look in and + # whether or not it should look globally as well. + # + # ### Usage + # + # ~~~ruby + # Constant + # ~~~ + # + class GetConstant + attr_reader :name + + def initialize(name) + @name = name + end + + def to_a(_iseq) + [:getconstant, name] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + # ### Summary # # `getglobal` pushes the value of a global variables onto the stack. @@ -991,84 +1182,2425 @@ def pushes end end - # This module contains the instructions that used to be a part of YARV but - # have been replaced or removed in more recent versions. - module Legacy - # ### Summary - # - # `getclassvariable` looks for a class variable in the current class and - # pushes its value onto the stack. 
- # - # This version of the `getclassvariable` instruction is no longer used - # since in Ruby 3.0 it gained an inline cache.` - # - # ### Usage - # - # ~~~ruby - # @@class_variable - # ~~~ - # - class GetClassVariable - attr_reader :name - - def initialize(name) - @name = name - end - - def to_a(_iseq) - [:getclassvariable, name] - end - - def length - 2 - end - - def pops - 0 - end - - def pushes - 1 - end - end - - # ### Summary - # - # `getconstant` performs a constant lookup and pushes the value of the - # constant onto the stack. It pops both the class it should look in and - # whether or not it should look globally as well. - # - # This instruction is no longer used since in Ruby 3.2 it was replaced by - # the consolidated `opt_getconstant_path` instruction. - # - # ### Usage - # - # ~~~ruby - # Constant - # ~~~ - # - class GetConstant - attr_reader :name - - def initialize(name) - @name = name - end - - def to_a(_iseq) - [:getconstant, name] - end - - def length - 2 - end - - def pops - 2 - end - - def pushes - 1 - end + # ### Summary + # + # `getspecial` pushes the value of a special local variable onto the stack. + # + # ### Usage + # + # ~~~ruby + # [true] + # ~~~ + # + class GetSpecial + SVAR_LASTLINE = 0 # $_ + SVAR_BACKREF = 1 # $~ + SVAR_FLIPFLOP_START = 2 # flipflop + + attr_reader :key, :type + + def initialize(key, type) + @key = key + @type = type + end + + def to_a(_iseq) + [:getspecial, key, type] + end + + def length + 3 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `intern` converts the top element of the stack to a symbol and pushes the + # symbol onto the stack. + # + # ### Usage + # + # ~~~ruby + # :"#{"foo"}" + # ~~~ + # + class Intern + def to_a(_iseq) + [:intern] + end + + def length + 1 + end + + def pops + 1 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `invokeblock` invokes the block given to the current method. 
It pops the + # arguments for the block off the stack and pushes the result of running the + # block onto the stack. + # + # ### Usage + # + # ~~~ruby + # def foo + # yield + # end + # ~~~ + # + class InvokeBlock + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:invokeblock, calldata.to_h] + end + + def length + 2 + end + + def pops + calldata.argc + end + + def pushes + 1 + end + end + + # ### Summary + # + # `invokesuper` is similar to the `send` instruction, except that it calls + # the super method. It pops the receiver and arguments off the stack and + # pushes the return value onto the stack. + # + # ### Usage + # + # ~~~ruby + # def foo + # super + # end + # ~~~ + # + class InvokeSuper + attr_reader :calldata, :block_iseq + + def initialize(calldata, block_iseq) + @calldata = calldata + @block_iseq = block_iseq + end + + def to_a(_iseq) + [:invokesuper, calldata.to_h, block_iseq&.to_a] + end + + def length + 1 + end + + def pops + argb = (calldata.flag?(CallData::CALL_ARGS_BLOCKARG) ? 1 : 0) + argb + calldata.argc + 1 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `jump` unconditionally jumps to the label given as its only argument. + # + # ### Usage + # + # ~~~ruby + # x = 0 + # if x == 0 + # puts "0" + # else + # puts "2" + # end + # ~~~ + # + class Jump + attr_reader :label + + def initialize(label) + @label = label + end + + def patch!(iseq) + @label = iseq.label + end + + def to_a(_iseq) + [:jump, label] + end + + def length + 2 + end + + def pops + 0 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `leave` exits the current frame. + # + # ### Usage + # + # ~~~ruby + # ;; + # ~~~ + # + class Leave + def to_a(_iseq) + [:leave] + end + + def length + 1 + end + + def pops + 1 + end + + def pushes + # TODO: This is wrong. It should be 1. But it's 0 for now because + # otherwise the stack size is incorrectly calculated. 
+ 0 + end + end + + # ### Summary + # + # `newarray` puts a new array initialized with `number` values from the + # stack. It pops `number` values off the stack and pushes the array onto the + # stack. + # + # ### Usage + # + # ~~~ruby + # ["string"] + # ~~~ + # + class NewArray + attr_reader :number + + def initialize(number) + @number = number + end + + def to_a(_iseq) + [:newarray, number] + end + + def length + 2 + end + + def pops + number + end + + def pushes + 1 + end + end + + # ### Summary + # + # `newarraykwsplat` is a specialized version of `newarray` that takes a ** + # splat argument. It pops `number` values off the stack and pushes the array + # onto the stack. + # + # ### Usage + # + # ~~~ruby + # ["string", **{ foo: "bar" }] + # ~~~ + # + class NewArrayKwSplat + attr_reader :number + + def initialize(number) + @number = number + end + + def to_a(_iseq) + [:newarraykwsplat, number] + end + + def length + 2 + end + + def pops + number + end + + def pushes + 1 + end + end + + # ### Summary + # + # `newhash` puts a new hash onto the stack, using `number` elements from the + # stack. `number` needs to be even. It pops `number` elements off the stack + # and pushes a hash onto the stack. + # + # ### Usage + # + # ~~~ruby + # def foo(key, value) + # { key => value } + # end + # ~~~ + # + class NewHash + attr_reader :number + + def initialize(number) + @number = number + end + + def to_a(_iseq) + [:newhash, number] + end + + def length + 2 + end + + def pops + number + end + + def pushes + 1 + end + end + + # ### Summary + # + # `newrange` creates a new range object from the top two values on the + # stack. It pops both of them off, and then pushes on the new range. It + # takes one argument which is 0 if the end is included or 1 if the end value + # is excluded. 
+ # + # ### Usage + # + # ~~~ruby + # x = 0 + # y = 1 + # p (x..y), (x...y) + # ~~~ + # + class NewRange + attr_reader :exclude_end + + def initialize(exclude_end) + @exclude_end = exclude_end + end + + def to_a(_iseq) + [:newrange, exclude_end] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `nop` is a no-operation instruction. It is used to pad the instruction + # sequence so there is a place for other instructions to jump to. + # + # ### Usage + # + # ~~~ruby + # raise rescue true + # ~~~ + # + class Nop + def to_a(_iseq) + [:nop] + end + + def length + 1 + end + + def pops + 0 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `objtostring` pops a value from the stack, calls `to_s` on that value and + # then pushes the result back to the stack. + # + # It has various fast paths for classes like String, Symbol, Module, Class, + # etc. For everything else it calls `to_s`. + # + # ### Usage + # + # ~~~ruby + # "#{5}" + # ~~~ + # + class ObjToString + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:objtostring, calldata.to_h] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `once` is an instruction that wraps an instruction sequence and ensures + # that is it only ever executed once for the lifetime of the program. It + # uses a cache to ensure that it is only executed once. It pushes the result + # of running the instruction sequence onto the stack. 
+ # + # ### Usage + # + # ~~~ruby + # END { puts "END" } + # ~~~ + # + class Once + attr_reader :iseq, :cache + + def initialize(iseq, cache) + @iseq = iseq + @cache = cache + end + + def to_a(_iseq) + [:once, iseq.to_a, cache] + end + + def length + 3 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_and` is a specialization of the `opt_send_without_block` instruction + # that occurs when the `&` operator is used. There is a fast path for if + # both operands are integers. It pops both the receiver and the argument off + # the stack and pushes on the result. + # + # ### Usage + # + # ~~~ruby + # 2 & 3 + # ~~~ + # + class OptAnd + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_and, calldata.to_h] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_aref` is a specialization of the `opt_send_without_block` instruction + # that occurs when the `[]` operator is used. There are fast paths if the + # receiver is an integer, array, or hash. + # + # ### Usage + # + # ~~~ruby + # 7[2] + # ~~~ + # + class OptAref + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_aref, calldata.to_h] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_aref_with` is a specialization of the `opt_aref` instruction that + # occurs when the `[]` operator is used with a string argument known at + # compile time. There are fast paths if the receiver is a hash. It pops the + # receiver off the stack and pushes on the result. 
+ # + # ### Usage + # + # ~~~ruby + # { 'test' => true }['test'] + # ~~~ + # + class OptArefWith + attr_reader :object, :calldata + + def initialize(object, calldata) + @object = object + @calldata = calldata + end + + def to_a(_iseq) + [:opt_aref_with, object, calldata.to_h] + end + + def length + 3 + end + + def pops + 1 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_aset` is an instruction for setting the hash value by the key in + # the `recv[obj] = set` format. It is a specialization of the + # `opt_send_without_block` instruction. It pops the receiver, the key, and + # the value off the stack and pushes on the result. + # + # ### Usage + # + # ~~~ruby + # {}[:key] = value + # ~~~ + # + class OptAset + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_aset, calldata.to_h] + end + + def length + 2 + end + + def pops + 3 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_aset_with` is an instruction for setting the hash value by the known + # string key in the `recv[obj] = set` format. It pops the receiver and the + # value off the stack and pushes on the result. + # + # ### Usage + # + # ~~~ruby + # {}["key"] = value + # ~~~ + # + class OptAsetWith + attr_reader :object, :calldata + + def initialize(object, calldata) + @object = object + @calldata = calldata + end + + def to_a(_iseq) + [:opt_aset_with, object, calldata.to_h] + end + + def length + 3 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_div` is a specialization of the `opt_send_without_block` instruction + # that occurs when the `/` operator is used. There are fast paths for if + # both operands are integers, or if both operands are floats. It pops both + # the receiver and the argument off the stack and pushes on the result. 
+ # + # ### Usage + # + # ~~~ruby + # 2 / 3 + # ~~~ + # + class OptDiv + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_div, calldata.to_h] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_empty_p` is an optimization applied when the method `empty?` is + # called. It pops the receiver off the stack and pushes on the result of the + # method call. + # + # ### Usage + # + # ~~~ruby + # "".empty? + # ~~~ + # + class OptEmptyP + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_empty_p, calldata.to_h] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_eq` is a specialization of the `opt_send_without_block` instruction + # that occurs when the == operator is used. Fast paths exist when both + # operands are integers, floats, symbols or strings. It pops both the + # receiver and the argument off the stack and pushes on the result. + # + # ### Usage + # + # ~~~ruby + # 2 == 2 + # ~~~ + # + class OptEq + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_eq, calldata.to_h] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_ge` is a specialization of the `opt_send_without_block` instruction + # that occurs when the >= operator is used. Fast paths exist when both + # operands are integers or floats. It pops both the receiver and the + # argument off the stack and pushes on the result. 
+ # + # ### Usage + # + # ~~~ruby + # 4 >= 3 + # ~~~ + # + class OptGE + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_ge, calldata.to_h] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_getconstant_path` performs a constant lookup on a chain of constant + # names. It accepts as its argument an array of constant names, and pushes + # the value of the constant onto the stack. + # + # ### Usage + # + # ~~~ruby + # ::Object + # ~~~ + # + class OptGetConstantPath + attr_reader :names + + def initialize(names) + @names = names + end + + def to_a(_iseq) + [:opt_getconstant_path, names] + end + + def length + 2 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_gt` is a specialization of the `opt_send_without_block` instruction + # that occurs when the > operator is used. Fast paths exist when both + # operands are integers or floats. It pops both the receiver and the + # argument off the stack and pushes on the result. + # + # ### Usage + # + # ~~~ruby + # 4 > 3 + # ~~~ + # + class OptGT + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_gt, calldata.to_h] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_le` is a specialization of the `opt_send_without_block` instruction + # that occurs when the <= operator is used. Fast paths exist when both + # operands are integers or floats. It pops both the receiver and the + # argument off the stack and pushes on the result. 
+ # + # ### Usage + # + # ~~~ruby + # 3 <= 4 + # ~~~ + # + class OptLE + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_le, calldata.to_h] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_length` is a specialization of `opt_send_without_block`, when the + # `length` method is called. There are fast paths when the receiver is + # either a string, hash, or array. It pops the receiver off the stack and + # pushes on the result of the method call. + # + # ### Usage + # + # ~~~ruby + # "".length + # ~~~ + # + class OptLength + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_length, calldata.to_h] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_lt` is a specialization of the `opt_send_without_block` instruction + # that occurs when the < operator is used. Fast paths exist when both + # operands are integers or floats. It pops both the receiver and the + # argument off the stack and pushes on the result. + # + # ### Usage + # + # ~~~ruby + # 3 < 4 + # ~~~ + # + class OptLT + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_lt, calldata.to_h] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_ltlt` is a specialization of the `opt_send_without_block` instruction + # that occurs when the `<<` operator is used. Fast paths exists when the + # receiver is either a String or an Array. It pops both the receiver and the + # argument off the stack and pushes on the result. 
+ # + # ### Usage + # + # ~~~ruby + # "" << 2 + # ~~~ + # + class OptLTLT + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_ltlt, calldata.to_h] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_minus` is a specialization of the `opt_send_without_block` + # instruction that occurs when the `-` operator is used. There are fast + # paths for if both operands are integers or if both operands are floats. It + # pops both the receiver and the argument off the stack and pushes on the + # result. + # + # ### Usage + # + # ~~~ruby + # 3 - 2 + # ~~~ + # + class OptMinus + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_minus, calldata.to_h] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_mod` is a specialization of the `opt_send_without_block` instruction + # that occurs when the `%` operator is used. There are fast paths for if + # both operands are integers or if both operands are floats. It pops both + # the receiver and the argument off the stack and pushes on the result. + # + # ### Usage + # + # ~~~ruby + # 4 % 2 + # ~~~ + # + class OptMod + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_mod, calldata.to_h] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_mult` is a specialization of the `opt_send_without_block` instruction + # that occurs when the `*` operator is used. There are fast paths for if + # both operands are integers or floats. It pops both the receiver and the + # argument off the stack and pushes on the result. 
+ # + # ### Usage + # + # ~~~ruby + # 3 * 2 + # ~~~ + # + class OptMult + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_mult, calldata.to_h] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_neq` is an optimization that tests whether two values at the top of + # the stack are not equal by testing their equality and calling the `!` on + # the result. This allows `opt_neq` to use the fast paths optimized in + # `opt_eq` when both operands are Integers, Floats, Symbols, or Strings. It + # pops both the receiver and the argument off the stack and pushes on the + # result. + # + # ### Usage + # + # ~~~ruby + # 2 != 2 + # ~~~ + # + class OptNEq + attr_reader :eq_calldata, :neq_calldata + + def initialize(eq_calldata, neq_calldata) + @eq_calldata = eq_calldata + @neq_calldata = neq_calldata + end + + def to_a(_iseq) + [:opt_neq, eq_calldata.to_h, neq_calldata.to_h] + end + + def length + 3 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_newarray_max` is a specialization that occurs when the `max` method + # is called on an array literal. It pops the values of the array off the + # stack and pushes on the result. + # + # ### Usage + # + # ~~~ruby + # [1, 2, 3].max + # ~~~ + # + class OptNewArrayMax + attr_reader :number + + def initialize(number) + @number = number + end + + def to_a(_iseq) + [:opt_newarray_max, number] + end + + def length + 2 + end + + def pops + number + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_newarray_min` is a specialization that occurs when the `min` method + # is called on an array literal. It pops the values of the array off the + # stack and pushes on the result. 
+ # + # ### Usage + # + # ~~~ruby + # [1, 2, 3].min + # ~~~ + # + class OptNewArrayMin + attr_reader :number + + def initialize(number) + @number = number + end + + def to_a(_iseq) + [:opt_newarray_min, number] + end + + def length + 2 + end + + def pops + number + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_nil_p` is an optimization applied when the method `nil?` is called. + # It returns true immediately when the receiver is `nil` and defers to the + # `nil?` method in other cases. It pops the receiver off the stack and + # pushes on the result. + # + # ### Usage + # + # ~~~ruby + # "".nil? + # ~~~ + # + class OptNilP + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_nil_p, calldata.to_h] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_not` negates the value on top of the stack by calling the `!` method + # on it. It pops the receiver off the stack and pushes on the result. + # + # ### Usage + # + # ~~~ruby + # !true + # ~~~ + # + class OptNot + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_not, calldata.to_h] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_or` is a specialization of the `opt_send_without_block` instruction + # that occurs when the `|` operator is used. There is a fast path for if + # both operands are integers. It pops both the receiver and the argument off + # the stack and pushes on the result. 
+ # + # ### Usage + # + # ~~~ruby + # 2 | 3 + # ~~~ + # + class OptOr + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_or, calldata.to_h] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_plus` is a specialization of the `opt_send_without_block` instruction + # that occurs when the `+` operator is used. There are fast paths for if + # both operands are integers, floats, strings, or arrays. It pops both the + # receiver and the argument off the stack and pushes on the result. + # + # ### Usage + # + # ~~~ruby + # 2 + 3 + # ~~~ + # + class OptPlus + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_plus, calldata.to_h] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_regexpmatch2` is a specialization of the `opt_send_without_block` + # instruction that occurs when the `=~` operator is used. It pops both the + # receiver and the argument off the stack and pushes on the result. + # + # ### Usage + # + # ~~~ruby + # /a/ =~ "a" + # ~~~ + # + class OptRegExpMatch2 + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_regexpmatch2, calldata.to_h] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_send_without_block` is a specialization of the send instruction that + # occurs when a method is being called without a block. It pops the receiver + # and the arguments off the stack and pushes on the result. + # + # ### Usage + # + # ~~~ruby + # puts "Hello, world!" 
+ # ~~~ + # + class OptSendWithoutBlock + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_send_without_block, calldata.to_h] + end + + def length + 2 + end + + def pops + 1 + calldata.argc + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_size` is a specialization of `opt_send_without_block`, when the + # `size` method is called. There are fast paths when the receiver is either + # a string, hash, or array. It pops the receiver off the stack and pushes on + # the result. + # + # ### Usage + # + # ~~~ruby + # "".size + # ~~~ + # + class OptSize + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_size, calldata.to_h] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_str_freeze` pushes a frozen known string value with no interpolation + # onto the stack using the #freeze method. If the method gets overridden, + # this will fall back to a send. + # + # ### Usage + # + # ~~~ruby + # "hello".freeze + # ~~~ + # + class OptStrFreeze + attr_reader :object, :calldata + + def initialize(object, calldata) + @object = object + @calldata = calldata + end + + def to_a(_iseq) + [:opt_str_freeze, object, calldata.to_h] + end + + def length + 3 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_str_uminus` pushes a frozen known string value with no interpolation + # onto the stack. If the method gets overridden, this will fall back to a + # send. 
+ # + # ### Usage + # + # ~~~ruby + # -"string" + # ~~~ + # + class OptStrUMinus + attr_reader :object, :calldata + + def initialize(object, calldata) + @object = object + @calldata = calldata + end + + def to_a(_iseq) + [:opt_str_uminus, object, calldata.to_h] + end + + def length + 3 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_succ` is a specialization of the `opt_send_without_block` instruction + # when the method being called is `succ`. Fast paths exist when the receiver + # is either a String or a Fixnum. It pops the receiver off the stack and + # pushes on the result. + # + # ### Usage + # + # ~~~ruby + # "".succ + # ~~~ + # + class OptSucc + attr_reader :calldata + + def initialize(calldata) + @calldata = calldata + end + + def to_a(_iseq) + [:opt_succ, calldata.to_h] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `pop` pops the top value off the stack. + # + # ### Usage + # + # ~~~ruby + # a ||= 2 + # ~~~ + # + class Pop + def to_a(_iseq) + [:pop] + end + + def length + 1 + end + + def pops + 1 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `putnil` pushes a global nil object onto the stack. + # + # ### Usage + # + # ~~~ruby + # nil + # ~~~ + # + class PutNil + def to_a(_iseq) + [:putnil] + end + + def length + 1 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `putobject` pushes a known value onto the stack. + # + # ### Usage + # + # ~~~ruby + # 5 + # ~~~ + # + class PutObject + attr_reader :object + + def initialize(object) + @object = object + end + + def to_a(_iseq) + [:putobject, object] + end + + def length + 2 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `putobject_INT2FIX_0_` pushes 0 on the stack. It is a specialized + # instruction resulting from the operand unification optimization. It is + # equivalent to `putobject 0`. 
+ # + # ### Usage + # + # ~~~ruby + # 0 + # ~~~ + # + class PutObjectInt2Fix0 + def to_a(_iseq) + [:putobject_INT2FIX_0_] + end + + def length + 1 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `putobject_INT2FIX_1_` pushes 1 on the stack. It is a specialized + # instruction resulting from the operand unification optimization. It is + # equivalent to `putobject 1`. + # + # ### Usage + # + # ~~~ruby + # 1 + # ~~~ + # + class PutObjectInt2Fix1 + def to_a(_iseq) + [:putobject_INT2FIX_1_] + end + + def length + 1 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `putself` pushes the current value of self onto the stack. + # + # ### Usage + # + # ~~~ruby + # puts "Hello, world!" + # ~~~ + # + class PutSelf + def to_a(_iseq) + [:putself] + end + + def length + 1 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `putspecialobject` pushes one of three special objects onto the stack. + # These are either the VM core special object, the class base special + # object, or the constant base special object. + # + # ### Usage + # + # ~~~ruby + # alias foo bar + # ~~~ + # + class PutSpecialObject + OBJECT_VMCORE = 1 + OBJECT_CBASE = 2 + OBJECT_CONST_BASE = 3 + + attr_reader :object + + def initialize(object) + @object = object + end + + def to_a(_iseq) + [:putspecialobject, object] + end + + def length + 2 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `putstring` pushes an unfrozen string literal onto the stack. + # + # ### Usage + # + # ~~~ruby + # "foo" + # ~~~ + # + class PutString + attr_reader :object + + def initialize(object) + @object = object + end + + def to_a(_iseq) + [:putstring, object] + end + + def length + 2 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `send` invokes a method with an optional block. 
It pops its receiver and + # the arguments for the method off the stack and pushes the return value + # onto the stack. It has two arguments: the calldata for the call site and + # the optional block instruction sequence. + # + # ### Usage + # + # ~~~ruby + # "hello".tap { |i| p i } + # ~~~ + # + class Send + attr_reader :calldata, :block_iseq + + def initialize(calldata, block_iseq) + @calldata = calldata + @block_iseq = block_iseq + end + + def to_a(_iseq) + [:send, calldata.to_h, block_iseq&.to_a] + end + + def length + 3 + end + + def pops + argb = (calldata.flag?(CallData::CALL_ARGS_BLOCKARG) ? 1 : 0) + argb + calldata.argc + 1 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `setblockparam` sets the value of a block local variable on a frame + # determined by the level and index arguments. The level is the number of + # frames back to look and the index is the index in the local table. It pops + # the value it is setting off the stack. + # + # ### Usage + # + # ~~~ruby + # def foo(&bar) + # bar = baz + # end + # ~~~ + # + class SetBlockParam + attr_reader :index, :level + + def initialize(index, level) + @index = index + @level = level + end + + def to_a(iseq) + current = iseq + level.times { current = current.parent_iseq } + [:setblockparam, current.local_table.offset(index), level] + end + + def length + 3 + end + + def pops + 1 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `setclassvariable` looks for a class variable in the current class and + # sets its value to the value it pops off the top of the stack. It uses an + # inline cache to reduce the need to lookup the class variable in the class + # hierarchy every time. 
+ # + # ### Usage + # + # ~~~ruby + # @@class_variable = 1 + # ~~~ + # + class SetClassVariable + attr_reader :name, :cache + + def initialize(name, cache) + @name = name + @cache = cache + end + + def to_a(_iseq) + [:setclassvariable, name, cache] + end + + def length + 3 + end + + def pops + 1 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `setconstant` pops two values off the stack: the value to set the + # constant to and the constant base to set it in. + # + # ### Usage + # + # ~~~ruby + # Constant = 1 + # ~~~ + # + class SetConstant + attr_reader :name + + def initialize(name) + @name = name + end + + def to_a(_iseq) + [:setconstant, name] + end + + def length + 2 + end + + def pops + 2 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `setglobal` sets the value of a global variable to a value popped off the + # top of the stack. + # + # ### Usage + # + # ~~~ruby + # $global = 5 + # ~~~ + # + class SetGlobal + attr_reader :name + + def initialize(name) + @name = name + end + + def to_a(_iseq) + [:setglobal, name] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `setinstancevariable` pops a value off the top of the stack and then sets + # the instance variable associated with the instruction to that value. + # + # This instruction has two forms, but both have the same structure. Before + # Ruby 3.2, the inline cache corresponded to both the get and set + # instructions and could be shared. Since Ruby 3.2, it uses object shapes + # instead so the caches are unique per instruction. 
+ # + # ### Usage + # + # ~~~ruby + # @instance_variable = 1 + # ~~~ + # + class SetInstanceVariable + attr_reader :name, :cache + + def initialize(name, cache) + @name = name + @cache = cache + end + + def to_a(_iseq) + [:setinstancevariable, name, cache] + end + + def length + 3 + end + + def pops + 1 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `setlocal` sets the value of a local variable on a frame determined by the + # level and index arguments. The level is the number of frames back to + # look and the index is the index in the local table. It pops the value it + # is setting off the stack. + # + # ### Usage + # + # ~~~ruby + # value = 5 + # tap { tap { value = 10 } } + # ~~~ + # + class SetLocal + attr_reader :index, :level + + def initialize(index, level) + @index = index + @level = level + end + + def to_a(iseq) + current = iseq + level.times { current = current.parent_iseq } + [:setlocal, current.local_table.offset(index), level] + end + + def length + 3 + end + + def pops + 1 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `setlocal_WC_0` is a specialized version of the `setlocal` instruction. It + # sets the value of a local variable on the current frame to the value at + # the top of the stack as determined by the index given as its only + # argument. + # + # ### Usage + # + # ~~~ruby + # value = 5 + # ~~~ + # + class SetLocalWC0 + attr_reader :index + + def initialize(index) + @index = index + end + + def to_a(iseq) + [:setlocal_WC_0, iseq.local_table.offset(index)] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `setlocal_WC_1` is a specialized version of the `setlocal` instruction. It + # sets the value of a local variable on the parent frame to the value at the + # top of the stack as determined by the index given as its only argument. 
+ # + # ### Usage + # + # ~~~ruby + # value = 5 + # self.then { value = 10 } + # ~~~ + # + class SetLocalWC1 + attr_reader :index + + def initialize(index) + @index = index + end + + def to_a(iseq) + [:setlocal_WC_1, iseq.parent_iseq.local_table.offset(index)] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `setn` sets a value in the stack to a value popped off the top of the + # stack. It then pushes that value onto the top of the stack as well. + # + # ### Usage + # + # ~~~ruby + # {}[:key] = 'val' + # ~~~ + # + class SetN + attr_reader :number + + def initialize(number) + @number = number + end + + def to_a(_iseq) + [:setn, number] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `setspecial` pops a value off the top of the stack and sets a special + # local variable to that value. The special local variable is determined by + # the key given as its only argument. + # + # ### Usage + # + # ~~~ruby + # baz if (foo == 1) .. (bar == 1) + # ~~~ + # + class SetSpecial + attr_reader :key + + def initialize(key) + @key = key + end + + def to_a(_iseq) + [:setspecial, key] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 0 + end + end + + # ### Summary + # + # `splatarray` coerces the array object at the top of the stack into Array + # by calling `to_a`. It pushes a duplicate of the array if there is a flag, + # and the original array if there isn't one. + # + # ### Usage + # + # ~~~ruby + # x = *(5) + # ~~~ + # + class SplatArray + attr_reader :flag + + def initialize(flag) + @flag = flag + end + + def to_a(_iseq) + [:splatarray, flag] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `swap` swaps the top two elements in the stack. + # + # ### TracePoint + # + # `swap` does not dispatch any events. 
+ # + # ### Usage + # + # ~~~ruby + # !!defined?([[]]) + # ~~~ + # + class Swap + def to_a(_iseq) + [:swap] + end + + def length + 1 + end + + def pops + 2 + end + + def pushes + 2 + end + end + + # ### Summary + # + # `topn` pushes a single value onto the stack that is a copy of the value + # within the stack that is `number` of slots down from the top. + # + # ### Usage + # + # ~~~ruby + # case 3 + # when 1..5 + # puts "foo" + # end + # ~~~ + # + class TopN + attr_reader :number + + def initialize(number) + @number = number + end + + def to_a(_iseq) + [:topn, number] + end + + def length + 2 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `toregexp` pops a number of values off the stack, combines them into a new + # regular expression, and pushes the new regular expression onto the stack. + # + # ### Usage + # + # ~~~ruby + # /foo #{bar}/ + # ~~~ + # + class ToRegExp + attr_reader :options, :length + + def initialize(options, length) + @options = options + @length = length + end + + def to_a(_iseq) + [:toregexp, options, length] + end + + def pops + length + end + + def pushes + 1 end end end diff --git a/lib/syntax_tree/yarv/legacy.rb b/lib/syntax_tree/yarv/legacy.rb new file mode 100644 index 00000000..45dfe768 --- /dev/null +++ b/lib/syntax_tree/yarv/legacy.rb @@ -0,0 +1,169 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # This module contains the instructions that used to be a part of YARV but + # have been replaced or removed in more recent versions. + module Legacy + # ### Summary + # + # `getclassvariable` looks for a class variable in the current class and + # pushes its value onto the stack. 
+ # + # This version of the `getclassvariable` instruction is no longer used + # since in Ruby 3.0 it gained an inline cache. + # + # ### Usage + # + # ~~~ruby + # @@class_variable + # ~~~ + # + class GetClassVariable + attr_reader :name + + def initialize(name) + @name = name + end + + def to_a(_iseq) + [:getclassvariable, name] + end + + def length + 2 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_getinlinecache` is a wrapper around a series of `putobject` and + # `getconstant` instructions that allows skipping past them if the inline + # cache is currently set. It pushes the value of the cache onto the stack + # if it is set, otherwise it pushes `nil`. + # + # This instruction is no longer used since in Ruby 3.2 it was replaced by + # the consolidated `opt_getconstant_path` instruction. + # + # ### Usage + # + # ~~~ruby + # Constant + # ~~~ + # + class OptGetInlineCache + attr_reader :label, :cache + + def initialize(label, cache) + @label = label + @cache = cache + end + + def patch!(iseq) + @label = iseq.label + end + + def to_a(_iseq) + [:opt_getinlinecache, label, cache] + end + + def length + 3 + end + + def pops + 0 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `opt_setinlinecache` sets an inline cache for a constant lookup. It pops + # the value it should set off the top of the stack. It then pushes that + # value back onto the top of the stack. + # + # This instruction is no longer used since in Ruby 3.2 it was replaced by + # the consolidated `opt_getconstant_path` instruction.
+ # + # ### Usage + # + # ~~~ruby + # Constant + # ~~~ + # + class OptSetInlineCache + attr_reader :cache + + def initialize(cache) + @cache = cache + end + + def to_a(_iseq) + [:opt_setinlinecache, cache] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 1 + end + end + + # ### Summary + # + # `setclassvariable` looks for a class variable in the current class and + # sets its value to the value it pops off the top of the stack. + # + # This version of the `setclassvariable` instruction is no longer used + # since in Ruby 3.0 it gained an inline cache. + # + # ### Usage + # + # ~~~ruby + # @@class_variable = 1 + # ~~~ + # + class SetClassVariable + attr_reader :name + + def initialize(name) + @name = name + end + + def to_a(_iseq) + [:setclassvariable, name] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 0 + end + end + end + end +end diff --git a/lib/syntax_tree/yarv/local_table.rb b/lib/syntax_tree/yarv/local_table.rb new file mode 100644 index 00000000..5eac346c --- /dev/null +++ b/lib/syntax_tree/yarv/local_table.rb @@ -0,0 +1,81 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # This represents every local variable associated with an instruction + # sequence. There are two kinds of locals: plain locals that are what you + # expect, and block proxy locals, which represent local variables + # associated with blocks that were passed into the current instruction + # sequence. + class LocalTable + # A local representing a block passed into the current instruction + # sequence. + class BlockLocal + attr_reader :name + + def initialize(name) + @name = name + end + end + + # A regular local variable. + class PlainLocal + attr_reader :name + + def initialize(name) + @name = name + end + end + + # The result of looking up a local variable in the current local table. 
+ class Lookup + attr_reader :local, :index, :level + + def initialize(local, index, level) + @local = local + @index = index + @level = level + end + end + + attr_reader :locals + + def initialize + @locals = [] + end + + def find(name, level = 0) + index = locals.index { |local| local.name == name } + Lookup.new(locals[index], index, level) if index + end + + def has?(name) + locals.any? { |local| local.name == name } + end + + def names + locals.map(&:name) + end + + def size + locals.length + end + + # Add a BlockLocal to the local table. + def block(name) + locals << BlockLocal.new(name) unless has?(name) + end + + # Add a PlainLocal to the local table. + def plain(name) + locals << PlainLocal.new(name) unless has?(name) + end + + # This is the offset from the top of the stack where this local variable + # lives. + def offset(index) + size - (index - 3) - 1 + end + end + end +end diff --git a/test/compiler_test.rb b/test/compiler_test.rb index c2472432..6b185dea 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -485,13 +485,12 @@ def assert_compiles(source, **options) assert_equal( serialize_iseq(RubyVM::InstructionSequence.compile(source, **options)), - serialize_iseq(program.accept(Compiler.new(**options))) + serialize_iseq(program.accept(YARV::Compiler.new(**options))) ) end def assert_evaluates(expected, source, **options) - program = SyntaxTree.parse(source) - assert_equal expected, program.accept(Compiler.new(**options)).eval + assert_equal expected, YARV.compile(source, **options).eval end end end diff --git a/test/yarv_test.rb b/test/yarv_test.rb index 55cdb657..02514a93 100644 --- a/test/yarv_test.rb +++ b/test/yarv_test.rb @@ -47,8 +47,8 @@ def test_bf private def assert_disassembles(expected, source) - iseq = SyntaxTree.parse(source).accept(Compiler.new) - actual = Formatter.format(source, YARV::Disassembler.new(iseq).to_ruby) + ruby = YARV::Disassembler.new(YARV.compile(source)).to_ruby + actual = Formatter.format(source, ruby) 
assert_equal expected, actual end end From b6fb92ee9fe39bec7e547a307742c915e78bf5d4 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 22 Nov 2022 16:24:04 -0500 Subject: [PATCH 063/104] Get it working on TruffleRuby --- lib/syntax_tree/yarv/instruction_sequence.rb | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index c59d02c7..411f4692 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -13,11 +13,14 @@ class InstructionSequence # pass a serialized iseq to Ruby and have it return a # RubyVM::InstructionSequence object. ISEQ_LOAD = - Fiddle::Function.new( - Fiddle::Handle::DEFAULT["rb_iseq_load"], - [Fiddle::TYPE_VOIDP] * 3, - Fiddle::TYPE_VOIDP - ) + begin + Fiddle::Function.new( + Fiddle::Handle::DEFAULT["rb_iseq_load"], + [Fiddle::TYPE_VOIDP] * 3, + Fiddle::TYPE_VOIDP + ) + rescue NameError + end # This object is used to track the size of the stack at any given time. It # is effectively a mini symbolic interpreter. It's necessary because when @@ -141,6 +144,7 @@ def length end def eval + raise "Unsupported platform" if ISEQ_LOAD.nil? compiled = to_a # Temporary hack until we get these working. 
From be9465d49edf5fe71b470aefeff1893289d68070 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 22 Nov 2022 16:45:20 -0500 Subject: [PATCH 064/104] Handle inline_const_cache=false --- lib/syntax_tree/yarv/compiler.rb | 4 +++ lib/syntax_tree/yarv/instruction_sequence.rb | 32 +++++++++++++++----- test/compiler_test.rb | 1 + 3 files changed, 29 insertions(+), 8 deletions(-) diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index 45f2bb59..21d335ce 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -204,6 +204,7 @@ def visit_unsupported(_node) # These options mirror the compilation options that we currently support # that can be also passed to RubyVM::InstructionSequence.compile. attr_reader :frozen_string_literal, + :inline_const_cache, :operands_unification, :specialized_instruction @@ -217,10 +218,12 @@ def visit_unsupported(_node) def initialize( frozen_string_literal: false, + inline_const_cache: true, operands_unification: true, specialized_instruction: true ) @frozen_string_literal = frozen_string_literal + @inline_const_cache = inline_const_cache @operands_unification = operands_unification @specialized_instruction = specialized_instruction @@ -1374,6 +1377,7 @@ def visit_program(node) nil, node.location, frozen_string_literal: frozen_string_literal, + inline_const_cache: inline_const_cache, operands_unification: operands_unification, specialized_instruction: specialized_instruction ) diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index 411f4692..4754618e 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -77,6 +77,7 @@ def change_by(value) # These are various compilation options provided. 
attr_reader :frozen_string_literal, + :inline_const_cache, :operands_unification, :specialized_instruction @@ -86,6 +87,7 @@ def initialize( parent_iseq, location, frozen_string_literal: false, + inline_const_cache: true, operands_unification: true, specialized_instruction: true ) @@ -104,6 +106,7 @@ def initialize( @stack = Stack.new @frozen_string_literal = frozen_string_literal + @inline_const_cache = inline_const_cache @operands_unification = operands_unification @specialized_instruction = specialized_instruction end @@ -192,6 +195,7 @@ def child_iseq(type, name, location) self, location, frozen_string_literal: frozen_string_literal, + inline_const_cache: inline_const_cache, operands_unification: operands_unification, specialized_instruction: specialized_instruction ) @@ -434,14 +438,24 @@ def opt_aset_with(object, calldata) end def opt_getconstant_path(names) - if RUBY_VERSION < "3.2" - cache = inline_storage - getinlinecache = opt_getinlinecache(-1, cache) - - if names[0] == :"" + if RUBY_VERSION < "3.2" || !inline_const_cache + cache = nil + getinlinecache = nil + + if inline_const_cache + cache = inline_storage + getinlinecache = opt_getinlinecache(-1, cache) + + if names[0] == :"" + names.shift + pop + putobject(Object) + end + elsif names[0] == :"" names.shift - pop putobject(Object) + else + putnil end names.each_with_index do |name, index| @@ -449,8 +463,10 @@ def opt_getconstant_path(names) getconstant(name) end - opt_setinlinecache(cache) - getinlinecache.patch!(self) + if inline_const_cache + opt_setinlinecache(cache) + getinlinecache.patch!(self) + end else push(OptGetConstantPath.new(names)) end diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 6b185dea..387a726d 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -432,6 +432,7 @@ class CompilerTest < Minitest::Test { frozen_string_literal: true }, { operands_unification: false }, { specialized_instruction: false }, + { inline_const_cache: false }, { 
operands_unification: false, specialized_instruction: false } ] From 4631b5c1708ac71fc53614924ccf1b6155203b94 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 23 Nov 2022 09:31:49 -0500 Subject: [PATCH 065/104] Convert options into an object --- lib/syntax_tree/yarv.rb | 4 +- lib/syntax_tree/yarv/compiler.rb | 92 ++++++++++++-------- lib/syntax_tree/yarv/instruction_sequence.rb | 56 ++++-------- test/compiler_test.rb | 22 ++--- 4 files changed, 87 insertions(+), 87 deletions(-) diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index df8bc3ce..1e759ad1 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -4,8 +4,8 @@ module SyntaxTree # This module provides an object representation of the YARV bytecode. module YARV # Compile the given source into a YARV instruction sequence. - def self.compile(source, **options) - SyntaxTree.parse(source).accept(Compiler.new(**options)) + def self.compile(source, options = Compiler::Options.new) + SyntaxTree.parse(source).accept(Compiler.new(options)) end end end diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index 21d335ce..5d717bd1 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -45,6 +45,53 @@ module YARV # RubyVM::InstructionSequence.compile("1 + 2").to_a # class Compiler < BasicVisitor + # This represents a set of options that can be passed to the compiler to + # control how it compiles the code. It mirrors the options that can be + # passed to RubyVM::InstructionSequence.compile, except it only includes + # options that actually change the behavior. 
+ class Options + def initialize( + frozen_string_literal: false, + inline_const_cache: true, + operands_unification: true, + specialized_instruction: true + ) + @frozen_string_literal = frozen_string_literal + @inline_const_cache = inline_const_cache + @operands_unification = operands_unification + @specialized_instruction = specialized_instruction + end + + def to_hash + { + frozen_string_literal: @frozen_string_literal, + inline_const_cache: @inline_const_cache, + operands_unification: @operands_unification, + specialized_instruction: @specialized_instruction + } + end + + def frozen_string_literal! + @frozen_string_literal = true + end + + def frozen_string_literal? + @frozen_string_literal + end + + def inline_const_cache? + @inline_const_cache + end + + def operands_unification? + @operands_unification + end + + def specialized_instruction? + @specialized_instruction + end + end + # This visitor is responsible for converting Syntax Tree nodes into their # corresponding Ruby structures. This is used to convert the operands of # some instructions like putobject that push a Ruby object directly onto @@ -203,10 +250,7 @@ def visit_unsupported(_node) # These options mirror the compilation options that we currently support # that can be also passed to RubyVM::InstructionSequence.compile. - attr_reader :frozen_string_literal, - :inline_const_cache, - :operands_unification, - :specialized_instruction + attr_reader :options # The current instruction sequence that is being compiled. attr_reader :iseq @@ -216,17 +260,8 @@ def visit_unsupported(_node) # if we need to return the value of the last statement. 
attr_reader :last_statement - def initialize( - frozen_string_literal: false, - inline_const_cache: true, - operands_unification: true, - specialized_instruction: true - ) - @frozen_string_literal = frozen_string_literal - @inline_const_cache = inline_const_cache - @operands_unification = operands_unification - @specialized_instruction = specialized_instruction - + def initialize(options) + @options = options @iseq = nil @last_statement = false end @@ -236,7 +271,7 @@ def visit_BEGIN(node) end def visit_CHAR(node) - if frozen_string_literal + if options.frozen_string_literal? iseq.putobject(node.value[1..]) else iseq.putstring(node.value[1..]) @@ -282,7 +317,7 @@ def visit_aref(node) calldata = YARV.calldata(:[], 1) visit(node.collection) - if !frozen_string_literal && specialized_instruction && + if !options.frozen_string_literal? && options.specialized_instruction? && (node.index.parts.length == 1) arg = node.index.parts.first @@ -453,7 +488,7 @@ def visit_assign(node) when ARefField calldata = YARV.calldata(:[]=, 2) - if !frozen_string_literal && specialized_instruction && + if !options.frozen_string_literal? && options.specialized_instruction? && (node.target.index.parts.length == 1) arg = node.target.index.parts.first @@ -1352,7 +1387,7 @@ def visit_program(node) break unless statement.is_a?(Comment) if statement.value == "# frozen_string_literal: true" - @frozen_string_literal = true + options.frozen_string_literal! 
end end @@ -1370,18 +1405,7 @@ def visit_program(node) end end - top_iseq = - InstructionSequence.new( - :top, - "", - nil, - node.location, - frozen_string_literal: frozen_string_literal, - inline_const_cache: inline_const_cache, - operands_unification: operands_unification, - specialized_instruction: specialized_instruction - ) - + top_iseq = InstructionSequence.new(:top, "", nil, node.location, options) with_child_iseq(top_iseq) do visit_all(preexes) @@ -1402,7 +1426,7 @@ def visit_qsymbols(node) end def visit_qwords(node) - if frozen_string_literal + if options.frozen_string_literal? iseq.duparray(node.accept(RubyVisitor.new)) else visit_all(node.elements) @@ -1632,7 +1656,7 @@ def visit_top_const_ref(node) end def visit_tstring_content(node) - if frozen_string_literal + if options.frozen_string_literal? iseq.putobject(node.accept(RubyVisitor.new)) else iseq.putstring(node.accept(RubyVisitor.new)) @@ -1808,7 +1832,7 @@ def visit_word(node) end def visit_words(node) - if frozen_string_literal && (compiled = RubyVisitor.compile(node)) + if options.frozen_string_literal? && (compiled = RubyVisitor.compile(node)) iseq.duparray(compiled) else visit_all(node.elements) diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index 4754618e..156070da 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -76,21 +76,9 @@ def change_by(value) attr_reader :stack # These are various compilation options provided. 
- attr_reader :frozen_string_literal, - :inline_const_cache, - :operands_unification, - :specialized_instruction - - def initialize( - type, - name, - parent_iseq, - location, - frozen_string_literal: false, - inline_const_cache: true, - operands_unification: true, - specialized_instruction: true - ) + attr_reader :options + + def initialize(type, name, parent_iseq, location, options = Compiler::Options.new) @type = type @name = name @parent_iseq = parent_iseq @@ -105,10 +93,7 @@ def initialize( @storage_index = 0 @stack = Stack.new - @frozen_string_literal = frozen_string_literal - @inline_const_cache = inline_const_cache - @operands_unification = operands_unification - @specialized_instruction = specialized_instruction + @options = options end ########################################################################## @@ -189,16 +174,7 @@ def to_a ########################################################################## def child_iseq(type, name, location) - InstructionSequence.new( - type, - name, - self, - location, - frozen_string_literal: frozen_string_literal, - inline_const_cache: inline_const_cache, - operands_unification: operands_unification, - specialized_instruction: specialized_instruction - ) + InstructionSequence.new(type, name, self, location, options) end def block_child_iseq(location) @@ -359,7 +335,7 @@ def getinstancevariable(name) end def getlocal(index, level) - if operands_unification + if options.operands_unification? # Specialize the getlocal instruction based on the level of the # local variable. If it's 0 or 1, then there's a specialized # instruction that will look at the current scope or the parent @@ -438,11 +414,11 @@ def opt_aset_with(object, calldata) end def opt_getconstant_path(names) - if RUBY_VERSION < "3.2" || !inline_const_cache + if RUBY_VERSION < "3.2" || !options.inline_const_cache? cache = nil getinlinecache = nil - if inline_const_cache + if options.inline_const_cache? 
cache = inline_storage getinlinecache = opt_getinlinecache(-1, cache) @@ -463,7 +439,7 @@ def opt_getconstant_path(names) getconstant(name) end - if inline_const_cache + if options.inline_const_cache? opt_setinlinecache(cache) getinlinecache.patch!(self) end @@ -477,7 +453,7 @@ def opt_getinlinecache(label, cache) end def opt_newarray_max(length) - if specialized_instruction + if options.specialized_instruction? push(OptNewArrayMax.new(length)) else newarray(length) @@ -486,7 +462,7 @@ def opt_newarray_max(length) end def opt_newarray_min(length) - if specialized_instruction + if options.specialized_instruction? push(OptNewArrayMin.new(length)) else newarray(length) @@ -499,7 +475,7 @@ def opt_setinlinecache(cache) end def opt_str_freeze(object) - if specialized_instruction + if options.specialized_instruction? push(OptStrFreeze.new(object, YARV.calldata(:freeze))) else putstring(object) @@ -508,7 +484,7 @@ def opt_str_freeze(object) end def opt_str_uminus(object) - if specialized_instruction + if options.specialized_instruction? push(OptStrUMinus.new(object, YARV.calldata(:-@))) else putstring(object) @@ -525,7 +501,7 @@ def putnil end def putobject(object) - if operands_unification + if options.operands_unification? # Specialize the putobject instruction based on the value of the # object. If it's 0 or 1, then there's a specialized instruction # that will push the object onto the stack and requires fewer @@ -555,7 +531,7 @@ def putstring(object) end def send(calldata, block_iseq = nil) - if specialized_instruction && !block_iseq && + if options.specialized_instruction? && !block_iseq && !calldata.flag?(CallData::CALL_ARGS_BLOCKARG) # Specialize the send instruction. If it doesn't have a block # attached, then we will replace it with an opt_send_without_block @@ -645,7 +621,7 @@ def setinstancevariable(name) end def setlocal(index, level) - if operands_unification + if options.operands_unification? 
# Specialize the setlocal instruction based on the level of the # local variable. If it's 0 or 1, then there's a specialized # instruction that will write to the current scope or the parent diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 387a726d..5a602417 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -428,12 +428,12 @@ class CompilerTest < Minitest::Test # These are the combinations of instructions that we're going to test. OPTIONS = [ - {}, - { frozen_string_literal: true }, - { operands_unification: false }, - { specialized_instruction: false }, - { inline_const_cache: false }, - { operands_unification: false, specialized_instruction: false } + YARV::Compiler::Options.new, + YARV::Compiler::Options.new(frozen_string_literal: true), + YARV::Compiler::Options.new(operands_unification: false), + YARV::Compiler::Options.new(specialized_instruction: false), + YARV::Compiler::Options.new(inline_const_cache: false), + YARV::Compiler::Options.new(operands_unification: false, specialized_instruction: false) ] OPTIONS.each do |options| @@ -441,7 +441,7 @@ class CompilerTest < Minitest::Test CASES.each do |source| define_method(:"test_#{source}_#{suffix}") do - assert_compiles(source, **options) + assert_compiles(source, options) end end end @@ -481,17 +481,17 @@ def serialize_iseq(iseq) serialized end - def assert_compiles(source, **options) + def assert_compiles(source, options) program = SyntaxTree.parse(source) assert_equal( serialize_iseq(RubyVM::InstructionSequence.compile(source, **options)), - serialize_iseq(program.accept(YARV::Compiler.new(**options))) + serialize_iseq(program.accept(YARV::Compiler.new(options))) ) end - def assert_evaluates(expected, source, **options) - assert_equal expected, YARV.compile(source, **options).eval + def assert_evaluates(expected, source) + assert_equal expected, YARV.compile(source).eval end end end From da1e46604d56941de004ce561da5b56e7eae1bde Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: 
Wed, 23 Nov 2022 09:38:33 -0500 Subject: [PATCH 066/104] Support the tailcall_optimization flag --- lib/syntax_tree/yarv/compiler.rb | 30 ++++++++++++++++++++++++++++-- test/compiler_test.rb | 6 +++--- 2 files changed, 31 insertions(+), 5 deletions(-) diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index 5d717bd1..4b0587fc 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -54,12 +54,14 @@ def initialize( frozen_string_literal: false, inline_const_cache: true, operands_unification: true, - specialized_instruction: true + specialized_instruction: true, + tailcall_optimization: false ) @frozen_string_literal = frozen_string_literal @inline_const_cache = inline_const_cache @operands_unification = operands_unification @specialized_instruction = specialized_instruction + @tailcall_optimization = tailcall_optimization end def to_hash @@ -67,7 +69,8 @@ def to_hash frozen_string_literal: @frozen_string_literal, inline_const_cache: @inline_const_cache, operands_unification: @operands_unification, - specialized_instruction: @specialized_instruction + specialized_instruction: @specialized_instruction, + tailcall_optimization: @tailcall_optimization } end @@ -90,6 +93,10 @@ def operands_unification? def specialized_instruction? @specialized_instruction end + + def tailcall_optimization? + @tailcall_optimization + end end # This visitor is responsible for converting Syntax Tree nodes into their @@ -716,12 +723,17 @@ def visit_call(node) end end + # Track whether or not this is a method call on a block proxy receiver. + # If it is, we can potentially do tailcall optimizations on it. 
+ block_receiver = false + if node.receiver if node.receiver.is_a?(VarRef) lookup = iseq.local_variable(node.receiver.value.value.to_sym) if lookup.local.is_a?(LocalTable::BlockLocal) iseq.getblockparamproxy(lookup.index, lookup.level) + block_receiver = true else visit(node.receiver) end @@ -752,6 +764,7 @@ def visit_call(node) when ArgsForward flag |= CallData::CALL_ARGS_SPLAT flag |= CallData::CALL_ARGS_BLOCKARG + flag |= CallData::CALL_TAILCALL if options.tailcall_optimization? lookup = iseq.local_table.find(:*) iseq.getlocal(lookup.index, lookup.level) @@ -768,9 +781,22 @@ def visit_call(node) end block_iseq = visit(node.block) if node.block + + # If there's no block and we don't already have any special flags set, + # then we can safely call this simple arguments. Note that has to be the + # first flag we set after looking at the arguments to get the flags + # correct. flag |= CallData::CALL_ARGS_SIMPLE if block_iseq.nil? && flag == 0 + + # If there's no receiver, then this is an "fcall". flag |= CallData::CALL_FCALL if node.receiver.nil? + # If we're calling a method on the passed block object and we have + # tailcall optimizations turned on, then we can set the tailcall flag. + if block_receiver && options.tailcall_optimization? 
+ flag |= CallData::CALL_TAILCALL + end + iseq.send( YARV.calldata(node.message.value.to_sym, argc, flag), block_iseq diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 5a602417..02343ca2 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -433,14 +433,14 @@ class CompilerTest < Minitest::Test YARV::Compiler::Options.new(operands_unification: false), YARV::Compiler::Options.new(specialized_instruction: false), YARV::Compiler::Options.new(inline_const_cache: false), - YARV::Compiler::Options.new(operands_unification: false, specialized_instruction: false) + YARV::Compiler::Options.new(tailcall_optimization: true) ] OPTIONS.each do |options| - suffix = options.inspect + suffix = options.to_hash.map { |k, v| "#{k}=#{v}" }.join("&") CASES.each do |source| - define_method(:"test_#{source}_#{suffix}") do + define_method(:"test_#{source}_(#{suffix})") do assert_compiles(source, options) end end From 85df98f85dc297e16bc27003f2202728c871687e Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 23 Nov 2022 09:59:58 -0500 Subject: [PATCH 067/104] Provide shims for methods that should compile --- lib/syntax_tree/yarv/compiler.rb | 52 ++++++++++++++++++++++++++++++++ test/compiler_test.rb | 2 ++ 2 files changed, 54 insertions(+) diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index 4b0587fc..bdc31ab3 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -54,12 +54,14 @@ def initialize( frozen_string_literal: false, inline_const_cache: true, operands_unification: true, + peephole_optimization: true, specialized_instruction: true, tailcall_optimization: false ) @frozen_string_literal = frozen_string_literal @inline_const_cache = inline_const_cache @operands_unification = operands_unification + @peephole_optimization = peephole_optimization @specialized_instruction = specialized_instruction @tailcall_optimization = tailcall_optimization end @@ -69,6 +71,7 @@ def to_hash 
frozen_string_literal: @frozen_string_literal, inline_const_cache: @inline_const_cache, operands_unification: @operands_unification, + peephole_optimization: @peephole_optimization, specialized_instruction: @specialized_instruction, tailcall_optimization: @tailcall_optimization } @@ -90,6 +93,10 @@ def operands_unification? @operands_unification end + def peephole_optimization? + @peephole_optimization + end + def specialized_instruction? @specialized_instruction end @@ -608,6 +615,9 @@ def visit_bare_assoc_hash(node) end end + def visit_begin(node) + end + def visit_binary(node) case node.operator when :"&&" @@ -669,6 +679,9 @@ def visit_bodystmt(node) visit(node.statements) end + def visit_break(node) + end + def visit_call(node) if node.is_a?(CallNode) return( @@ -1016,6 +1029,9 @@ def visit_elsif(node) ) end + def visit_ensure(node) + end + def visit_field(node) visit(node.parent) end @@ -1024,6 +1040,9 @@ def visit_float(node) iseq.putobject(node.accept(RubyVisitor.new)) end + def visit_fndptn(node) + end + def visit_for(node) visit(node.collection) @@ -1064,6 +1083,9 @@ def visit_hash(node) end end + def visit_hshptn(node) + end + def visit_heredoc(node) if node.beginning.value.end_with?("`") visit_xstring_literal(node) @@ -1143,6 +1165,9 @@ def visit_imaginary(node) iseq.putobject(node.accept(RubyVisitor.new)) end + def visit_in(node) + end + def visit_int(node) iseq.putobject(node.accept(RubyVisitor.new)) end @@ -1243,6 +1268,9 @@ def visit_mrhs(node) end end + def visit_next(node) + end + def visit_not(node) visit(node.statement) iseq.send(YARV.calldata(:!)) @@ -1408,6 +1436,12 @@ def visit_paren(node) visit(node.contents) end + def visit_pinned_begin(node) + end + + def visit_pinned_var_ref(node) + end + def visit_program(node) node.statements.body.each do |statement| break unless statement.is_a?(Comment) @@ -1566,6 +1600,9 @@ def visit_rational(node) iseq.putobject(node.accept(RubyVisitor.new)) end + def visit_redo(node) + end + def 
visit_regexp_literal(node) if (compiled = RubyVisitor.compile(node)) iseq.putobject(compiled) @@ -1576,12 +1613,27 @@ def visit_regexp_literal(node) end end + def visit_rescue(node) + end + + def visit_rescue_ex(node) + end + + def visit_rescue_mod(node) + end + def visit_rest_param(node) iseq.local_table.plain(node.name.value.to_sym) iseq.argument_options[:rest_start] = iseq.argument_size iseq.argument_size += 1 end + def visit_retry(node) + end + + def visit_return(node) + end + def visit_sclass(node) visit(node.target) iseq.putnil diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 02343ca2..9ea7f21b 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -431,6 +431,8 @@ class CompilerTest < Minitest::Test YARV::Compiler::Options.new, YARV::Compiler::Options.new(frozen_string_literal: true), YARV::Compiler::Options.new(operands_unification: false), + # TODO: have this work when peephole optimizations are turned off. + # YARV::Compiler::Options.new(peephole_optimization: false), YARV::Compiler::Options.new(specialized_instruction: false), YARV::Compiler::Options.new(inline_const_cache: false), YARV::Compiler::Options.new(tailcall_optimization: true) From 83cdfbbc60adb200aa2d9fa7477c81ee7ab2e6c7 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 23 Nov 2022 10:05:31 -0500 Subject: [PATCH 068/104] Provide missing instructions --- lib/syntax_tree/yarv/instructions.rb | 84 ++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) diff --git a/lib/syntax_tree/yarv/instructions.rb b/lib/syntax_tree/yarv/instructions.rb index 5a23bbf0..3fcdadb3 100644 --- a/lib/syntax_tree/yarv/instructions.rb +++ b/lib/syntax_tree/yarv/instructions.rb @@ -1840,6 +1840,54 @@ def pushes end end + # ### Summary + # + # `opt_case_dispatch` is a branch instruction that moves the control flow + # for case statements that have clauses where they can all be used as hash + # keys for an internal hash. 
+ # + # It has two arguments: the `case_dispatch_hash` and an `else_label`. It + # pops one value off the stack: a hash key. `opt_case_dispatch` looks up the + # key in the `case_dispatch_hash` and jumps to the corresponding label if + # there is one. If there is no value in the `case_dispatch_hash`, + # `opt_case_dispatch` jumps to the `else_label` index. + # + # ### Usage + # + # ~~~ruby + # case 1 + # when 1 + # puts "foo" + # else + # puts "bar" + # end + # ~~~ + # + class OptCaseDispatch + attr_reader :case_dispatch_hash, :else_label + + def initialize(case_dispatch_hash, else_label) + @case_dispatch_hash = case_dispatch_hash + @else_label = else_label + end + + def to_a(_iseq) + [:opt_case_dispatch, case_dispatch_hash, else_label] + end + + def length + 3 + end + + def pops + 1 + end + + def pushes + 0 + end + end + # ### Summary # # `opt_div` is a specialization of the `opt_send_without_block` instruction @@ -3534,6 +3582,42 @@ def pushes end end + # ### Summary + # + # `throw` pops a value off the top of the stack and throws it. It is caught + # using the instruction sequence's (or an ancestor's) catch table. It pushes + # on the result of throwing the value. 
+ # + # ### Usage + # + # ~~~ruby + # [1, 2, 3].map { break 2 } + # ~~~ + # + class Throw + attr_reader :type + + def initialize(type) + @type = type + end + + def to_a(_iseq) + [:throw, type] + end + + def length + 2 + end + + def pops + 1 + end + + def pushes + 1 + end + end + # ### Summary # # `topn` pushes a single value onto the stack that is a copy of the value From a43005d8a04e277f57b9cbf88d925197de13a367 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 23 Nov 2022 11:06:26 -0500 Subject: [PATCH 069/104] Allow converting from compiled iseq to YARV iseq --- lib/syntax_tree/yarv/compiler.rb | 21 +- lib/syntax_tree/yarv/instruction_sequence.rb | 224 ++++++++++++++++++- lib/syntax_tree/yarv/instructions.rb | 9 + test/compiler_test.rb | 25 ++- 4 files changed, 267 insertions(+), 12 deletions(-) diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index bdc31ab3..f876cb3b 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -331,8 +331,8 @@ def visit_aref(node) calldata = YARV.calldata(:[], 1) visit(node.collection) - if !options.frozen_string_literal? && options.specialized_instruction? && - (node.index.parts.length == 1) + if !options.frozen_string_literal? && + options.specialized_instruction? && (node.index.parts.length == 1) arg = node.index.parts.first if arg.is_a?(StringLiteral) && (arg.parts.length == 1) @@ -502,7 +502,8 @@ def visit_assign(node) when ARefField calldata = YARV.calldata(:[]=, 2) - if !options.frozen_string_literal? && options.specialized_instruction? && + if !options.frozen_string_literal? && + options.specialized_instruction? 
&& (node.target.index.parts.length == 1) arg = node.target.index.parts.first @@ -1085,7 +1086,7 @@ def visit_hash(node) def visit_hshptn(node) end - + def visit_heredoc(node) if node.beginning.value.end_with?("`") visit_xstring_literal(node) @@ -1465,7 +1466,14 @@ def visit_program(node) end end - top_iseq = InstructionSequence.new(:top, "", nil, node.location, options) + top_iseq = + InstructionSequence.new( + :top, + "", + nil, + node.location, + options + ) with_child_iseq(top_iseq) do visit_all(preexes) @@ -1910,7 +1918,8 @@ def visit_word(node) end def visit_words(node) - if options.frozen_string_literal? && (compiled = RubyVisitor.compile(node)) + if options.frozen_string_literal? && + (compiled = RubyVisitor.compile(node)) iseq.duparray(compiled) else visit_all(node.elements) diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index 156070da..c6395f65 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -78,7 +78,13 @@ def change_by(value) # These are various compilation options provided. attr_reader :options - def initialize(type, name, parent_iseq, location, options = Compiler::Options.new) + def initialize( + type, + name, + parent_iseq, + location, + options = Compiler::Options.new + ) @type = type @name = name @parent_iseq = parent_iseq @@ -413,6 +419,10 @@ def opt_aset_with(object, calldata) push(OptAsetWith.new(object, calldata)) end + def opt_case_dispatch(case_dispatch_hash, else_label) + push(OptCaseDispatch.new(case_dispatch_hash, else_label)) + end + def opt_getconstant_path(names) if RUBY_VERSION < "3.2" || !options.inline_const_cache? 
cache = nil @@ -655,6 +665,10 @@ def swap push(Swap.new) end + def throw(type) + push(Throw.new(type)) + end + def topn(number) push(TopN.new(number)) end @@ -662,6 +676,214 @@ def topn(number) def toregexp(options, length) push(ToRegExp.new(options, length)) end + + # This method will create a new instruction sequence from a serialized + # RubyVM::InstructionSequence object. + def self.from(source, options = Compiler::Options.new, parent_iseq = nil) + iseq = new(source[9], source[5], parent_iseq, Location.default, options) + + # set up the correct argument size + iseq.argument_size = source[4][:arg_size] + + # set up all of the locals + source[10].each { |local| iseq.local_table.plain(local) } + + # set up the argument options + iseq.argument_options.merge!(source[11]) + + # set up all of the instructions + source[13].each do |insn| + # skip line numbers + next if insn.is_a?(Integer) + + # put events into the array and then continue + if insn.is_a?(Symbol) + iseq.event(insn) + next + end + + type, *opnds = insn + case type + when :adjuststack + iseq.adjuststack(opnds[0]) + when :anytostring + iseq.anytostring + when :branchif + iseq.branchif(opnds[0]) + when :branchnil + iseq.branchnil(opnds[0]) + when :branchunless + iseq.branchunless(opnds[0]) + when :checkkeyword + iseq.checkkeyword(iseq.local_table.size - opnds[0] + 2, opnds[1]) + when :checkmatch + iseq.checkmatch(opnds[0]) + when :checktype + iseq.checktype(opnds[0]) + when :concatarray + iseq.concatarray + when :concatstrings + iseq.concatstrings(opnds[0]) + when :defineclass + iseq.defineclass(opnds[0], from(opnds[1], options, iseq), opnds[2]) + when :defined + iseq.defined(opnds[0], opnds[1], opnds[2]) + when :definemethod + iseq.definemethod(opnds[0], from(opnds[1], options, iseq)) + when :definesmethod + iseq.definesmethod(opnds[0], from(opnds[1], options, iseq)) + when :dup + iseq.dup + when :duparray + iseq.duparray(opnds[0]) + when :duphash + iseq.duphash(opnds[0]) + when :dupn + iseq.dupn(opnds[0]) 
+ when :expandarray + iseq.expandarray(opnds[0], opnds[1]) + when :getblockparam, :getblockparamproxy, :getlocal, :getlocal_WC_0, + :getlocal_WC_1, :setblockparam, :setlocal, :setlocal_WC_0, + :setlocal_WC_1 + current = iseq + level = 0 + + case type + when :getlocal_WC_1, :setlocal_WC_1 + level = 1 + when :getblockparam, :getblockparamproxy, :getlocal, :setblockparam, + :setlocal + level = opnds[1] + end + + level.times { current = current.parent_iseq } + index = current.local_table.size - opnds[0] + 2 + + case type + when :getblockparam + iseq.getblockparam(index, level) + when :getblockparamproxy + iseq.getblockparamproxy(index, level) + when :getlocal, :getlocal_WC_0, :getlocal_WC_1 + iseq.getlocal(index, level) + when :setblockparam + iseq.setblockparam(index, level) + when :setlocal, :setlocal_WC_0, :setlocal_WC_1 + iseq.setlocal(index, level) + end + when :getclassvariable + iseq.push(GetClassVariable.new(opnds[0], opnds[1])) + when :getconstant + iseq.getconstant(opnds[0]) + when :getglobal + iseq.getglobal(opnds[0]) + when :getinstancevariable + iseq.push(GetInstanceVariable.new(opnds[0], opnds[1])) + when :getspecial + iseq.getspecial(opnds[0], opnds[1]) + when :intern + iseq.intern + when :invokeblock + iseq.invokeblock(CallData.from(opnds[0])) + when :invokesuper + block_iseq = opnds[1] ? 
from(opnds[1], options, iseq) : nil + iseq.invokesuper(CallData.from(opnds[0]), block_iseq) + when :jump + iseq.jump(opnds[0]) + when :leave + iseq.leave + when :newarray + iseq.newarray(opnds[0]) + when :newarraykwsplat + iseq.newarraykwsplat(opnds[0]) + when :newhash + iseq.newhash(opnds[0]) + when :newrange + iseq.newrange(opnds[0]) + when :nop + iseq.nop + when :objtostring + iseq.objtostring(CallData.from(opnds[0])) + when :once + iseq.once(from(opnds[0], options, iseq), opnds[1]) + when :opt_and, :opt_aref, :opt_aset, :opt_div, :opt_empty_p, :opt_eq, + :opt_ge, :opt_gt, :opt_le, :opt_length, :opt_lt, :opt_ltlt, + :opt_minus, :opt_mod, :opt_mult, :opt_nil_p, :opt_not, :opt_or, + :opt_plus, :opt_regexpmatch2, :opt_send_without_block, :opt_size, + :opt_succ + iseq.send(CallData.from(opnds[0]), nil) + when :opt_aref_with + iseq.opt_aref_with(opnds[0], CallData.from(opnds[1])) + when :opt_aset_with + iseq.opt_aset_with(opnds[0], CallData.from(opnds[1])) + when :opt_case_dispatch + iseq.opt_case_dispatch(opnds[0], opnds[1]) + when :opt_getconstant_path + iseq.opt_getconstant_path(opnds[0]) + when :opt_getinlinecache + iseq.opt_getinlinecache(opnds[0], opnds[1]) + when :opt_newarray_max + iseq.opt_newarray_max(opnds[0]) + when :opt_newarray_min + iseq.opt_newarray_min(opnds[0]) + when :opt_neq + iseq.push( + OptNEq.new(CallData.from(opnds[0]), CallData.from(opnds[1])) + ) + when :opt_setinlinecache + iseq.opt_setinlinecache(opnds[0]) + when :opt_str_freeze + iseq.opt_str_freeze(opnds[0]) + when :opt_str_uminus + iseq.opt_str_uminus(opnds[0]) + when :pop + iseq.pop + when :putnil + iseq.putnil + when :putobject + iseq.putobject(opnds[0]) + when :putobject_INT2FIX_0_ + iseq.putobject(0) + when :putobject_INT2FIX_1_ + iseq.putobject(1) + when :putself + iseq.putself + when :putstring + iseq.putstring(opnds[0]) + when :putspecialobject + iseq.putspecialobject(opnds[0]) + when :send + block_iseq = opnds[1] ? 
from(opnds[1], options, iseq) : nil + iseq.send(CallData.from(opnds[0]), block_iseq) + when :setclassvariable + iseq.push(SetClassVariable.new(opnds[0], opnds[1])) + when :setconstant + iseq.setconstant(opnds[0]) + when :setglobal + iseq.setglobal(opnds[0]) + when :setinstancevariable + iseq.push(SetInstanceVariable.new(opnds[0], opnds[1])) + when :setn + iseq.setn(opnds[0]) + when :setspecial + iseq.setspecial(opnds[0]) + when :splatarray + iseq.splatarray(opnds[0]) + when :swap + iseq.swap + when :throw + iseq.throw(opnds[0]) + when :topn + iseq.topn(opnds[0]) + when :toregexp + iseq.toregexp(opnds[0], opnds[1]) + else + raise "Unknown instruction type: #{type}" + end + end + + iseq + end end end end diff --git a/lib/syntax_tree/yarv/instructions.rb b/lib/syntax_tree/yarv/instructions.rb index 3fcdadb3..9c816072 100644 --- a/lib/syntax_tree/yarv/instructions.rb +++ b/lib/syntax_tree/yarv/instructions.rb @@ -42,6 +42,15 @@ def to_h result[:kw_arg] = kw_arg if kw_arg result end + + def self.from(serialized) + new( + serialized[:mid], + serialized[:orig_argc], + serialized[:flag], + serialized[:kw_arg] + ) + end end # A convenience method for creating a CallData object. diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 9ea7f21b..1f4a5299 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -439,12 +439,16 @@ class CompilerTest < Minitest::Test ] OPTIONS.each do |options| - suffix = options.to_hash.map { |k, v| "#{k}=#{v}" }.join("&") + suffix = options.to_hash.map { |key, value| "#{key}=#{value}" }.join("&") CASES.each do |source| - define_method(:"test_#{source}_(#{suffix})") do + define_method(:"test_compiles_#{source}_(#{suffix})") do assert_compiles(source, options) end + + define_method(:"test_loads_#{source}_(#{suffix})") do + assert_loads(source, options) + end end end @@ -483,12 +487,23 @@ def serialize_iseq(iseq) serialized end + # Check that the compiled instruction sequence matches the expected + # instruction sequence. 
def assert_compiles(source, options) - program = SyntaxTree.parse(source) - assert_equal( serialize_iseq(RubyVM::InstructionSequence.compile(source, **options)), - serialize_iseq(program.accept(YARV::Compiler.new(options))) + serialize_iseq(YARV.compile(source, options)) + ) + end + + # Check that the compiled instruction sequence matches the instruction + # sequence created directly from the compiled instruction sequence. + def assert_loads(source, options) + compiled = RubyVM::InstructionSequence.compile(source, **options) + + assert_equal( + serialize_iseq(compiled), + serialize_iseq(YARV::InstructionSequence.from(compiled.to_a, options)) ) end From 5dcd6722b6ccec6e95ade74d08d3260fdd292a54 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 23 Nov 2022 12:37:45 -0500 Subject: [PATCH 070/104] Use label objects instead of symbols --- lib/syntax_tree/yarv/bf.rb | 2 +- lib/syntax_tree/yarv/compiler.rb | 86 +++++++++++--------- lib/syntax_tree/yarv/instruction_sequence.rb | 69 +++++++++++++--- lib/syntax_tree/yarv/instructions.rb | 18 ++-- lib/syntax_tree/yarv/legacy.rb | 2 +- 5 files changed, 115 insertions(+), 62 deletions(-) diff --git a/lib/syntax_tree/yarv/bf.rb b/lib/syntax_tree/yarv/bf.rb index 0fb27f7e..9b037305 100644 --- a/lib/syntax_tree/yarv/bf.rb +++ b/lib/syntax_tree/yarv/bf.rb @@ -153,7 +153,7 @@ def input_char(iseq) # unless $tape[$cursor] == 0 def loop_start(iseq) - start_label = iseq.label + start_label = iseq.label_at_index iseq.getglobal(:$tape) iseq.getglobal(:$cursor) diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index f876cb3b..5f4f6ac0 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -417,7 +417,8 @@ def visit_aryptn(node) # First, check if the #deconstruct cache is nil. If it is, we're going # to call #deconstruct on the object and cache the result. 
iseq.topn(2) - branchnil = iseq.branchnil(-1) + deconstruct_label = iseq.label + iseq.branchnil(deconstruct_label) # Next, ensure that the cached value was cached correctly, otherwise # fail the match. @@ -432,7 +433,7 @@ def visit_aryptn(node) # Check if the object responds to #deconstruct, fail the match # otherwise. - branchnil.patch!(iseq) + iseq.event(deconstruct_label) iseq.dup iseq.putobject(:deconstruct) iseq.send(YARV.calldata(:respond_to?, 1)) @@ -634,11 +635,12 @@ def visit_binary(node) visit(node.left) iseq.dup - branchif = iseq.branchif(-1) + skip_right_label = iseq.label + iseq.branchif(skip_right_label) iseq.pop visit(node.right) - branchif.patch!(iseq) + iseq.push(skip_right_label) else visit(node.left) visit(node.right) @@ -758,11 +760,12 @@ def visit_call(node) iseq.putself end - branchnil = - if node.operator&.value == "&." - iseq.dup - iseq.branchnil(-1) - end + after_call_label = nil + if node.operator&.value == "&." + iseq.dup + after_call_label = iseq.label + iseq.branchnil(after_call_label) + end flag = 0 @@ -815,7 +818,7 @@ def visit_call(node) YARV.calldata(node.message.value.to_sym, argc, flag), block_iseq ) - branchnil.patch!(iseq) if branchnil + iseq.event(after_call_label) if after_call_label end def visit_case(node) @@ -845,16 +848,19 @@ def visit_case(node) CallData::CALL_FCALL | CallData::CALL_ARGS_SIMPLE ) ) - [clause, iseq.branchif(:label_00)] + + label = iseq.label + iseq.branchif(label) + [clause, label] end iseq.pop else_clause ? 
visit(else_clause) : iseq.putnil iseq.leave - branches.each_with_index do |(clause, branchif), index| + branches.each_with_index do |(clause, label), index| iseq.leave if index != 0 - branchif.patch!(iseq) + iseq.push(label) iseq.pop visit(clause) end @@ -1100,26 +1106,28 @@ def visit_heredoc(node) def visit_if(node) if node.predicate.is_a?(RangeNode) + true_label = iseq.label + iseq.getspecial(GetSpecial::SVAR_FLIPFLOP_START, 0) - branchif = iseq.branchif(-1) + iseq.branchif(true_label) visit(node.predicate.left) - branchunless_true = iseq.branchunless(-1) + end_branch = iseq.branchunless(-1) iseq.putobject(true) iseq.setspecial(GetSpecial::SVAR_FLIPFLOP_START) - branchif.patch!(iseq) + iseq.push(true_label) visit(node.predicate.right) - branchunless_false = iseq.branchunless(-1) + false_branch = iseq.branchunless(-1) iseq.putobject(false) iseq.setspecial(GetSpecial::SVAR_FLIPFLOP_START) - branchunless_false.patch!(iseq) + false_branch.patch!(iseq) visit(node.statements) iseq.leave - branchunless_true.patch!(iseq) + end_branch.patch!(iseq) iseq.putnil else visit(node.predicate) @@ -1317,22 +1325,22 @@ def visit_opassign(node) [Const, CVar, GVar].include?(node.target.value.class) opassign_defined(node) else - branchif = nil + skip_value_label = iseq.label with_opassign(node) do iseq.dup - branchif = iseq.branchif(-1) + iseq.branchif(skip_value_label) iseq.pop visit(node.value) end if node.target.is_a?(ARefField) iseq.leave - branchif.patch!(iseq) + iseq.push(skip_value_label) iseq.setn(3) iseq.adjuststack(3) else - branchif.patch!(iseq) + iseq.push(skip_value_label) end end else @@ -1363,13 +1371,11 @@ def visit_params(node) iseq.local_table.plain(name) iseq.argument_size += 1 - argument_options[:opt] = [iseq.label] unless argument_options.key?( - :opt - ) + argument_options[:opt] = [iseq.label_at_index] unless argument_options.key?(:opt) visit(value) iseq.setlocal(index, 0) - iseq.argument_options[:opt] << iseq.label + iseq.argument_options[:opt] << 
iseq.label_at_index end visit(node.rest) if node.rest @@ -1406,12 +1412,14 @@ def visit_params(node) elsif (compiled = RubyVisitor.compile(value)) argument_options[:keyword] << [name, compiled] else + skip_value_label = iseq.label + argument_options[:keyword] << [name] iseq.checkkeyword(keyword_bits_index, keyword_index) - branchif = iseq.branchif(-1) + iseq.branchif(skip_value_label) visit(value) iseq.setlocal(index, 0) - branchif.patch!(iseq) + iseq.push(skip_value_label) end end @@ -1558,13 +1566,15 @@ def visit_rassign(node) jumps_to_match.concat(visit(node.pattern)) end + no_key_label = iseq.label + # First we're going to push the core onto the stack, then we'll check # if the value to match is truthy. If it is, we'll jump down to raise # NoMatchingPatternKeyError. Otherwise we'll raise # NoMatchingPatternError. iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) iseq.topn(4) - branchif_no_key = iseq.branchif(-1) + iseq.branchif(no_key_label) # Here we're going to raise NoMatchingPatternError. iseq.putobject(NoMatchingPatternError) @@ -1577,7 +1587,7 @@ def visit_rassign(node) jump_to_exit = iseq.jump(-1) # Here we're going to raise NoMatchingPatternKeyError. 
- branchif_no_key.patch!(iseq) + iseq.push(no_key_label) iseq.putobject(NoMatchingPatternKeyError) iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) iseq.putobject("%p: %s") @@ -1797,7 +1807,7 @@ def visit_unless(node) jump = iseq.jump(-1) branchunless.patch!(iseq) visit(node.consequent) - jump.patch!(iseq.label) + jump.patch!(iseq.label_at_index) else branchunless.patch!(iseq) end @@ -1812,7 +1822,7 @@ def visit_until(node) iseq.pop jumps << iseq.jump(-1) - label = iseq.label + label = iseq.label_at_index visit(node.statements) iseq.pop jumps.each { |jump| jump.patch!(iseq) } @@ -1891,6 +1901,7 @@ def visit_when(node) end def visit_while(node) + repeat_label = iseq.label jumps = [] jumps << iseq.jump(-1) @@ -1898,13 +1909,13 @@ def visit_while(node) iseq.pop jumps << iseq.jump(-1) - label = iseq.label + iseq.push(repeat_label) visit(node.statements) iseq.pop jumps.each { |jump| jump.patch!(iseq) } visit(node.predicate) - iseq.branchif(label) + iseq.branchif(repeat_label) iseq.putnil if last_statement? end @@ -2060,7 +2071,8 @@ def opassign_defined(node) end iseq.dup - branchif = iseq.branchif(-1) + skip_value_label = iseq.label + iseq.branchif(skip_value_label) iseq.pop branchunless.patch!(iseq) @@ -2085,7 +2097,7 @@ def opassign_defined(node) end end - branchif.patch!(iseq) + iseq.push(skip_value_label) end # Whenever a value is interpolated into a string-like structure, these diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index c6395f65..e47a18ea 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -41,6 +41,21 @@ def change_by(value) end end + # This represents the destination of instructions that jump. Initially it + # does not track its position so that when we perform optimizations the + # indices don't get messed up. 
+ class Label + attr_reader :name + + def initialize(name = nil) + @name = name + end + + def patch!(name) + @name = name + end + end + # The type of the instruction sequence. attr_reader :type @@ -129,7 +144,7 @@ def inline_storage_for(name) def length insns.inject(0) do |sum, insn| case insn - when Integer, Symbol + when Integer, Label, Symbol sum else sum + insn.length @@ -151,6 +166,20 @@ def eval def to_a versions = RUBY_VERSION.split(".").map(&:to_i) + # First, set it up so that all of the labels get their correct name. + insns.inject(0) do |length, insn| + case insn + when Integer, Symbol + length + when Label + insn.patch!(:"label_#{length}") + length + else + length + insn.length + end + end + + # Next, return the instruction sequence as an array. [ MAGIC, versions[0], @@ -170,7 +199,14 @@ def to_a argument_options, [], insns.map do |insn| - insn.is_a?(Integer) || insn.is_a?(Symbol) ? insn : insn.to_a(self) + case insn + when Integer, Symbol + insn + when Label + insn.name + else + insn.to_a(self) + end end ] end @@ -209,11 +245,15 @@ def singleton_class_child_iseq(location) # Instruction push methods ########################################################################## + def label + Label.new + end + def push(insn) insns << insn case insn - when Integer, Symbol, Array + when Array, Integer, Label, Symbol insn else stack.change_by(-insn.pops + insn.pushes) @@ -221,9 +261,7 @@ def push(insn) end end - # This creates a new label at the current length of the instruction - # sequence. It is used as the operand for jump instructions. - def label + def label_at_index name = :"label_#{length}" insns.last == name ? 
name : event(name) end @@ -691,27 +729,38 @@ def self.from(source, options = Compiler::Options.new, parent_iseq = nil) # set up the argument options iseq.argument_options.merge!(source[11]) + # set up the labels object so that the labels are shared between the + # location in the instruction sequence and the instructions that + # reference them + labels = Hash.new { |hash, name| hash[name] = Label.new(name) } + # set up all of the instructions source[13].each do |insn| # skip line numbers next if insn.is_a?(Integer) - # put events into the array and then continue + # add events and labels if insn.is_a?(Symbol) - iseq.event(insn) + if insn.start_with?("label_") + iseq.push(labels[insn]) + else + iseq.push(insn) + end next end + # add instructions, mapped to our own instruction classes type, *opnds = insn + case type when :adjuststack iseq.adjuststack(opnds[0]) when :anytostring iseq.anytostring when :branchif - iseq.branchif(opnds[0]) + iseq.branchif(labels[opnds[0]]) when :branchnil - iseq.branchnil(opnds[0]) + iseq.branchnil(labels[opnds[0]]) when :branchunless iseq.branchunless(opnds[0]) when :checkkeyword diff --git a/lib/syntax_tree/yarv/instructions.rb b/lib/syntax_tree/yarv/instructions.rb index 9c816072..c340cd4e 100644 --- a/lib/syntax_tree/yarv/instructions.rb +++ b/lib/syntax_tree/yarv/instructions.rb @@ -159,12 +159,8 @@ def initialize(label) @label = label end - def patch!(iseq) - @label = iseq.label - end - def to_a(_iseq) - [:branchif, label] + [:branchif, label.name] end def length @@ -204,12 +200,8 @@ def initialize(label) @label = label end - def patch!(iseq) - @label = iseq.label - end - def to_a(_iseq) - [:branchnil, label] + [:branchnil, label.name] end def length @@ -249,7 +241,7 @@ def initialize(label) end def patch!(iseq) - @label = iseq.label + @label = iseq.label_at_index end def to_a(_iseq) @@ -297,7 +289,7 @@ def initialize(keyword_bits_index, keyword_index) end def patch!(iseq) - @label = iseq.label + @label = iseq.label_at_index end 
def to_a(iseq) @@ -1360,7 +1352,7 @@ def initialize(label) end def patch!(iseq) - @label = iseq.label + @label = iseq.label_at_index end def to_a(_iseq) diff --git a/lib/syntax_tree/yarv/legacy.rb b/lib/syntax_tree/yarv/legacy.rb index 45dfe768..20588974 100644 --- a/lib/syntax_tree/yarv/legacy.rb +++ b/lib/syntax_tree/yarv/legacy.rb @@ -68,7 +68,7 @@ def initialize(label, cache) end def patch!(iseq) - @label = iseq.label + @label = iseq.label_at_index end def to_a(_iseq) From 633ab9bea7f542b098c975296e7e6044faefdb51 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 23 Nov 2022 14:26:28 -0500 Subject: [PATCH 071/104] Start using labels for jumps --- lib/syntax_tree/yarv/bf.rb | 12 +- lib/syntax_tree/yarv/compiler.rb | 335 ++++++++++--------- lib/syntax_tree/yarv/disassembler.rb | 21 +- lib/syntax_tree/yarv/instruction_sequence.rb | 15 +- lib/syntax_tree/yarv/instructions.rb | 16 +- lib/syntax_tree/yarv/legacy.rb | 6 +- 6 files changed, 196 insertions(+), 209 deletions(-) diff --git a/lib/syntax_tree/yarv/bf.rb b/lib/syntax_tree/yarv/bf.rb index 9b037305..78c01af5 100644 --- a/lib/syntax_tree/yarv/bf.rb +++ b/lib/syntax_tree/yarv/bf.rb @@ -153,23 +153,25 @@ def input_char(iseq) # unless $tape[$cursor] == 0 def loop_start(iseq) - start_label = iseq.label_at_index + start_label = iseq.label + end_label = iseq.label + iseq.push(start_label) iseq.getglobal(:$tape) iseq.getglobal(:$cursor) iseq.send(YARV.calldata(:[], 1)) iseq.putobject(0) iseq.send(YARV.calldata(:==, 1)) + iseq.branchunless(end_label) - branchunless = iseq.branchunless(-1) - [start_label, branchunless] + [start_label, end_label] end # Jump back to the start of the loop. 
- def loop_end(iseq, start_label, branchunless) + def loop_end(iseq, start_label, end_label) iseq.jump(start_label) - branchunless.patch!(iseq) + iseq.push(end_label) end end end diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index 5f4f6ac0..3bcfc598 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -402,100 +402,6 @@ def visit_array(node) end def visit_aryptn(node) - match_failures = [] - jumps_to_exit = [] - - # If there's a constant, then check if we match against that constant or - # not first. Branch to failure if we don't. - if node.constant - iseq.dup - visit(node.constant) - iseq.checkmatch(CheckMatch::TYPE_CASE) - match_failures << iseq.branchunless(-1) - end - - # First, check if the #deconstruct cache is nil. If it is, we're going - # to call #deconstruct on the object and cache the result. - iseq.topn(2) - deconstruct_label = iseq.label - iseq.branchnil(deconstruct_label) - - # Next, ensure that the cached value was cached correctly, otherwise - # fail the match. - iseq.topn(2) - match_failures << iseq.branchunless(-1) - - # Since we have a valid cached value, we can skip past the part where we - # call #deconstruct on the object. - iseq.pop - iseq.topn(1) - jump = iseq.jump(-1) - - # Check if the object responds to #deconstruct, fail the match - # otherwise. - iseq.event(deconstruct_label) - iseq.dup - iseq.putobject(:deconstruct) - iseq.send(YARV.calldata(:respond_to?, 1)) - iseq.setn(3) - match_failures << iseq.branchunless(-1) - - # Call #deconstruct and ensure that it's an array, raise an error - # otherwise. - iseq.send(YARV.calldata(:deconstruct)) - iseq.setn(2) - iseq.dup - iseq.checktype(CheckType::TYPE_ARRAY) - match_error = iseq.branchunless(-1) - - # Ensure that the deconstructed array has the correct size, fail the - # match otherwise. 
- jump.patch!(iseq) - iseq.dup - iseq.send(YARV.calldata(:length)) - iseq.putobject(node.requireds.length) - iseq.send(YARV.calldata(:==, 1)) - match_failures << iseq.branchunless(-1) - - # For each required element, check if the deconstructed array contains - # the element, otherwise jump out to the top-level match failure. - iseq.dup - node.requireds.each_with_index do |required, index| - iseq.putobject(index) - iseq.send(YARV.calldata(:[], 1)) - - case required - when VarField - lookup = visit(required) - iseq.setlocal(lookup.index, lookup.level) - else - visit(required) - iseq.checkmatch(CheckMatch::TYPE_CASE) - match_failures << iseq.branchunless(-1) - end - - if index < node.requireds.length - 1 - iseq.dup - else - iseq.pop - jumps_to_exit << iseq.jump(-1) - end - end - - # Set up the routine here to raise an error to indicate that the type of - # the deconstructed array was incorrect. - match_error.patch!(iseq) - iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) - iseq.putobject(TypeError) - iseq.putobject("deconstruct must return Array") - iseq.send(YARV.calldata(:"core#raise", 2)) - iseq.pop - - # Patch all of the match failures to jump here so that we pop a final - # value before returning to the parent node. 
- match_failures.each { |match_failure| match_failure.patch!(iseq) } - iseq.pop - jumps_to_exit end def visit_assign(node) @@ -623,14 +529,15 @@ def visit_begin(node) def visit_binary(node) case node.operator when :"&&" + done_label = iseq.label + visit(node.left) iseq.dup + iseq.branchunless(done_label) - branchunless = iseq.branchunless(-1) iseq.pop - visit(node.right) - branchunless.patch!(iseq) + iseq.push(done_label) when :"||" visit(node.left) iseq.dup @@ -1107,48 +1014,52 @@ def visit_heredoc(node) def visit_if(node) if node.predicate.is_a?(RangeNode) true_label = iseq.label + false_label = iseq.label + end_label = iseq.label iseq.getspecial(GetSpecial::SVAR_FLIPFLOP_START, 0) iseq.branchif(true_label) visit(node.predicate.left) - end_branch = iseq.branchunless(-1) + iseq.branchunless(end_label) iseq.putobject(true) iseq.setspecial(GetSpecial::SVAR_FLIPFLOP_START) iseq.push(true_label) visit(node.predicate.right) - false_branch = iseq.branchunless(-1) + iseq.branchunless(false_label) iseq.putobject(false) iseq.setspecial(GetSpecial::SVAR_FLIPFLOP_START) - false_branch.patch!(iseq) + iseq.push(false_label) visit(node.statements) iseq.leave - end_branch.patch!(iseq) + iseq.push(end_label) iseq.putnil else + consequent_label = iseq.label + visit(node.predicate) - branchunless = iseq.branchunless(-1) + iseq.branchunless(consequent_label) visit(node.statements) if last_statement? iseq.leave - branchunless.patch!(iseq) - + iseq.push(consequent_label) node.consequent ? 
visit(node.consequent) : iseq.putnil else iseq.pop if node.consequent - jump = iseq.jump(-1) - branchunless.patch!(iseq) + done_label = iseq.label + iseq.jump(done_label) + iseq.push(consequent_label) visit(node.consequent) - jump.patch!(iseq) + iseq.push(done_label) else - branchunless.patch!(iseq) + iseq.push(consequent_label) end end end @@ -1174,9 +1085,6 @@ def visit_imaginary(node) iseq.putobject(node.accept(RubyVisitor.new)) end - def visit_in(node) - end - def visit_int(node) iseq.putobject(node.accept(RubyVisitor.new)) end @@ -1293,11 +1201,11 @@ def visit_opassign(node) case (operator = node.operator.value.chomp("=").to_sym) when :"&&" - branchunless = nil + done_label = iseq.label with_opassign(node) do iseq.dup - branchunless = iseq.branchunless(-1) + iseq.branchunless(done_label) iseq.pop visit(node.value) end @@ -1305,15 +1213,15 @@ def visit_opassign(node) case node.target when ARefField iseq.leave - branchunless.patch!(iseq) + iseq.push(done_label) iseq.setn(3) iseq.adjuststack(3) when ConstPathField, TopConstField - branchunless.patch!(iseq) + iseq.push(done_label) iseq.swap iseq.pop else - branchunless.patch!(iseq) + iseq.push(done_label) end when :"||" if node.target.is_a?(ConstPathField) || @@ -1524,30 +1432,25 @@ def visit_rassign(node) iseq.putnil if node.operator.is_a?(Kw) - jumps = [] + match_label = iseq.label visit(node.value) iseq.dup - case node.pattern - when VarField - lookup = visit(node.pattern) - iseq.setlocal(lookup.index, lookup.level) - jumps << iseq.jump(-1) - else - jumps.concat(visit(node.pattern)) - end + visit_pattern(node.pattern, match_label) iseq.pop iseq.pop iseq.putobject(false) iseq.leave - jumps.each { |jump| jump.patch!(iseq) } + iseq.push(match_label) iseq.adjuststack(2) iseq.putobject(true) else - jumps_to_match = [] + no_key_label = iseq.label + end_leave_label = iseq.label + end_label = iseq.label iseq.putnil iseq.putobject(false) @@ -1556,17 +1459,7 @@ def visit_rassign(node) visit(node.value) iseq.dup - # Visit 
the pattern. If it matches, - case node.pattern - when VarField - lookup = visit(node.pattern) - iseq.setlocal(lookup.index, lookup.level) - jumps_to_match << iseq.jump(-1) - else - jumps_to_match.concat(visit(node.pattern)) - end - - no_key_label = iseq.label + visit_pattern(node.pattern, end_label) # First we're going to push the core onto the stack, then we'll check # if the value to match is truthy. If it is, we'll jump down to raise @@ -1584,7 +1477,7 @@ def visit_rassign(node) iseq.topn(7) iseq.send(YARV.calldata(:"core#sprintf", 3)) iseq.send(YARV.calldata(:"core#raise", 2)) - jump_to_exit = iseq.jump(-1) + iseq.jump(end_leave_label) # Here we're going to raise NoMatchingPatternKeyError. iseq.push(no_key_label) @@ -1601,14 +1494,12 @@ def visit_rassign(node) ) iseq.send(YARV.calldata(:"core#raise", 1)) - # This runs when the pattern fails to match. - jump_to_exit.patch!(iseq) + iseq.push(end_leave_label) iseq.adjuststack(7) iseq.putnil iseq.leave - # This runs when the pattern matches successfully. - jumps_to_match.each { |jump| jump.patch!(iseq) } + iseq.push(end_label) iseq.adjuststack(6) iseq.putnil end @@ -1791,44 +1682,47 @@ def visit_undef(node) end def visit_unless(node) + statements_label = iseq.label + visit(node.predicate) - branchunless = iseq.branchunless(-1) + iseq.branchunless(statements_label) node.consequent ? visit(node.consequent) : iseq.putnil if last_statement? 
iseq.leave - branchunless.patch!(iseq) - + iseq.push(statements_label) visit(node.statements) else iseq.pop if node.consequent - jump = iseq.jump(-1) - branchunless.patch!(iseq) + done_label = iseq.label + iseq.jump(done_label) + iseq.push(statements_label) visit(node.consequent) - jump.patch!(iseq.label_at_index) + iseq.push(done_label) else - branchunless.patch!(iseq) + iseq.push(statements_label) end end end def visit_until(node) - jumps = [] + predicate_label = iseq.label + statements_label = iseq.label - jumps << iseq.jump(-1) + iseq.jump(predicate_label) iseq.putnil iseq.pop - jumps << iseq.jump(-1) + iseq.jump(predicate_label) - label = iseq.label_at_index + iseq.push(statements_label) visit(node.statements) iseq.pop - jumps.each { |jump| jump.patch!(iseq) } + iseq.push(predicate_label) visit(node.predicate) - iseq.branchunless(label) + iseq.branchunless(statements_label) iseq.putnil if last_statement? end @@ -1901,21 +1795,21 @@ def visit_when(node) end def visit_while(node) - repeat_label = iseq.label - jumps = [] + predicate_label = iseq.label + statements_label = iseq.label - jumps << iseq.jump(-1) + iseq.jump(predicate_label) iseq.putnil iseq.pop - jumps << iseq.jump(-1) + iseq.jump(predicate_label) - iseq.push(repeat_label) + iseq.push(statements_label) visit(node.statements) iseq.pop - jumps.each { |jump| jump.patch!(iseq) } + iseq.push(predicate_label) visit(node.predicate) - iseq.branchif(repeat_label) + iseq.branchif(statements_label) iseq.putnil if last_statement? end @@ -2025,6 +1919,9 @@ def constant_names(node) # first check if the value is defined using the defined instruction. I # don't know why it is necessary, and suspect that it isn't. 
def opassign_defined(node) + value_label = iseq.label + skip_value_label = iseq.label + case node.target when ConstPathField visit(node.target.parent) @@ -2052,7 +1949,7 @@ def opassign_defined(node) end end - branchunless = iseq.branchunless(-1) + iseq.branchunless(value_label) case node.target when ConstPathField, TopConstField @@ -2071,11 +1968,10 @@ def opassign_defined(node) end iseq.dup - skip_value_label = iseq.label iseq.branchif(skip_value_label) - iseq.pop - branchunless.patch!(iseq) + iseq.pop + iseq.push(value_label) visit(node.value) case node.target @@ -2114,6 +2010,111 @@ def push_interpolate iseq.anytostring end + # Visit a type of pattern in a pattern match. + def visit_pattern(node, end_label) + case node + when AryPtn + length_label = iseq.label + match_failure_label = iseq.label + match_error_label = iseq.label + + # If there's a constant, then check if we match against that constant or + # not first. Branch to failure if we don't. + if node.constant + iseq.dup + visit(node.constant) + iseq.checkmatch(CheckMatch::TYPE_CASE) + iseq.branchunless(match_failure_label) + end + + # First, check if the #deconstruct cache is nil. If it is, we're going + # to call #deconstruct on the object and cache the result. + iseq.topn(2) + deconstruct_label = iseq.label + iseq.branchnil(deconstruct_label) + + # Next, ensure that the cached value was cached correctly, otherwise + # fail the match. + iseq.topn(2) + iseq.branchunless(match_failure_label) + + # Since we have a valid cached value, we can skip past the part where we + # call #deconstruct on the object. + iseq.pop + iseq.topn(1) + iseq.jump(length_label) + + # Check if the object responds to #deconstruct, fail the match + # otherwise. + iseq.event(deconstruct_label) + iseq.dup + iseq.putobject(:deconstruct) + iseq.send(YARV.calldata(:respond_to?, 1)) + iseq.setn(3) + iseq.branchunless(match_failure_label) + + # Call #deconstruct and ensure that it's an array, raise an error + # otherwise. 
+ iseq.send(YARV.calldata(:deconstruct)) + iseq.setn(2) + iseq.dup + iseq.checktype(CheckType::TYPE_ARRAY) + iseq.branchunless(match_error_label) + + # Ensure that the deconstructed array has the correct size, fail the + # match otherwise. + iseq.push(length_label) + iseq.dup + iseq.send(YARV.calldata(:length)) + iseq.putobject(node.requireds.length) + iseq.send(YARV.calldata(:==, 1)) + iseq.branchunless(match_failure_label) + + # For each required element, check if the deconstructed array contains + # the element, otherwise jump out to the top-level match failure. + iseq.dup + node.requireds.each_with_index do |required, index| + iseq.putobject(index) + iseq.send(YARV.calldata(:[], 1)) + + case required + when VarField + lookup = visit(required) + iseq.setlocal(lookup.index, lookup.level) + else + visit(required) + iseq.checkmatch(CheckMatch::TYPE_CASE) + iseq.branchunless(match_failure_label) + end + + if index < node.requireds.length - 1 + iseq.dup + else + iseq.pop + iseq.jump(end_label) + end + end + + # Set up the routine here to raise an error to indicate that the type of + # the deconstructed array was incorrect. + iseq.push(match_error_label) + iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) + iseq.putobject(TypeError) + iseq.putobject("deconstruct must return Array") + iseq.send(YARV.calldata(:"core#raise", 2)) + iseq.pop + + # Patch all of the match failures to jump here so that we pop a final + # value before returning to the parent node. + iseq.push(match_failure_label) + iseq.pop + when VarField + lookup = visit(node) + iseq.setlocal(lookup.index, lookup.level) + iseq.jump(end_label) + end + end + # There are a lot of nodes in the AST that act as contains of parts of # strings. This includes things like string literals, regular expressions, # heredocs, etc. 
This method will visit all the parts of a string within diff --git a/lib/syntax_tree/yarv/disassembler.rb b/lib/syntax_tree/yarv/disassembler.rb index d606e3cc..757b8b40 100644 --- a/lib/syntax_tree/yarv/disassembler.rb +++ b/lib/syntax_tree/yarv/disassembler.rb @@ -54,21 +54,20 @@ def disassemble(iseq) clauses = {} clause = [] + iseq.to_a iseq.insns.each do |insn| case insn - when Symbol - if insn.start_with?("label_") - unless clause.last.is_a?(Next) - clause << Assign(disasm_label.field, node_for(insn)) - end - - clauses[label] = clause - clause = [] - label = insn + when InstructionSequence::Label + unless clause.last.is_a?(Next) + clause << Assign(disasm_label.field, node_for(insn.name)) end + + clauses[label] = clause + clause = [] + label = insn.name when BranchUnless body = [ - Assign(disasm_label.field, node_for(insn.label)), + Assign(disasm_label.field, node_for(insn.label.name)), Next(Args([])) ] @@ -88,7 +87,7 @@ def disassemble(iseq) local = iseq.local_table.locals[insn.index] clause << VarRef(Ident(local.name.to_s)) when Jump - clause << Assign(disasm_label.field, node_for(insn.label)) + clause << Assign(disasm_label.field, node_for(insn.label.name)) clause << Next(Args([])) when Leave value = Args([clause.pop]) diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index e47a18ea..097fda38 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -464,11 +464,12 @@ def opt_case_dispatch(case_dispatch_hash, else_label) def opt_getconstant_path(names) if RUBY_VERSION < "3.2" || !options.inline_const_cache? cache = nil - getinlinecache = nil + cache_filled_label = nil if options.inline_const_cache? 
cache = inline_storage - getinlinecache = opt_getinlinecache(-1, cache) + cache_filled_label = label + opt_getinlinecache(cache_filled_label, cache) if names[0] == :"" names.shift @@ -489,7 +490,7 @@ def opt_getconstant_path(names) if options.inline_const_cache? opt_setinlinecache(cache) - getinlinecache.patch!(self) + push(cache_filled_label) end else push(OptGetConstantPath.new(names)) @@ -762,7 +763,7 @@ def self.from(source, options = Compiler::Options.new, parent_iseq = nil) when :branchnil iseq.branchnil(labels[opnds[0]]) when :branchunless - iseq.branchunless(opnds[0]) + iseq.branchunless(labels[opnds[0]]) when :checkkeyword iseq.checkkeyword(iseq.local_table.size - opnds[0] + 2, opnds[1]) when :checkmatch @@ -838,7 +839,7 @@ def self.from(source, options = Compiler::Options.new, parent_iseq = nil) block_iseq = opnds[1] ? from(opnds[1], options, iseq) : nil iseq.invokesuper(CallData.from(opnds[0]), block_iseq) when :jump - iseq.jump(opnds[0]) + iseq.jump(labels[opnds[0]]) when :leave iseq.leave when :newarray @@ -866,11 +867,11 @@ def self.from(source, options = Compiler::Options.new, parent_iseq = nil) when :opt_aset_with iseq.opt_aset_with(opnds[0], CallData.from(opnds[1])) when :opt_case_dispatch - iseq.opt_case_dispatch(opnds[0], opnds[1]) + iseq.opt_case_dispatch(opnds[0], labels[opnds[1]]) when :opt_getconstant_path iseq.opt_getconstant_path(opnds[0]) when :opt_getinlinecache - iseq.opt_getinlinecache(opnds[0], opnds[1]) + iseq.opt_getinlinecache(labels[opnds[0]], opnds[1]) when :opt_newarray_max iseq.opt_newarray_max(opnds[0]) when :opt_newarray_min diff --git a/lib/syntax_tree/yarv/instructions.rb b/lib/syntax_tree/yarv/instructions.rb index c340cd4e..8ec1f068 100644 --- a/lib/syntax_tree/yarv/instructions.rb +++ b/lib/syntax_tree/yarv/instructions.rb @@ -240,12 +240,8 @@ def initialize(label) @label = label end - def patch!(iseq) - @label = iseq.label_at_index - end - def to_a(_iseq) - [:branchunless, label] + [:branchunless, label.name] end def 
length @@ -288,10 +284,6 @@ def initialize(keyword_bits_index, keyword_index) @keyword_index = keyword_index end - def patch!(iseq) - @label = iseq.label_at_index - end - def to_a(iseq) [ :checkkeyword, @@ -1351,12 +1343,8 @@ def initialize(label) @label = label end - def patch!(iseq) - @label = iseq.label_at_index - end - def to_a(_iseq) - [:jump, label] + [:jump, label.name] end def length diff --git a/lib/syntax_tree/yarv/legacy.rb b/lib/syntax_tree/yarv/legacy.rb index 20588974..82f7560d 100644 --- a/lib/syntax_tree/yarv/legacy.rb +++ b/lib/syntax_tree/yarv/legacy.rb @@ -67,12 +67,8 @@ def initialize(label, cache) @cache = cache end - def patch!(iseq) - @label = iseq.label_at_index - end - def to_a(_iseq) - [:opt_getinlinecache, label, cache] + [:opt_getinlinecache, label.name, cache] end def length From f87fc563b0127bbe661bb43b424ca379e3a20aa4 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Sat, 26 Nov 2022 13:20:21 -0500 Subject: [PATCH 072/104] Create a linked list for nodes --- lib/syntax_tree/yarv/compiler.rb | 28 +++---- lib/syntax_tree/yarv/instruction_sequence.rb | 78 ++++++++++++++++---- 2 files changed, 76 insertions(+), 30 deletions(-) diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index 3bcfc598..f6d40f30 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -1260,15 +1260,13 @@ def visit_opassign(node) end def visit_params(node) - argument_options = iseq.argument_options - if node.requireds.any? 
- argument_options[:lead_num] = 0 + iseq.argument_options[:lead_num] = 0 node.requireds.each do |required| iseq.local_table.plain(required.value.to_sym) iseq.argument_size += 1 - argument_options[:lead_num] += 1 + iseq.argument_options[:lead_num] += 1 end end @@ -1279,7 +1277,9 @@ def visit_params(node) iseq.local_table.plain(name) iseq.argument_size += 1 - argument_options[:opt] = [iseq.label_at_index] unless argument_options.key?(:opt) + unless iseq.argument_options.key?(:opt) + iseq.argument_options[:opt] = [iseq.label_at_index] + end visit(value) iseq.setlocal(index, 0) @@ -1289,19 +1289,19 @@ def visit_params(node) visit(node.rest) if node.rest if node.posts.any? - argument_options[:post_start] = iseq.argument_size - argument_options[:post_num] = 0 + iseq.argument_options[:post_start] = iseq.argument_size + iseq.argument_options[:post_num] = 0 node.posts.each do |post| iseq.local_table.plain(post.value.to_sym) iseq.argument_size += 1 - argument_options[:post_num] += 1 + iseq.argument_options[:post_num] += 1 end end if node.keywords.any? - argument_options[:kwbits] = 0 - argument_options[:keyword] = [] + iseq.argument_options[:kwbits] = 0 + iseq.argument_options[:keyword] = [] keyword_bits_name = node.keyword_rest ? 3 : 2 iseq.argument_size += 1 @@ -1313,16 +1313,16 @@ def visit_params(node) iseq.local_table.plain(name) iseq.argument_size += 1 - argument_options[:kwbits] += 1 + iseq.argument_options[:kwbits] += 1 if value.nil? 
- argument_options[:keyword] << name + iseq.argument_options[:keyword] << name elsif (compiled = RubyVisitor.compile(value)) - argument_options[:keyword] << [name, compiled] + iseq.argument_options[:keyword] << [name, compiled] else skip_value_label = iseq.label - argument_options[:keyword] << [name] + iseq.argument_options[:keyword] << [name] iseq.checkkeyword(keyword_bits_index, keyword_index) iseq.branchif(skip_value_label) visit(value) diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index 097fda38..42910266 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -7,6 +7,50 @@ module YARV # list of instructions along with the metadata pertaining to them. It also # functions as a builder for the instruction sequence. class InstructionSequence + # When the list of instructions is first being created, it's stored as a + # linked list. This is to make it easier to perform peephole optimizations + # and other transformations like instruction specialization. + class InstructionList + class Node + attr_reader :instruction + attr_accessor :next_node + + def initialize(instruction, next_node = nil) + @instruction = instruction + @next_node = next_node + end + end + + attr_reader :head_node, :tail_node + + def initialize + @head_node = nil + @tail_node = nil + end + + def each + return to_enum(__method__) unless block_given? + node = head_node + + while node + yield node.instruction + node = node.next_node + end + end + + def push(instruction) + node = Node.new(instruction) + + if head_node.nil? 
+ @head_node = node + @tail_node = node + else + @tail_node.next_node = node + @tail_node = node + end + end + end + MAGIC = "YARVInstructionSequence/SimpleDataFormat" # This provides a handle to the rb_iseq_load function, which allows you to @@ -110,7 +154,7 @@ def initialize( @local_table = LocalTable.new @inline_storages = {} - @insns = [] + @insns = InstructionList.new @storage_index = 0 @stack = Stack.new @@ -142,7 +186,7 @@ def inline_storage_for(name) end def length - insns.inject(0) do |sum, insn| + insns.each.inject(0) do |sum, insn| case insn when Integer, Label, Symbol sum @@ -167,7 +211,7 @@ def to_a versions = RUBY_VERSION.split(".").map(&:to_i) # First, set it up so that all of the labels get their correct name. - insns.inject(0) do |length, insn| + insns.each.inject(0) do |length, insn| case insn when Integer, Symbol length @@ -179,6 +223,18 @@ def to_a end end + # Next, dump all of the instructions into a flat list. + dumped = insns.each.map do |insn| + case insn + when Integer, Symbol + insn + when Label + insn.name + else + insn.to_a(self) + end + end + # Next, return the instruction sequence as an array. [ MAGIC, @@ -198,16 +254,7 @@ def to_a local_table.names, argument_options, [], - insns.map do |insn| - case insn - when Integer, Symbol - insn - when Label - insn.name - else - insn.to_a(self) - end - end + dumped ] end @@ -250,7 +297,7 @@ def label end def push(insn) - insns << insn + insns.push(insn) case insn when Array, Integer, Label, Symbol @@ -262,8 +309,7 @@ def push(insn) end def label_at_index - name = :"label_#{length}" - insns.last == name ? 
name : event(name) + push(:"label_#{length}") end def event(name) From 2115177c7f74faafdf6760e9d926417c7c648bde Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Sat, 26 Nov 2022 13:27:18 -0500 Subject: [PATCH 073/104] Fix opt table to use labels --- lib/syntax_tree/yarv/compiler.rb | 9 ++++++-- lib/syntax_tree/yarv/instruction_sequence.rb | 22 +++++++++++--------- 2 files changed, 19 insertions(+), 12 deletions(-) diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index f6d40f30..c0d89239 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -1278,12 +1278,17 @@ def visit_params(node) iseq.argument_size += 1 unless iseq.argument_options.key?(:opt) - iseq.argument_options[:opt] = [iseq.label_at_index] + start_label = iseq.label + iseq.push(start_label) + iseq.argument_options[:opt] = [start_label] end visit(value) iseq.setlocal(index, 0) - iseq.argument_options[:opt] << iseq.label_at_index + + arg_given_label = iseq.label + iseq.push(arg_given_label) + iseq.argument_options[:opt] << arg_given_label end visit(node.rest) if node.rest diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index 42910266..63904923 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -235,6 +235,9 @@ def to_a end end + dumped_options = argument_options.dup + dumped_options[:opt].map!(&:name) if dumped_options[:opt] + # Next, return the instruction sequence as an array. 
[ MAGIC, @@ -252,7 +255,7 @@ def to_a location.start_line, type, local_table.names, - argument_options, + dumped_options, [], dumped ] @@ -308,10 +311,6 @@ def push(insn) end end - def label_at_index - push(:"label_#{length}") - end - def event(name) push(name) end @@ -767,6 +766,11 @@ def toregexp(options, length) def self.from(source, options = Compiler::Options.new, parent_iseq = nil) iseq = new(source[9], source[5], parent_iseq, Location.default, options) + # set up the labels object so that the labels are shared between the + # location in the instruction sequence and the instructions that + # reference them + labels = Hash.new { |hash, name| hash[name] = Label.new(name) } + # set up the correct argument size iseq.argument_size = source[4][:arg_size] @@ -775,11 +779,9 @@ def self.from(source, options = Compiler::Options.new, parent_iseq = nil) # set up the argument options iseq.argument_options.merge!(source[11]) - - # set up the labels object so that the labels are shared between the - # location in the instruction sequence and the instructions that - # reference them - labels = Hash.new { |hash, name| hash[name] = Label.new(name) } + if iseq.argument_options[:opt] + iseq.argument_options[:opt].map! { |opt| labels[opt] } + end # set up all of the instructions source[13].each do |insn| From 69d2dfa143361357c2684da17d3c2df3b5ed85c2 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Sat, 26 Nov 2022 13:52:27 -0500 Subject: [PATCH 074/104] Specialize in a separate pass --- lib/syntax_tree/yarv/compiler.rb | 11 -- lib/syntax_tree/yarv/instruction_sequence.rb | 185 ++++++++++--------- 2 files changed, 102 insertions(+), 94 deletions(-) diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index c0d89239..362ce32f 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -632,17 +632,6 @@ def visit_call(node) return end end - when StringLiteral - if RubyVisitor.compile(node.receiver).nil? 
- case node.message.value - when "-@" - iseq.opt_str_uminus(node.receiver.parts.first.value) - return - when "freeze" - iseq.opt_str_freeze(node.receiver.parts.first.value) - return - end - end end end diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index 63904923..dc2f7da8 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -12,8 +12,7 @@ class InstructionSequence # and other transformations like instruction specialization. class InstructionList class Node - attr_reader :instruction - attr_accessor :next_node + attr_accessor :instruction, :next_node def initialize(instruction, next_node = nil) @instruction = instruction @@ -29,11 +28,16 @@ def initialize end def each + return to_enum(__method__) unless block_given? + each_node { |node| yield node.instruction } + end + + def each_node return to_enum(__method__) unless block_given? node = head_node while node - yield node.instruction + yield node node = node.next_node end end @@ -210,7 +214,10 @@ def eval def to_a versions = RUBY_VERSION.split(".").map(&:to_i) - # First, set it up so that all of the labels get their correct name. + # First, specialize any instructions that need to be specialized. + specialize_instructions! if options.specialized_instruction? + + # Next, set it up so that all of the labels get their correct name. insns.each.inject(0) do |length, insn| case insn when Integer, Symbol @@ -261,6 +268,92 @@ def to_a ] end + def specialize_instructions! 
+ insns.each_node do |node| + case node.instruction + when PutObject, PutString + next unless node.next_node + next if node.instruction.is_a?(PutObject) && !node.instruction.object.is_a?(String) + + next_node = node.next_node + next unless next_node.instruction.is_a?(Send) + next if next_node.instruction.block_iseq + + calldata = next_node.instruction.calldata + next unless calldata.flags == CallData::CALL_ARGS_SIMPLE + + case calldata.method + when :freeze + node.instruction = OptStrFreeze.new(node.instruction.object, calldata) + node.next_node = next_node.next_node + when :-@ + node.instruction = OptStrUMinus.new(node.instruction.object, calldata) + node.next_node = next_node.next_node + end + when Send + calldata = node.instruction.calldata + + if !node.instruction.block_iseq && !calldata.flag?(CallData::CALL_ARGS_BLOCKARG) + # Specialize the send instruction. If it doesn't have a block + # attached, then we will replace it with an opt_send_without_block + # and do further specializations based on the called method and + # the number of arguments. 
+ node.instruction = + case [calldata.method, calldata.argc] + when [:length, 0] + OptLength.new(calldata) + when [:size, 0] + OptSize.new(calldata) + when [:empty?, 0] + OptEmptyP.new(calldata) + when [:nil?, 0] + OptNilP.new(calldata) + when [:succ, 0] + OptSucc.new(calldata) + when [:!, 0] + OptNot.new(calldata) + when [:+, 1] + OptPlus.new(calldata) + when [:-, 1] + OptMinus.new(calldata) + when [:*, 1] + OptMult.new(calldata) + when [:/, 1] + OptDiv.new(calldata) + when [:%, 1] + OptMod.new(calldata) + when [:==, 1] + OptEq.new(calldata) + when [:!=, 1] + OptNEq.new(YARV.calldata(:==, 1), calldata) + when [:=~, 1] + OptRegExpMatch2.new(calldata) + when [:<, 1] + OptLT.new(calldata) + when [:<=, 1] + OptLE.new(calldata) + when [:>, 1] + OptGT.new(calldata) + when [:>=, 1] + OptGE.new(calldata) + when [:<<, 1] + OptLTLT.new(calldata) + when [:[], 1] + OptAref.new(calldata) + when [:&, 1] + OptAnd.new(calldata) + when [:|, 1] + OptOr.new(calldata) + when [:[]=, 2] + OptAset.new(calldata) + else + OptSendWithoutBlock.new(calldata) + end + end + end + end + end + ########################################################################## # Child instruction sequence methods ########################################################################## @@ -568,24 +661,6 @@ def opt_setinlinecache(cache) push(Legacy::OptSetInlineCache.new(cache)) end - def opt_str_freeze(object) - if options.specialized_instruction? - push(OptStrFreeze.new(object, YARV.calldata(:freeze))) - else - putstring(object) - send(YARV.calldata(:freeze)) - end - end - - def opt_str_uminus(object) - if options.specialized_instruction? - push(OptStrUMinus.new(object, YARV.calldata(:-@))) - else - putstring(object) - send(YARV.calldata(:-@)) - end - end - def pop push(Pop.new) end @@ -625,65 +700,7 @@ def putstring(object) end def send(calldata, block_iseq = nil) - if options.specialized_instruction? && !block_iseq && - !calldata.flag?(CallData::CALL_ARGS_BLOCKARG) - # Specialize the send instruction. 
If it doesn't have a block - # attached, then we will replace it with an opt_send_without_block - # and do further specializations based on the called method and the - # number of arguments. - case [calldata.method, calldata.argc] - when [:length, 0] - push(OptLength.new(calldata)) - when [:size, 0] - push(OptSize.new(calldata)) - when [:empty?, 0] - push(OptEmptyP.new(calldata)) - when [:nil?, 0] - push(OptNilP.new(calldata)) - when [:succ, 0] - push(OptSucc.new(calldata)) - when [:!, 0] - push(OptNot.new(calldata)) - when [:+, 1] - push(OptPlus.new(calldata)) - when [:-, 1] - push(OptMinus.new(calldata)) - when [:*, 1] - push(OptMult.new(calldata)) - when [:/, 1] - push(OptDiv.new(calldata)) - when [:%, 1] - push(OptMod.new(calldata)) - when [:==, 1] - push(OptEq.new(calldata)) - when [:!=, 1] - push(OptNEq.new(YARV.calldata(:==, 1), calldata)) - when [:=~, 1] - push(OptRegExpMatch2.new(calldata)) - when [:<, 1] - push(OptLT.new(calldata)) - when [:<=, 1] - push(OptLE.new(calldata)) - when [:>, 1] - push(OptGT.new(calldata)) - when [:>=, 1] - push(OptGE.new(calldata)) - when [:<<, 1] - push(OptLTLT.new(calldata)) - when [:[], 1] - push(OptAref.new(calldata)) - when [:&, 1] - push(OptAnd.new(calldata)) - when [:|, 1] - push(OptOr.new(calldata)) - when [:[]=, 2] - push(OptAset.new(calldata)) - else - push(OptSendWithoutBlock.new(calldata)) - end - else - push(Send.new(calldata, block_iseq)) - end + push(Send.new(calldata, block_iseq)) end def setblockparam(index, level) @@ -931,9 +948,11 @@ def self.from(source, options = Compiler::Options.new, parent_iseq = nil) when :opt_setinlinecache iseq.opt_setinlinecache(opnds[0]) when :opt_str_freeze - iseq.opt_str_freeze(opnds[0]) + iseq.putstring(opnds[0]) + iseq.send(YARV.calldata(:freeze)) when :opt_str_uminus - iseq.opt_str_uminus(opnds[0]) + iseq.putstring(opnds[0]) + iseq.send(YARV.calldata(:-@)) when :pop iseq.pop when :putnil From 80de9c9d4e1ddfc73fab479df69d77ce7367de69 Mon Sep 17 00:00:00 2001 From: Kevin Newton 
Date: Sat, 26 Nov 2022 14:03:46 -0500 Subject: [PATCH 075/104] Specialize using the linked list --- lib/syntax_tree/yarv/compiler.rb | 29 +---- lib/syntax_tree/yarv/instruction_sequence.rb | 115 +++++++++++-------- 2 files changed, 67 insertions(+), 77 deletions(-) diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index 362ce32f..9016c136 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -608,33 +608,6 @@ def visit_call(node) ) end - arg_parts = argument_parts(node.arguments) - argc = arg_parts.length - - # First we're going to check if we're calling a method on an array - # literal without any arguments. In that case there are some - # specializations we might be able to perform. - if argc == 0 && (node.message.is_a?(Ident) || node.message.is_a?(Op)) - case node.receiver - when ArrayLiteral - parts = node.receiver.contents&.parts || [] - - if parts.none? { |part| part.is_a?(ArgStar) } && - RubyVisitor.compile(node.receiver).nil? - case node.message.value - when "max" - visit(node.receiver.contents) - iseq.opt_newarray_max(parts.length) - return - when "min" - visit(node.receiver.contents) - iseq.opt_newarray_min(parts.length) - return - end - end - end - end - # Track whether or not this is a method call on a block proxy receiver. # If it is, we can potentially do tailcall optimizations on it. 
block_receiver = false @@ -663,6 +636,8 @@ def visit_call(node) iseq.branchnil(after_call_label) end + arg_parts = argument_parts(node.arguments) + argc = arg_parts.length flag = 0 arg_parts.each do |arg_part| diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index dc2f7da8..ff324d92 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -190,14 +190,16 @@ def inline_storage_for(name) end def length - insns.each.inject(0) do |sum, insn| - case insn - when Integer, Label, Symbol - sum - else - sum + insn.length + insns + .each + .inject(0) do |sum, insn| + case insn + when Integer, Label, Symbol + sum + else + sum + insn.length + end end - end end def eval @@ -218,29 +220,32 @@ def to_a specialize_instructions! if options.specialized_instruction? # Next, set it up so that all of the labels get their correct name. - insns.each.inject(0) do |length, insn| - case insn - when Integer, Symbol - length - when Label - insn.patch!(:"label_#{length}") - length - else - length + insn.length + insns + .each + .inject(0) do |length, insn| + case insn + when Integer, Symbol + length + when Label + insn.patch!(:"label_#{length}") + length + else + length + insn.length + end end - end # Next, dump all of the instructions into a flat list. - dumped = insns.each.map do |insn| - case insn - when Integer, Symbol - insn - when Label - insn.name - else - insn.to_a(self) + dumped = + insns.each.map do |insn| + case insn + when Integer, Symbol + insn + when Label + insn.name + else + insn.to_a(self) + end end - end dumped_options = argument_options.dup dumped_options[:opt].map!(&:name) if dumped_options[:opt] @@ -271,9 +276,31 @@ def to_a def specialize_instructions! 
insns.each_node do |node| case node.instruction + when NewArray + next unless node.next_node + + next_node = node.next_node + next unless next_node.instruction.is_a?(Send) + next if next_node.instruction.block_iseq + + calldata = next_node.instruction.calldata + next unless calldata.flags == CallData::CALL_ARGS_SIMPLE + next unless calldata.argc == 0 + + case calldata.method + when :max + node.instruction = OptNewArrayMax.new(node.instruction.number) + node.next_node = next_node.next_node + when :min + node.instruction = OptNewArrayMin.new(node.instruction.number) + node.next_node = next_node.next_node + end when PutObject, PutString next unless node.next_node - next if node.instruction.is_a?(PutObject) && !node.instruction.object.is_a?(String) + if node.instruction.is_a?(PutObject) && + !node.instruction.object.is_a?(String) + next + end next_node = node.next_node next unless next_node.instruction.is_a?(Send) @@ -281,19 +308,23 @@ def specialize_instructions! calldata = next_node.instruction.calldata next unless calldata.flags == CallData::CALL_ARGS_SIMPLE + next unless calldata.argc == 0 case calldata.method when :freeze - node.instruction = OptStrFreeze.new(node.instruction.object, calldata) + node.instruction = + OptStrFreeze.new(node.instruction.object, calldata) node.next_node = next_node.next_node when :-@ - node.instruction = OptStrUMinus.new(node.instruction.object, calldata) + node.instruction = + OptStrUMinus.new(node.instruction.object, calldata) node.next_node = next_node.next_node end when Send calldata = node.instruction.calldata - if !node.instruction.block_iseq && !calldata.flag?(CallData::CALL_ARGS_BLOCKARG) + if !node.instruction.block_iseq && + !calldata.flag?(CallData::CALL_ARGS_BLOCKARG) # Specialize the send instruction. 
If it doesn't have a block # attached, then we will replace it with an opt_send_without_block # and do further specializations based on the called method and @@ -639,24 +670,6 @@ def opt_getinlinecache(label, cache) push(Legacy::OptGetInlineCache.new(label, cache)) end - def opt_newarray_max(length) - if options.specialized_instruction? - push(OptNewArrayMax.new(length)) - else - newarray(length) - send(YARV.calldata(:max)) - end - end - - def opt_newarray_min(length) - if options.specialized_instruction? - push(OptNewArrayMin.new(length)) - else - newarray(length) - send(YARV.calldata(:min)) - end - end - def opt_setinlinecache(cache) push(Legacy::OptSetInlineCache.new(cache)) end @@ -938,9 +951,11 @@ def self.from(source, options = Compiler::Options.new, parent_iseq = nil) when :opt_getinlinecache iseq.opt_getinlinecache(labels[opnds[0]], opnds[1]) when :opt_newarray_max - iseq.opt_newarray_max(opnds[0]) + iseq.newarray(opnds[0]) + iseq.send(YARV.calldata(:max)) when :opt_newarray_min - iseq.opt_newarray_min(opnds[0]) + iseq.newarray(opnds[0]) + iseq.send(YARV.calldata(:min)) when :opt_neq iseq.push( OptNEq.new(CallData.from(opnds[0]), CallData.from(opnds[1])) From f3ed30d2157dd6351d0cf2fce1d91148f1432318 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Sat, 26 Nov 2022 14:21:01 -0500 Subject: [PATCH 076/104] Have the instruction list point to values not necessarily instructions --- lib/syntax_tree/yarv/instruction_sequence.rb | 48 +++++++++----------- 1 file changed, 21 insertions(+), 27 deletions(-) diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index ff324d92..a994c6d2 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -12,10 +12,10 @@ class InstructionSequence # and other transformations like instruction specialization. 
class InstructionList class Node - attr_accessor :instruction, :next_node + attr_accessor :value, :next_node - def initialize(instruction, next_node = nil) - @instruction = instruction + def initialize(value, next_node = nil) + @value = value @next_node = next_node end end @@ -29,7 +29,7 @@ def initialize def each return to_enum(__method__) unless block_given? - each_node { |node| yield node.instruction } + each_node { |node| yield node.value } end def each_node @@ -37,7 +37,7 @@ def each_node node = head_node while node - yield node + yield node, node.value node = node.next_node end end @@ -274,62 +274,56 @@ def to_a end def specialize_instructions! - insns.each_node do |node| - case node.instruction + insns.each_node do |node, value| + case value when NewArray next unless node.next_node next_node = node.next_node - next unless next_node.instruction.is_a?(Send) - next if next_node.instruction.block_iseq + next unless next_node.value.is_a?(Send) + next if next_node.value.block_iseq - calldata = next_node.instruction.calldata + calldata = next_node.value.calldata next unless calldata.flags == CallData::CALL_ARGS_SIMPLE next unless calldata.argc == 0 case calldata.method when :max - node.instruction = OptNewArrayMax.new(node.instruction.number) + node.value = OptNewArrayMax.new(value.number) node.next_node = next_node.next_node when :min - node.instruction = OptNewArrayMin.new(node.instruction.number) + node.value = OptNewArrayMin.new(value.number) node.next_node = next_node.next_node end when PutObject, PutString next unless node.next_node - if node.instruction.is_a?(PutObject) && - !node.instruction.object.is_a?(String) - next - end + next if value.is_a?(PutObject) && !value.object.is_a?(String) next_node = node.next_node - next unless next_node.instruction.is_a?(Send) - next if next_node.instruction.block_iseq + next unless next_node.value.is_a?(Send) + next if next_node.value.block_iseq - calldata = next_node.instruction.calldata + calldata = 
next_node.value.calldata next unless calldata.flags == CallData::CALL_ARGS_SIMPLE next unless calldata.argc == 0 case calldata.method when :freeze - node.instruction = - OptStrFreeze.new(node.instruction.object, calldata) + node.value = OptStrFreeze.new(value.object, calldata) node.next_node = next_node.next_node when :-@ - node.instruction = - OptStrUMinus.new(node.instruction.object, calldata) + node.value = OptStrUMinus.new(value.object, calldata) node.next_node = next_node.next_node end when Send - calldata = node.instruction.calldata + calldata = value.calldata - if !node.instruction.block_iseq && - !calldata.flag?(CallData::CALL_ARGS_BLOCKARG) + if !value.block_iseq && !calldata.flag?(CallData::CALL_ARGS_BLOCKARG) # Specialize the send instruction. If it doesn't have a block # attached, then we will replace it with an opt_send_without_block # and do further specializations based on the called method and # the number of arguments. - node.instruction = + node.value = case [calldata.method, calldata.argc] when [:length, 0] OptLength.new(calldata) From b422b428f8089e723732b5a586d5d97bdc18ead6 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Sat, 26 Nov 2022 14:29:54 -0500 Subject: [PATCH 077/104] Give a reference on the labels to their container nodes --- lib/syntax_tree/yarv/instruction_sequence.rb | 30 +++++++++++--------- 1 file changed, 17 insertions(+), 13 deletions(-) diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index a994c6d2..5469f6f7 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -95,12 +95,18 @@ def change_by(value) class Label attr_reader :name + # When we're serializing the instruction sequence, we need to be able to + # look up the label from the branch instructions and then access the + # subsequent node. So we'll store the reference here. 
+ attr_reader :node + def initialize(name = nil) @name = name end - def patch!(name) + def patch!(name, node) @name = name + @node = node end end @@ -220,19 +226,17 @@ def to_a specialize_instructions! if options.specialized_instruction? # Next, set it up so that all of the labels get their correct name. - insns - .each - .inject(0) do |length, insn| - case insn - when Integer, Symbol - length - when Label - insn.patch!(:"label_#{length}") - length - else - length + insn.length - end + length = 0 + insns.each_node do |node, value| + case value + when Integer, Symbol + # skip + when Label + value.patch!(:"label_#{length}", node) + else + length += value.length end + end # Next, dump all of the instructions into a flat list. dumped = From 14df44ed9b4c01845e0402a9514c0d40e05bddd7 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Sat, 26 Nov 2022 16:03:06 -0500 Subject: [PATCH 078/104] Begin peephole optimizations --- lib/syntax_tree/yarv/instruction_sequence.rb | 50 +++++++++++++++----- 1 file changed, 38 insertions(+), 12 deletions(-) diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index 5469f6f7..e8e30b3b 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -52,6 +52,8 @@ def push(instruction) @tail_node.next_node = node @tail_node = node end + + node end end @@ -98,15 +100,14 @@ class Label # When we're serializing the instruction sequence, we need to be able to # look up the label from the branch instructions and then access the # subsequent node. So we'll store the reference here. - attr_reader :node + attr_accessor :node def initialize(name = nil) @name = name end - def patch!(name, node) + def patch!(name) @name = name - @node = node end end @@ -222,8 +223,9 @@ def eval def to_a versions = RUBY_VERSION.split(".").map(&:to_i) - # First, specialize any instructions that need to be specialized. 
+ # First, handle any compilation options that we need to. specialize_instructions! if options.specialized_instruction? + peephole_optimize! if options.peephole_optimization? # Next, set it up so that all of the labels get their correct name. length = 0 @@ -232,7 +234,7 @@ def to_a when Integer, Symbol # skip when Label - value.patch!(:"label_#{length}", node) + value.patch!(:"label_#{length}") else length += value.length end @@ -383,6 +385,27 @@ def specialize_instructions! end end + def peephole_optimize! + insns.each_node do |node, value| + case value + when Jump + # jump LABEL + # ... + # LABEL: + # leave + # => + # leave + # ... + # LABEL: + # leave + # case value.label.node.next_node&.value + # when Leave + # node.value = Leave.new + # end + end + end + end + ########################################################################## # Child instruction sequence methods ########################################################################## @@ -421,15 +444,18 @@ def label Label.new end - def push(insn) - insns.push(insn) + def push(value) + node = insns.push(value) - case insn - when Array, Integer, Label, Symbol - insn + case value + when Array, Integer, Symbol + value + when Label + value.node = node + value else - stack.change_by(-insn.pops + insn.pushes) - insn + stack.change_by(-value.pops + value.pushes) + value end end From b998a6ea9a5a9564dafc0cd422a77f03e3937c26 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Mon, 28 Nov 2022 11:15:38 -0500 Subject: [PATCH 079/104] Add a bit of execution --- .rubocop.yml | 18 + lib/syntax_tree/yarv.rb | 277 +++++ lib/syntax_tree/yarv/compiler.rb | 12 +- lib/syntax_tree/yarv/instruction_sequence.rb | 178 ++- lib/syntax_tree/yarv/instructions.rb | 1062 +++++++++++++++++- lib/syntax_tree/yarv/legacy.rb | 8 + 6 files changed, 1487 insertions(+), 68 deletions(-) diff --git a/.rubocop.yml b/.rubocop.yml index b7ba43e8..c81fdb59 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -31,12 +31,18 @@ Lint/InterpolationCheck: 
Lint/MissingSuper: Enabled: false +Lint/NonLocalExitFromIterator: + Enabled: false + Lint/RedundantRequireStatement: Enabled: false Lint/SuppressedException: Enabled: false +Lint/UnderscorePrefixedVariableName: + Enabled: false + Lint/UnusedMethodArgument: AllowUnusedKeywordArguments: true @@ -55,6 +61,9 @@ Naming/RescuedExceptionsVariableName: Naming/VariableNumber: Enabled: false +Security/Eval: + Enabled: false + Style/AccessorGrouping: Enabled: false @@ -64,9 +73,18 @@ Style/CaseEquality: Style/CaseLikeIf: Enabled: false +Style/ClassVars: + Enabled: false + +Style/DocumentDynamicEvalDefinition: + Enabled: false + Style/Documentation: Enabled: false +Style/EndBlock: + Enabled: false + Style/ExplicitBlockArgument: Enabled: false diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index 1e759ad1..74f2598e 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -1,11 +1,288 @@ # frozen_string_literal: true +require "forwardable" + module SyntaxTree # This module provides an object representation of the YARV bytecode. 
module YARV + class VM + class Jump + attr_reader :name + + def initialize(name) + @name = name + end + end + + class Leave + attr_reader :value + + def initialize(value) + @value = value + end + end + + class Frame + attr_reader :iseq, :parent, :stack_index, :_self, :nesting, :svars + + def initialize(iseq, parent, stack_index, _self, nesting) + @iseq = iseq + @parent = parent + @stack_index = stack_index + @_self = _self + @nesting = nesting + @svars = {} + end + end + + class TopFrame < Frame + def initialize(iseq) + super(iseq, nil, 0, TOPLEVEL_BINDING.eval("self"), [Object]) + end + end + + class BlockFrame < Frame + def initialize(iseq, parent, stack_index) + super(iseq, parent, stack_index, parent._self, parent.nesting) + end + end + + class MethodFrame < Frame + attr_reader :name, :block + + def initialize(iseq, parent, stack_index, _self, name, block) + super(iseq, parent, stack_index, _self, parent.nesting) + @name = name + @block = block + end + end + + class ClassFrame < Frame + def initialize(iseq, parent, stack_index, _self) + super(iseq, parent, stack_index, _self, parent.nesting + [_self]) + end + end + + class FrozenCore + define_method("core#hash_merge_kwd") { |left, right| left.merge(right) } + + define_method("core#hash_merge_ptr") do |hash, *values| + hash.merge(values.each_slice(2).to_h) + end + + define_method("core#set_method_alias") do |clazz, new_name, old_name| + clazz.alias_method(new_name, old_name) + end + + define_method("core#set_variable_alias") do |new_name, old_name| + # Using eval here since there isn't a reflection API to be able to + # alias global variables. 
+ eval("alias #{new_name} #{old_name}", binding, __FILE__, __LINE__) + end + + define_method("core#set_postexe") { |&block| END { block.call } } + + define_method("core#undef_method") do |clazz, name| + clazz.undef_method(name) + end + end + + FROZEN_CORE = FrozenCore.new.freeze + + extend Forwardable + + attr_reader :stack + def_delegators :stack, :push, :pop + + attr_reader :frame + def_delegators :frame, :_self + + def initialize + @stack = [] + @frame = nil + end + + ########################################################################## + # Helper methods for frames + ########################################################################## + + def run_frame(frame) + # First, set the current frame to the given value. + @frame = frame + + # Next, set up the local table for the frame. This is actually incorrect + # as it could use the values already on the stack, but for now we're + # just doing this for simplicity. + frame.iseq.local_table.size.times { push(nil) } + + # Yield so that some frame-specific setup can be done. + yield if block_given? + + # This hash is going to hold a mapping of label names to their + # respective indices in our instruction list. + labels = {} + + # This array is going to hold our instructions. + insns = [] + + # Here we're going to preprocess the instruction list from the + # instruction sequence to set up the labels hash and the insns array. + frame.iseq.insns.each do |insn| + case insn + when Integer, Symbol + # skip + when InstructionSequence::Label + labels[insn.name] = insns.length + else + insns << insn + end + end + + # Finally we can execute the instructions one at a time. If they return + # jumps or leaves we will handle those appropriately. 
+ pc = 0 + while pc < insns.length + insn = insns[pc] + pc += 1 + + case (result = insn.call(self)) + when Jump + pc = labels[result.name] + when Leave + return result.value + end + end + ensure + @stack = stack[0...frame.stack_index] + @frame = frame.parent + end + + def run_top_frame(iseq) + run_frame(TopFrame.new(iseq)) + end + + def run_block_frame(iseq, *args, &block) + run_frame(BlockFrame.new(iseq, frame, stack.length)) do + locals = [*args, block] + iseq.local_table.size.times do |index| + local_set(index, 0, locals.shift) + end + end + end + + def run_class_frame(iseq, clazz) + run_frame(ClassFrame.new(iseq, frame, stack.length, clazz)) + end + + def run_method_frame(name, iseq, _self, *args, **kwargs, &block) + run_frame( + MethodFrame.new(iseq, frame, stack.length, _self, name, block) + ) do + locals = [*args, block] + + if iseq.argument_options[:keyword] + # First, set up the keyword bits array. + keyword_bits = + iseq.argument_options[:keyword].map do |config| + kwargs.key?(config.is_a?(Array) ? config[0] : config) + end + + iseq.local_table.locals.each_with_index do |local, index| + # If this is the keyword bits local, then set it appropriately. + if local.name == 2 + locals.insert(index, keyword_bits) + next + end + + # First, find the configuration for this local in the keywords + # list if it exists. + name = local.name + config = + iseq.argument_options[:keyword].find do |keyword| + keyword.is_a?(Array) ? keyword[0] == name : keyword == name + end + + # If the configuration doesn't exist, then the local is not a + # keyword local. + next unless config + + if !config.is_a?(Array) + # required keyword + locals.insert(index, kwargs.fetch(name)) + elsif !config[1].nil? 
+ # optional keyword with embedded default value + locals.insert(index, kwargs.fetch(name, config[1])) + else + # optional keyword with expression default value + locals.insert(index, nil) + end + end + end + + iseq.local_table.size.times do |index| + local_set(index, 0, locals.shift) + end + end + end + + ########################################################################## + # Helper methods for instructions + ########################################################################## + + def const_base + frame.nesting.last + end + + def frame_at(level) + current = frame + level.times { current = current.parent } + current + end + + def frame_svar + current = frame + current = current.parent while current.is_a?(BlockFrame) + current + end + + def frame_yield + current = frame + current = current.parent until current.is_a?(MethodFrame) + current + end + + def frozen_core + FROZEN_CORE + end + + def jump(label) + Jump.new(label.name) + end + + def leave + Leave.new(pop) + end + + def local_get(index, level) + stack[frame_at(level).stack_index + index] + end + + def local_set(index, level, value) + stack[frame_at(level).stack_index + index] = value + end + end + # Compile the given source into a YARV instruction sequence. def self.compile(source, options = Compiler::Options.new) SyntaxTree.parse(source).accept(Compiler.new(options)) end + + # Compile and interpret the given source. + def self.interpret(source, options = Compiler::Options.new) + iseq = RubyVM::InstructionSequence.compile(source, **options) + iseq = InstructionSequence.from(iseq.to_a) + iseq.specialize_instructions! 
+ VM.new.run_top_frame(iseq) + end end end diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index 9016c136..194b758b 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -1987,8 +1987,8 @@ def visit_pattern(node, end_label) match_failure_label = iseq.label match_error_label = iseq.label - # If there's a constant, then check if we match against that constant or - # not first. Branch to failure if we don't. + # If there's a constant, then check if we match against that constant + # or not first. Branch to failure if we don't. if node.constant iseq.dup visit(node.constant) @@ -2007,8 +2007,8 @@ def visit_pattern(node, end_label) iseq.topn(2) iseq.branchunless(match_failure_label) - # Since we have a valid cached value, we can skip past the part where we - # call #deconstruct on the object. + # Since we have a valid cached value, we can skip past the part where + # we call #deconstruct on the object. iseq.pop iseq.topn(1) iseq.jump(length_label) @@ -2064,8 +2064,8 @@ def visit_pattern(node, end_label) end end - # Set up the routine here to raise an error to indicate that the type of - # the deconstructed array was incorrect. + # Set up the routine here to raise an error to indicate that the type + # of the deconstructed array was incorrect. iseq.push(match_error_label) iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) iseq.putobject(TypeError) diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index e8e30b3b..f20981df 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -20,6 +20,7 @@ def initialize(value, next_node = nil) end end + include Enumerable attr_reader :head_node, :tail_node def initialize @@ -109,6 +110,10 @@ def initialize(name = nil) def patch!(name) @name = name end + + def inspect + name.inspect + end end # The type of the instruction sequence. 
@@ -128,6 +133,9 @@ def patch!(name) attr_accessor :argument_size attr_reader :argument_options + # The catch table for this instruction sequence. + attr_reader :catch_table + # The list of instructions for this instruction sequence. attr_reader :insns @@ -162,6 +170,7 @@ def initialize( @argument_size = 0 @argument_options = {} + @catch_table = [] @local_table = LocalTable.new @inline_storages = {} @@ -229,20 +238,20 @@ def to_a # Next, set it up so that all of the labels get their correct name. length = 0 - insns.each_node do |node, value| - case value + insns.each do |insn| + case insn when Integer, Symbol # skip when Label - value.patch!(:"label_#{length}") + insn.patch!(:"label_#{length}") else - length += value.length + length += insn.length end end # Next, dump all of the instructions into a flat list. dumped = - insns.each.map do |insn| + insns.map do |insn| case insn when Integer, Symbol insn @@ -274,7 +283,7 @@ def to_a type, local_table.names, dumped_options, - [], + catch_table.map(&:to_a), dumped ] end @@ -324,7 +333,8 @@ def specialize_instructions! when Send calldata = value.calldata - if !value.block_iseq && !calldata.flag?(CallData::CALL_ARGS_BLOCKARG) + if !value.block_iseq && + !calldata.flag?(CallData::CALL_ARGS_BLOCKARG) # Specialize the send instruction. If it doesn't have a block # attached, then we will replace it with an opt_send_without_block # and do further specializations based on the called method and @@ -386,24 +396,24 @@ def specialize_instructions! end def peephole_optimize! - insns.each_node do |node, value| - case value - when Jump - # jump LABEL - # ... - # LABEL: - # leave - # => - # leave - # ... - # LABEL: - # leave - # case value.label.node.next_node&.value - # when Leave - # node.value = Leave.new - # end - end - end + # insns.each_node do |node, value| + # case value + # when Jump + # # jump LABEL + # # ... + # # LABEL: + # # leave + # # => + # # leave + # # ... 
+ # # LABEL: + # # leave + # # case value.label.node.next_node&.value + # # when Leave + # # node.value = Leave.new + # # end + # end + # end end ########################################################################## @@ -436,6 +446,77 @@ def singleton_class_child_iseq(location) child_iseq(:class, "singleton class", location) end + ########################################################################## + # Catch table methods + ########################################################################## + + class CatchEntry + attr_reader :iseq, :begin_label, :end_label, :exit_label + + def initialize(iseq, begin_label, end_label, exit_label) + @iseq = iseq + @begin_label = begin_label + @end_label = end_label + @exit_label = exit_label + end + end + + class CatchBreak < CatchEntry + def to_a + [:break, iseq.to_a, begin_label.name, end_label.name, exit_label.name] + end + end + + class CatchNext < CatchEntry + def to_a + [:next, nil, begin_label.name, end_label.name, exit_label.name] + end + end + + class CatchRedo < CatchEntry + def to_a + [:redo, nil, begin_label.name, end_label.name, exit_label.name] + end + end + + class CatchRescue < CatchEntry + def to_a + [ + :rescue, + iseq.to_a, + begin_label.name, + end_label.name, + exit_label.name + ] + end + end + + class CatchRetry < CatchEntry + def to_a + [:retry, nil, begin_label.name, end_label.name, exit_label.name] + end + end + + def catch_break(iseq, begin_label, end_label, exit_label) + catch_table << CatchBreak.new(iseq, begin_label, end_label, exit_label) + end + + def catch_next(begin_label, end_label, exit_label) + catch_table << CatchNext.new(nil, begin_label, end_label, exit_label) + end + + def catch_redo(begin_label, end_label, exit_label) + catch_table << CatchRedo.new(nil, begin_label, end_label, exit_label) + end + + def catch_rescue(iseq, begin_label, end_label, exit_label) + catch_table << CatchRescue.new(iseq, begin_label, end_label, exit_label) + end + + def catch_retry(begin_label, 
end_label, exit_label) + catch_table << CatchRetry.new(nil, begin_label, end_label, exit_label) + end + ########################################################################## # Instruction push methods ########################################################################## @@ -837,6 +918,46 @@ def self.from(source, options = Compiler::Options.new, parent_iseq = nil) iseq.argument_options[:opt].map! { |opt| labels[opt] } end + # set up the catch table + source[12].each do |entry| + case entry[0] + when :break + iseq.catch_break( + from(entry[1]), + labels[entry[2]], + labels[entry[3]], + labels[entry[4]] + ) + when :next + iseq.catch_next( + labels[entry[2]], + labels[entry[3]], + labels[entry[4]] + ) + when :rescue + iseq.catch_rescue( + from(entry[1]), + labels[entry[2]], + labels[entry[3]], + labels[entry[4]] + ) + when :redo + iseq.catch_redo( + labels[entry[2]], + labels[entry[3]], + labels[entry[4]] + ) + when :retry + iseq.catch_retry( + labels[entry[2]], + labels[entry[3]], + labels[entry[4]] + ) + else + raise "unknown catch type: #{entry[0]}" + end + end + # set up all of the instructions source[13].each do |insn| # skip line numbers @@ -969,7 +1090,12 @@ def self.from(source, options = Compiler::Options.new, parent_iseq = nil) when :opt_aset_with iseq.opt_aset_with(opnds[0], CallData.from(opnds[1])) when :opt_case_dispatch - iseq.opt_case_dispatch(opnds[0], labels[opnds[1]]) + hash = + opnds[0] + .each_slice(2) + .to_h + .transform_values { |value| labels[value] } + iseq.opt_case_dispatch(hash, labels[opnds[1]]) when :opt_getconstant_path iseq.opt_getconstant_path(opnds[0]) when :opt_getinlinecache diff --git a/lib/syntax_tree/yarv/instructions.rb b/lib/syntax_tree/yarv/instructions.rb index 8ec1f068..0b60bd13 100644 --- a/lib/syntax_tree/yarv/instructions.rb +++ b/lib/syntax_tree/yarv/instructions.rb @@ -98,6 +98,14 @@ def pops def pushes 0 end + + def canonical + self + end + + def call(vm) + vm.pop(number) + end end # ### Summary @@ -134,6 
+142,20 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + original, value = vm.pop(2) + + if value.is_a?(String) + vm.push(value) + else + vm.push("#<#{original.class.name}:0000>") + end + end end # ### Summary @@ -174,6 +196,14 @@ def pops def pushes 0 end + + def canonical + self + end + + def call(vm) + vm.jump(label) if vm.pop + end end # ### Summary @@ -215,6 +245,14 @@ def pops def pushes 0 end + + def canonical + self + end + + def call(vm) + vm.jump(label) if vm.pop.nil? + end end # ### Summary @@ -255,6 +293,14 @@ def pops def pushes 0 end + + def canonical + self + end + + def call(vm) + vm.jump(label) unless vm.pop + end end # ### Summary @@ -303,6 +349,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(vm.local_get(keyword_bits_index, 0)[keyword_index]) + end end # ### Summary @@ -343,6 +397,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + raise NotImplementedError, "checkmatch" + end end # ### Summary @@ -406,6 +468,61 @@ def pushes # can investigate further. 2 end + + def canonical + self + end + + def call(vm) + object = vm.pop + result = + case type + when TYPE_OBJECT + raise NotImplementedError, "checktype TYPE_OBJECT" + when TYPE_CLASS + object.is_a?(Class) + when TYPE_MODULE + object.is_a?(Module) + when TYPE_FLOAT + object.is_a?(Float) + when TYPE_STRING + object.is_a?(String) + when TYPE_REGEXP + object.is_a?(Regexp) + when TYPE_ARRAY + object.is_a?(Array) + when TYPE_HASH + object.is_a?(Hash) + when TYPE_STRUCT + object.is_a?(Struct) + when TYPE_BIGNUM + raise NotImplementedError, "checktype TYPE_BIGNUM" + when TYPE_FILE + object.is_a?(File) + when TYPE_DATA + raise NotImplementedError, "checktype TYPE_DATA" + when TYPE_MATCH + raise NotImplementedError, "checktype TYPE_MATCH" + when TYPE_COMPLEX + object.is_a?(Complex) + when TYPE_RATIONAL + object.is_a?(Rational) + when TYPE_NIL + object.nil? 
+ when TYPE_TRUE + object == true + when TYPE_FALSE + object == false + when TYPE_SYMBOL + object.is_a?(Symbol) + when TYPE_FIXNUM + object.is_a?(Integer) + when TYPE_UNDEF + raise NotImplementedError, "checktype TYPE_UNDEF" + end + + vm.push(result) + end end # ### Summary @@ -438,6 +555,15 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + left, right = vm.pop(2) + vm.push([*left, *right]) + end end # ### Summary @@ -477,6 +603,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(vm.pop(number).join) + end end # ### Summary @@ -524,6 +658,20 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + object, superclass = vm.pop(2) + iseq = class_iseq + + clazz = Class.new(superclass || Object) + vm.push(vm.run_class_frame(iseq, clazz)) + + object.const_set(name, clazz) + end end # ### Summary @@ -579,6 +727,46 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + object = vm.pop + + result = + case type + when TYPE_NIL, TYPE_SELF, TYPE_TRUE, TYPE_FALSE, TYPE_ASGN, TYPE_EXPR + message + when TYPE_IVAR + message if vm._self.instance_variable_defined?(name) + when TYPE_LVAR + raise NotImplementedError, "defined TYPE_LVAR" + when TYPE_GVAR + message if global_variables.include?(name) + when TYPE_CVAR + clazz = vm._self + clazz = clazz.singleton_class unless clazz.is_a?(Module) + message if clazz.class_variable_defined?(name) + when TYPE_CONST + raise NotImplementedError, "defined TYPE_CONST" + when TYPE_METHOD + raise NotImplementedError, "defined TYPE_METHOD" + when TYPE_YIELD + raise NotImplementedError, "defined TYPE_YIELD" + when TYPE_ZSUPER + raise NotImplementedError, "defined TYPE_ZSUPER" + when TYPE_REF + raise NotImplementedError, "defined TYPE_REF" + when TYPE_FUNC + message if object.respond_to?(name, true) + when TYPE_CONST_FROM + raise NotImplementedError, "defined TYPE_CONST_FROM" + end + + vm.push(result) + end end # ### Summary @@ -595,15 
+783,15 @@ def pushes # ~~~ # class DefineMethod - attr_reader :name, :method_iseq + attr_reader :method_name, :method_iseq - def initialize(name, method_iseq) - @name = name + def initialize(method_name, method_iseq) + @method_name = method_name @method_iseq = method_iseq end def to_a(_iseq) - [:definemethod, name, method_iseq.to_a] + [:definemethod, method_name, method_iseq.to_a] end def length @@ -617,6 +805,21 @@ def pops def pushes 0 end + + def canonical + self + end + + def call(vm) + name = method_name + iseq = method_iseq + + vm + ._self + .__send__(:define_method, name) do |*args, **kwargs, &block| + vm.run_method_frame(name, iseq, self, *args, **kwargs, &block) + end + end end # ### Summary @@ -634,15 +837,15 @@ def pushes # ~~~ # class DefineSMethod - attr_reader :name, :method_iseq + attr_reader :method_name, :method_iseq - def initialize(name, method_iseq) - @name = name + def initialize(method_name, method_iseq) + @method_name = method_name @method_iseq = method_iseq end def to_a(_iseq) - [:definesmethod, name, method_iseq.to_a] + [:definesmethod, method_name, method_iseq.to_a] end def length @@ -656,6 +859,21 @@ def pops def pushes 0 end + + def canonical + self + end + + def call(vm) + name = method_name + iseq = method_iseq + + vm + ._self + .__send__(:define_singleton_method, name) do |*args, **kwargs, &block| + vm.run_method_frame(name, iseq, self, *args, **kwargs, &block) + end + end end # ### Summary @@ -684,6 +902,14 @@ def pops def pushes 2 end + + def canonical + self + end + + def call(vm) + vm.push(vm.stack.last.dup) + end end # ### Summary @@ -718,6 +944,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(object.dup) + end end # ### Summary @@ -752,6 +986,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(object.dup) + end end # ### Summary @@ -786,6 +1028,16 @@ def pops def pushes number end + + def canonical + self + end + + def call(vm) + values = 
vm.pop(number) + vm.push(*values) + vm.push(*values) + end end # ### Summary @@ -823,6 +1075,14 @@ def pops def pushes number end + + def canonical + self + end + + def call(vm) + raise NotImplementedError, "expandarray" + end end # ### Summary @@ -867,6 +1127,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(vm.local_get(index, level)) + end end # ### Summary @@ -909,6 +1177,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(vm.local_get(index, level)) + end end # ### Summary @@ -946,6 +1222,16 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + clazz = vm._self + clazz = clazz.class unless clazz.is_a?(Class) + vm.push(clazz.class_variable_get(name)) + end end # ### Summary @@ -982,6 +1268,24 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + # const_base, allow_nil = + vm.pop(2) + + vm.frame.nesting.reverse_each do |clazz| + if clazz.const_defined?(name) + vm.push(clazz.const_get(name)) + return + end + end + + raise NameError, "uninitialized constant #{name}" + end end # ### Summary @@ -1016,6 +1320,16 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + # Evaluating the name of the global variable because there isn't a + # reflection API for global variables. + vm.push(eval(name.to_s, binding, __FILE__, __LINE__)) + end end # ### Summary @@ -1058,34 +1372,47 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + method = Object.instance_method(:instance_variable_get) + vm.push(method.bind(vm._self).call(name)) + end end # ### Summary # - # `getlocal_WC_0` is a specialized version of the `getlocal` instruction. It - # fetches the value of a local variable from the current frame determined by - # the index given as its only argument. + # `getlocal` fetches the value of a local variable from a frame determined + # by the level and index arguments. 
The level is the number of frames back + # to look and the index is the index in the local table. It pushes the value + # it finds onto the stack. # # ### Usage # # ~~~ruby # value = 5 - # value + # tap { tap { value } } # ~~~ # - class GetLocalWC0 - attr_reader :index + class GetLocal + attr_reader :index, :level - def initialize(index) + def initialize(index, level) @index = index + @level = level end def to_a(iseq) - [:getlocal_WC_0, iseq.local_table.offset(index)] + current = iseq + level.times { current = current.parent_iseq } + [:getlocal, current.local_table.offset(index), level] end def length - 2 + 3 end def pops @@ -1095,22 +1422,30 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(vm.local_get(index, level)) + end end # ### Summary # - # `getlocal_WC_1` is a specialized version of the `getlocal` instruction. It - # fetches the value of a local variable from the parent frame determined by + # `getlocal_WC_0` is a specialized version of the `getlocal` instruction. It + # fetches the value of a local variable from the current frame determined by # the index given as its only argument. # # ### Usage # # ~~~ruby # value = 5 - # self.then { value } + # value # ~~~ # - class GetLocalWC1 + class GetLocalWC0 attr_reader :index def initialize(index) @@ -1118,7 +1453,7 @@ def initialize(index) end def to_a(iseq) - [:getlocal_WC_1, iseq.parent_iseq.local_table.offset(index)] + [:getlocal_WC_0, iseq.local_table.offset(index)] end def length @@ -1132,38 +1467,42 @@ def pops def pushes 1 end + + def canonical + GetLocal.new(index, 0) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary # - # `getlocal` fetches the value of a local variable from a frame determined - # by the level and index arguments. The level is the number of frames back - # to look and the index is the index in the local table. It pushes the value - # it finds onto the stack. 
+ # `getlocal_WC_1` is a specialized version of the `getlocal` instruction. It + # fetches the value of a local variable from the parent frame determined by + # the index given as its only argument. # # ### Usage # # ~~~ruby # value = 5 - # tap { tap { value } } + # self.then { value } # ~~~ # - class GetLocal - attr_reader :index, :level + class GetLocalWC1 + attr_reader :index - def initialize(index, level) + def initialize(index) @index = index - @level = level end def to_a(iseq) - current = iseq - level.times { current = current.parent_iseq } - [:getlocal, current.local_table.offset(index), level] + [:getlocal_WC_1, iseq.parent_iseq.local_table.offset(index)] end def length - 3 + 2 end def pops @@ -1173,6 +1512,14 @@ def pops def pushes 1 end + + def canonical + GetLocal.new(index, 1) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -1212,6 +1559,21 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + case key + when SVAR_LASTLINE + raise NotImplementedError, "getspecial SVAR_LASTLINE" + when SVAR_BACKREF + raise NotImplementedError, "getspecial SVAR_BACKREF" + when SVAR_FLIPFLOP_START + vm.frame_svar.svars[SVAR_FLIPFLOP_START] + end + end end # ### Summary @@ -1241,6 +1603,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(vm.pop.to_sym) + end end # ### Summary @@ -1279,6 +1649,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(vm.frame_yield.block.call(*vm.pop(calldata.argc))) + end end # ### Summary @@ -1319,6 +1697,32 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + block = + if (iseq = block_iseq) + ->(*args, **kwargs, &blk) do + vm.run_block_frame(iseq, *args, **kwargs, &blk) + end + end + + keywords = + if calldata.kw_arg + calldata.kw_arg.zip(vm.pop(calldata.kw_arg.length)).to_h + else + {} + end + + arguments = vm.pop(calldata.argc) + receiver = vm.pop + + method = 
receiver.method(vm.frame.name).super_method + vm.push(method.call(*arguments, **keywords, &block)) + end end # ### Summary @@ -1358,6 +1762,14 @@ def pops def pushes 0 end + + def canonical + self + end + + def call(vm) + vm.jump(label) + end end # ### Summary @@ -1388,6 +1800,14 @@ def pushes # otherwise the stack size is incorrectly calculated. 0 end + + def canonical + self + end + + def call(vm) + vm.leave + end end # ### Summary @@ -1424,6 +1844,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(vm.pop(number)) + end end # ### Summary @@ -1460,6 +1888,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(vm.pop(number)) + end end # ### Summary @@ -1498,6 +1934,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(vm.pop(number).each_slice(2).to_h) + end end # ### Summary @@ -1537,6 +1981,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(Range.new(*vm.pop(2), exclude_end == 1)) + end end # ### Summary @@ -1566,6 +2018,13 @@ def pops def pushes 0 end + + def canonical + self + end + + def call(vm) + end end # ### Summary @@ -1604,6 +2063,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(vm.pop.to_s) + end end # ### Summary @@ -1642,6 +2109,16 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + return if @executed + vm.push(vm.run_block_frame(iseq)) + @executed = true + end end # ### Summary @@ -1679,6 +2156,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -1715,6 +2200,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -1753,6 +2246,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(vm.pop[object]) + end end # ### 
Summary @@ -1790,6 +2291,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -1827,6 +2336,15 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + hash, value = vm.pop(2) + vm.push(hash[object] = value) + end end # ### Summary @@ -1861,7 +2379,11 @@ def initialize(case_dispatch_hash, else_label) end def to_a(_iseq) - [:opt_case_dispatch, case_dispatch_hash, else_label] + [ + :opt_case_dispatch, + case_dispatch_hash.flat_map { |key, value| [key, value.name] }, + else_label + ] end def length @@ -1875,6 +2397,14 @@ def pops def pushes 0 end + + def canonical + self + end + + def call(vm) + vm.jump(case_dispatch_hash.fetch(vm.pop, else_label)) + end end # ### Summary @@ -1912,6 +2442,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -1948,6 +2486,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -1985,6 +2531,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -2022,6 +2576,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -2058,6 +2620,21 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + current = vm._self + current = current.class unless current.is_a?(Class) + + names.each do |name| + current = name == :"" ? 
Object : current.const_get(name) + end + + vm.push(current) + end end # ### Summary @@ -2095,6 +2672,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -2132,6 +2717,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -2169,6 +2762,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -2206,6 +2807,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -2243,6 +2852,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -2281,6 +2898,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -2318,6 +2943,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -2355,6 +2988,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -2395,6 +3036,15 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + receiver, argument = vm.pop(2) + vm.push(receiver != argument) + end end # ### Summary @@ -2431,6 +3081,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(vm.pop(number).max) + end end # ### Summary @@ -2467,6 +3125,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(vm.pop(number).min) + end end # ### Summary @@ -2504,6 +3170,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + 
end end # ### Summary @@ -2539,6 +3213,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -2576,6 +3258,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -2613,6 +3303,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -2649,6 +3347,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -2685,6 +3391,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -2722,6 +3436,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -2759,6 +3481,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(object.freeze) + end end # ### Summary @@ -2796,6 +3526,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(-object) + end end # ### Summary @@ -2833,6 +3571,14 @@ def pops def pushes 1 end + + def canonical + Send.new(calldata, nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -2861,6 +3607,14 @@ def pops def pushes 0 end + + def canonical + self + end + + def call(vm) + vm.pop + end end # ### Summary @@ -2889,6 +3643,14 @@ def pops def pushes 1 end + + def canonical + PutObject.new(nil) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -2923,6 +3685,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(object) + end end # ### Summary @@ -2953,6 +3723,14 @@ def pops def pushes 1 end + + def canonical + PutObject.new(0) + end + + def call(vm) + canonical.call(vm) + end end 
# ### Summary @@ -2983,6 +3761,14 @@ def pops def pushes 1 end + + def canonical + PutObject.new(1) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -3011,6 +3797,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(vm._self) + end end # ### Summary @@ -3051,6 +3845,23 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + case object + when OBJECT_VMCORE + vm.push(vm.frozen_core) + when OBJECT_CBASE + value = vm._self + value = value.singleton_class unless value.is_a?(Class) + vm.push(value) + when OBJECT_CONST_BASE + vm.push(vm.const_base) + end + end end # ### Summary @@ -3085,6 +3896,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(object.dup) + end end # ### Summary @@ -3124,6 +3943,33 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + block = + if (iseq = block_iseq) + ->(*args, **kwargs, &blk) do + vm.run_block_frame(iseq, *args, **kwargs, &blk) + end + end + + keywords = + if calldata.kw_arg + calldata.kw_arg.zip(vm.pop(calldata.kw_arg.length)).to_h + else + {} + end + + arguments = vm.pop(calldata.argc) + receiver = vm.pop + + vm.push( + receiver.__send__(calldata.method, *arguments, **keywords, &block) + ) + end end # ### Summary @@ -3166,6 +4012,14 @@ def pops def pushes 0 end + + def canonical + self + end + + def call(vm) + vm.local_set(index, level, vm.pop) + end end # ### Summary @@ -3204,6 +4058,16 @@ def pops def pushes 0 end + + def canonical + self + end + + def call(vm) + clazz = vm._self + clazz = clazz.class unless clazz.is_a?(Class) + clazz.class_variable_set(name, vm.pop) + end end # ### Summary @@ -3239,6 +4103,15 @@ def pops def pushes 0 end + + def canonical + self + end + + def call(vm) + value, parent = vm.pop(2) + parent.const_set(name, value) + end end # ### Summary @@ -3274,6 +4147,16 @@ def pops def pushes 0 end + + def canonical + self + end + + def call(vm) + # Evaluating the name 
of the global variable because there isn't a + # reflection API for global variables. + eval("#{name} = vm.pop", binding, __FILE__, __LINE__) + end end # ### Summary @@ -3315,6 +4198,15 @@ def pops def pushes 0 end + + def canonical + self + end + + def call(vm) + method = Object.instance_method(:instance_variable_set) + method.bind(vm._self).call(name, vm.pop) + end end # ### Summary @@ -3356,6 +4248,14 @@ def pops def pushes 0 end + + def canonical + self + end + + def call(vm) + vm.local_set(index, level, vm.pop) + end end # ### Summary @@ -3393,6 +4293,14 @@ def pops def pushes 0 end + + def canonical + SetLocal.new(index, 0) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -3430,6 +4338,14 @@ def pops def pushes 0 end + + def canonical + SetLocal.new(index, 1) + end + + def call(vm) + canonical.call(vm) + end end # ### Summary @@ -3465,6 +4381,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.stack[-number - 1] = vm.stack.last + end end # ### Summary @@ -3501,6 +4425,21 @@ def pops def pushes 0 end + + def canonical + self + end + + def call(vm) + case key + when GetSpecial::SVAR_LASTLINE + raise NotImplementedError, "svar SVAR_LASTLINE" + when GetSpecial::SVAR_BACKREF + raise NotImplementedError, "setspecial SVAR_BACKREF" + when GetSpecial::SVAR_FLIPFLOP_START + vm.frame_svar.svars[GetSpecial::SVAR_FLIPFLOP_START] + end + end end # ### Summary @@ -3537,6 +4476,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(*vm.pop) + end end # ### Summary @@ -3569,6 +4516,15 @@ def pops def pushes 2 end + + def canonical + self + end + + def call(vm) + left, right = vm.pop(2) + vm.push(right, left) + end end # ### Summary @@ -3584,6 +4540,16 @@ def pushes # ~~~ # class Throw + TAG_NONE = 0x0 + TAG_RETURN = 0x1 + TAG_BREAK = 0x2 + TAG_NEXT = 0x3 + TAG_RETRY = 0x4 + TAG_REDO = 0x5 + TAG_RAISE = 0x6 + TAG_THROW = 0x7 + TAG_FATAL = 0x8 + attr_reader :type def initialize(type) @@ 
-3605,6 +4571,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + raise NotImplementedError, "throw" + end end # ### Summary @@ -3643,6 +4617,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(vm.stack[-number - 1]) + end end # ### Summary @@ -3675,6 +4657,14 @@ def pops def pushes 1 end + + def canonical + self + end + + def call(vm) + vm.push(Regexp.new(vm.pop(length).join, options)) + end end end end diff --git a/lib/syntax_tree/yarv/legacy.rb b/lib/syntax_tree/yarv/legacy.rb index 82f7560d..93c4e4c3 100644 --- a/lib/syntax_tree/yarv/legacy.rb +++ b/lib/syntax_tree/yarv/legacy.rb @@ -82,6 +82,10 @@ def pops def pushes 1 end + + def call(vm) + vm.push(nil) + end end # ### Summary @@ -121,6 +125,10 @@ def pops def pushes 1 end + + def call(vm) + vm.push(vm.pop) + end end # ### Summary From 70064564221d38748366abc264368cbb5f8042b3 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 29 Nov 2022 11:02:04 -0500 Subject: [PATCH 080/104] Add an entire compile! step --- lib/syntax_tree/yarv.rb | 1 - lib/syntax_tree/yarv/bf.rb | 1 + lib/syntax_tree/yarv/compiler.rb | 4 + lib/syntax_tree/yarv/disassembler.rb | 3 +- lib/syntax_tree/yarv/instruction_sequence.rb | 103 +++++++++++-------- lib/syntax_tree/yarv/instructions.rb | 43 ++++++++ 6 files changed, 111 insertions(+), 44 deletions(-) diff --git a/lib/syntax_tree/yarv.rb b/lib/syntax_tree/yarv.rb index 74f2598e..97592d4d 100644 --- a/lib/syntax_tree/yarv.rb +++ b/lib/syntax_tree/yarv.rb @@ -281,7 +281,6 @@ def self.compile(source, options = Compiler::Options.new) def self.interpret(source, options = Compiler::Options.new) iseq = RubyVM::InstructionSequence.compile(source, **options) iseq = InstructionSequence.from(iseq.to_a) - iseq.specialize_instructions! 
VM.new.run_top_frame(iseq) end end diff --git a/lib/syntax_tree/yarv/bf.rb b/lib/syntax_tree/yarv/bf.rb index 78c01af5..f642fb2f 100644 --- a/lib/syntax_tree/yarv/bf.rb +++ b/lib/syntax_tree/yarv/bf.rb @@ -74,6 +74,7 @@ def compile end iseq.leave + iseq.compile! iseq end diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index 194b758b..3ea6d22a 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -1359,6 +1359,7 @@ def visit_program(node) node.location, options ) + with_child_iseq(top_iseq) do visit_all(preexes) @@ -1372,6 +1373,9 @@ def visit_program(node) iseq.leave end + + top_iseq.compile! + top_iseq end def visit_qsymbols(node) diff --git a/lib/syntax_tree/yarv/disassembler.rb b/lib/syntax_tree/yarv/disassembler.rb index 757b8b40..af325c31 100644 --- a/lib/syntax_tree/yarv/disassembler.rb +++ b/lib/syntax_tree/yarv/disassembler.rb @@ -54,7 +54,6 @@ def disassemble(iseq) clauses = {} clause = [] - iseq.to_a iseq.insns.each do |insn| case insn when InstructionSequence::Label @@ -192,7 +191,7 @@ def disassemble(iseq) Assign(VarField(target), value) end else - raise "Unknown instruction #{insn[0]}" + raise "Unknown instruction #{insn}" end end diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index f20981df..e3d0c2fc 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -232,24 +232,7 @@ def eval def to_a versions = RUBY_VERSION.split(".").map(&:to_i) - # First, handle any compilation options that we need to. - specialize_instructions! if options.specialized_instruction? - peephole_optimize! if options.peephole_optimization? - - # Next, set it up so that all of the labels get their correct name. 
- length = 0 - insns.each do |insn| - case insn - when Integer, Symbol - # skip - when Label - insn.patch!(:"label_#{length}") - else - length += insn.length - end - end - - # Next, dump all of the instructions into a flat list. + # Dump all of the instructions into a flat list. dumped = insns.map do |insn| case insn @@ -288,6 +271,65 @@ def to_a ] end + def disasm + output = StringIO.new + output << "== disasm: #:1 (#{location.start_line},#{location.start_column})-(#{location.end_line},#{location.end_column})> (catch: FALSE)\n" + + length = 0 + events = [] + + insns.each do |insn| + case insn + when Integer + # skip + when Symbol + events << insn + when Label + # skip + else + output << "%04d " % length + output << insn.disasm(self) + output << "\n" + end + + length += insn.length + end + + output.string + end + + # This method converts our linked list of instructions into a final array + # and performs any other compilation steps necessary. + def compile! + specialize_instructions! if options.specialized_instruction? + + length = 0 + insns.each do |insn| + case insn + when Integer, Symbol + # skip + when Label + insn.patch!(:"label_#{length}") + when DefineClass + insn.class_iseq.compile! + length += insn.length + when DefineMethod, DefineSMethod + insn.method_iseq.compile! + length += insn.length + when InvokeSuper, Send + insn.block_iseq.compile! if insn.block_iseq + length += insn.length + when Once + insn.iseq.compile! + length += insn.length + else + length += insn.length + end + end + + @insns = insns.to_a + end + def specialize_instructions! insns.each_node do |node, value| case value @@ -333,8 +375,7 @@ def specialize_instructions! when Send calldata = value.calldata - if !value.block_iseq && - !calldata.flag?(CallData::CALL_ARGS_BLOCKARG) + if !value.block_iseq && !calldata.flag?(CallData::CALL_ARGS_BLOCKARG) # Specialize the send instruction. 
If it doesn't have a block # attached, then we will replace it with an opt_send_without_block # and do further specializations based on the called method and @@ -395,27 +436,6 @@ def specialize_instructions! end end - def peephole_optimize! - # insns.each_node do |node, value| - # case value - # when Jump - # # jump LABEL - # # ... - # # LABEL: - # # leave - # # => - # # leave - # # ... - # # LABEL: - # # leave - # # case value.label.node.next_node&.value - # # when Leave - # # node.value = Leave.new - # # end - # end - # end - end - ########################################################################## # Child instruction sequence methods ########################################################################## @@ -1164,6 +1184,7 @@ def self.from(source, options = Compiler::Options.new, parent_iseq = nil) end end + iseq.compile! if iseq.type == :top iseq end end diff --git a/lib/syntax_tree/yarv/instructions.rb b/lib/syntax_tree/yarv/instructions.rb index 0b60bd13..c146bdbf 100644 --- a/lib/syntax_tree/yarv/instructions.rb +++ b/lib/syntax_tree/yarv/instructions.rb @@ -33,6 +33,25 @@ def initialize( @kw_arg = kw_arg end + def disasm + flag_names = [] + flag_names << :ARGS_SPLAT if flag?(CALL_ARGS_SPLAT) + flag_names << :ARGS_BLOCKARG if flag?(CALL_ARGS_BLOCKARG) + flag_names << :FCALL if flag?(CALL_FCALL) + flag_names << :VCALL if flag?(CALL_VCALL) + flag_names << :ARGS_SIMPLE if flag?(CALL_ARGS_SIMPLE) + flag_names << :BLOCKISEQ if flag?(CALL_BLOCKISEQ) + flag_names << :KWARG if flag?(CALL_KWARG) + flag_names << :KW_SPLAT if flag?(CALL_KW_SPLAT) + flag_names << :TAILCALL if flag?(CALL_TAILCALL) + flag_names << :SUPER if flag?(CALL_SUPER) + flag_names << :ZSUPER if flag?(CALL_ZSUPER) + flag_names << :OPT_SEND if flag?(CALL_OPT_SEND) + flag_names << :KW_SPLAT_MUT if flag?(CALL_KW_SPLAT_MUT) + + "" + end + def flag?(mask) (flags & mask) > 0 end @@ -1783,6 +1802,10 @@ def call(vm) # ~~~ # class Leave + def disasm(_iseq) + "leave" + end + def to_a(_iseq) [:leave] 
end @@ -2973,6 +2996,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(_iseq) + "%-38s %s" % ["opt_mult", calldata.disasm] + end + def to_a(_iseq) [:opt_mult, calldata.to_h] end @@ -3288,6 +3315,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(iseq) + "%-38s %s" % ["opt_plus", calldata.disasm] + end + def to_a(_iseq) [:opt_plus, calldata.to_h] end @@ -3670,6 +3701,10 @@ def initialize(object) @object = object end + def disasm(_iseq) + "%-38s %s" % ["putobject", object.inspect] + end + def to_a(_iseq) [:putobject, object] end @@ -3708,6 +3743,10 @@ def call(vm) # ~~~ # class PutObjectInt2Fix0 + def disasm(_iseq) + "putobject_INT2FIX_0_" + end + def to_a(_iseq) [:putobject_INT2FIX_0_] end @@ -3746,6 +3785,10 @@ def call(vm) # ~~~ # class PutObjectInt2Fix1 + def disasm(_iseq) + "putobject_INT2FIX_1_" + end + def to_a(_iseq) [:putobject_INT2FIX_1_] end From 46ab8292ef0f88f5969e4dece3c45a2c8c968d74 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 29 Nov 2022 12:58:33 -0500 Subject: [PATCH 081/104] Allow calling disasm on instructions --- .rubocop.yml | 9 + lib/syntax_tree.rb | 1 + lib/syntax_tree/yarv/disasm_formatter.rb | 211 +++++++ lib/syntax_tree/yarv/instruction_sequence.rb | 37 +- lib/syntax_tree/yarv/instructions.rb | 558 +++++++++++++++++-- lib/syntax_tree/yarv/legacy.rb | 19 + lib/syntax_tree/yarv/local_table.rb | 8 + test/compiler_test.rb | 11 + 8 files changed, 793 insertions(+), 61 deletions(-) create mode 100644 lib/syntax_tree/yarv/disasm_formatter.rb diff --git a/.rubocop.yml b/.rubocop.yml index c81fdb59..daf5a824 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -16,6 +16,12 @@ Layout/LineLength: Lint/AmbiguousBlockAssociation: Enabled: false +Lint/AmbiguousOperatorPrecedence: + Enabled: false + +Lint/AmbiguousRange: + Enabled: false + Lint/BooleanSymbol: Enabled: false @@ -91,6 +97,9 @@ Style/ExplicitBlockArgument: Style/FormatString: Enabled: false +Style/FormatStringToken: + Enabled: false + 
Style/GuardClause: Enabled: false diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index b2ff8414..eadb485d 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -30,6 +30,7 @@ require_relative "syntax_tree/yarv" require_relative "syntax_tree/yarv/bf" require_relative "syntax_tree/yarv/compiler" +require_relative "syntax_tree/yarv/disasm_formatter" require_relative "syntax_tree/yarv/disassembler" require_relative "syntax_tree/yarv/instruction_sequence" require_relative "syntax_tree/yarv/instructions" diff --git a/lib/syntax_tree/yarv/disasm_formatter.rb b/lib/syntax_tree/yarv/disasm_formatter.rb new file mode 100644 index 00000000..566bc8fd --- /dev/null +++ b/lib/syntax_tree/yarv/disasm_formatter.rb @@ -0,0 +1,211 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + class DisasmFormatter + attr_reader :output, :queue + attr_reader :current_prefix, :current_iseq + + def initialize + @output = StringIO.new + @queue = [] + + @current_prefix = "" + @current_iseq = nil + end + + ######################################################################## + # Helpers for various instructions + ######################################################################## + + def calldata(value) + flag_names = [] + flag_names << :ARGS_SPLAT if value.flag?(CallData::CALL_ARGS_SPLAT) + if value.flag?(CallData::CALL_ARGS_BLOCKARG) + flag_names << :ARGS_BLOCKARG + end + flag_names << :FCALL if value.flag?(CallData::CALL_FCALL) + flag_names << :VCALL if value.flag?(CallData::CALL_VCALL) + flag_names << :ARGS_SIMPLE if value.flag?(CallData::CALL_ARGS_SIMPLE) + flag_names << :BLOCKISEQ if value.flag?(CallData::CALL_BLOCKISEQ) + flag_names << :KWARG if value.flag?(CallData::CALL_KWARG) + flag_names << :KW_SPLAT if value.flag?(CallData::CALL_KW_SPLAT) + flag_names << :TAILCALL if value.flag?(CallData::CALL_TAILCALL) + flag_names << :SUPER if value.flag?(CallData::CALL_SUPER) + flag_names << :ZSUPER if value.flag?(CallData::CALL_ZSUPER) + flag_names << :OPT_SEND 
if value.flag?(CallData::CALL_OPT_SEND) + flag_names << :KW_SPLAT_MUT if value.flag?(CallData::CALL_KW_SPLAT_MUT) + + parts = [] + parts << "mid:#{value.method}" if value.method + parts << "argc:#{value.argc}" + parts << "kw:[#{value.kw_arg.join(", ")}]" if value.kw_arg + parts << flag_names.join("|") if flag_names.any? + + "" + end + + def enqueue(iseq) + queue << iseq + end + + def event(name) + case name + when :RUBY_EVENT_B_CALL + "Bc" + when :RUBY_EVENT_B_RETURN + "Br" + when :RUBY_EVENT_CALL + "Ca" + when :RUBY_EVENT_CLASS + "Cl" + when :RUBY_EVENT_END + "En" + when :RUBY_EVENT_LINE + "Li" + when :RUBY_EVENT_RETURN + "Re" + else + raise "Unknown event: #{name}" + end + end + + def inline_storage(cache) + "" + end + + def instruction(name, operands = []) + operands.empty? ? name : "%-38s %s" % [name, operands.join(", ")] + end + + def label(value) + value.name["label_".length..] + end + + def local(index, explicit: nil, implicit: nil) + current = current_iseq + (explicit || implicit).times { current = current.parent_iseq } + + value = "#{current.local_table.name_at(index)}@#{index}" + value << ", #{explicit}" if explicit + value + end + + def object(value) + value.inspect + end + + ######################################################################## + # Main entrypoint + ######################################################################## + + def format! + while (@current_iseq = queue.shift) + output << "\n" if output.pos > 0 + format_iseq(@current_iseq) + end + + output.string + end + + private + + def format_iseq(iseq) + output << "#{current_prefix}== disasm: " + output << "#:1 " + + location = iseq.location + output << "(#{location.start_line},#{location.start_column})-" + output << "(#{location.end_line},#{location.end_column})" + output << "> " + + if iseq.catch_table.any? 
+ output << "(catch: TRUE)\n" + output << "#{current_prefix}== catch table\n" + + with_prefix("#{current_prefix}| ") do + iseq.catch_table.each do |entry| + case entry + when InstructionSequence::CatchBreak + output << "#{current_prefix}catch type: break\n" + format_iseq(entry.iseq) + when InstructionSequence::CatchNext + output << "#{current_prefix}catch type: next\n" + when InstructionSequence::CatchRedo + output << "#{current_prefix}catch type: redo\n" + when InstructionSequence::CatchRescue + output << "#{current_prefix}catch type: rescue\n" + format_iseq(entry.iseq) + end + end + end + + output << "#{current_prefix}|#{"-" * 72}\n" + else + output << "(catch: FALSE)\n" + end + + if (local_table = iseq.local_table) && !local_table.empty? + output << "#{current_prefix}local table (size: #{local_table.size})\n" + + locals = + local_table.locals.each_with_index.map do |local, index| + "[%2d] %s@%d" % [local_table.offset(index), local.name, index] + end + + output << "#{current_prefix}#{locals.join(" ")}\n" + end + + length = 0 + events = [] + lines = [] + + iseq.insns.each do |insn| + case insn + when Integer + lines << insn + when Symbol + events << event(insn) + when InstructionSequence::Label + # skip + else + output << "#{current_prefix}%04d " % length + + disasm = insn.disasm(self) + output << disasm + + if lines.any? + output << " " * (65 - disasm.length) if disasm.length < 65 + elsif events.any? + output << " " * (39 - disasm.length) if disasm.length < 39 + end + + if lines.any? + output << "(%4d)" % lines.last + lines.clear + end + + if events.any? 
+ output << "[#{events.join}]" + events.clear + end + + output << "\n" + length += insn.length + end + end + end + + def with_prefix(value) + previous = @current_prefix + + begin + @current_prefix = value + yield + ensure + @current_prefix = previous + end + end + end + end +end diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index e3d0c2fc..ee5390a1 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -272,30 +272,9 @@ def to_a end def disasm - output = StringIO.new - output << "== disasm: #:1 (#{location.start_line},#{location.start_column})-(#{location.end_line},#{location.end_column})> (catch: FALSE)\n" - - length = 0 - events = [] - - insns.each do |insn| - case insn - when Integer - # skip - when Symbol - events << insn - when Label - # skip - else - output << "%04d " % length - output << insn.disasm(self) - output << "\n" - end - - length += insn.length - end - - output.string + formatter = DisasmFormatter.new + formatter.enqueue(self) + formatter.format! end # This method converts our linked list of instructions into a final array @@ -375,7 +354,8 @@ def specialize_instructions! when Send calldata = value.calldata - if !value.block_iseq && !calldata.flag?(CallData::CALL_ARGS_BLOCKARG) + if !value.block_iseq && + !calldata.flag?(CallData::CALL_ARGS_BLOCKARG) # Specialize the send instruction. 
If it doesn't have a block # attached, then we will replace it with an opt_send_without_block # and do further specializations based on the called method and @@ -980,8 +960,11 @@ def self.from(source, options = Compiler::Options.new, parent_iseq = nil) # set up all of the instructions source[13].each do |insn| - # skip line numbers - next if insn.is_a?(Integer) + # add line numbers + if insn.is_a?(Integer) + iseq.push(insn) + next + end # add events and labels if insn.is_a?(Symbol) diff --git a/lib/syntax_tree/yarv/instructions.rb b/lib/syntax_tree/yarv/instructions.rb index c146bdbf..772f1bb3 100644 --- a/lib/syntax_tree/yarv/instructions.rb +++ b/lib/syntax_tree/yarv/instructions.rb @@ -33,25 +33,6 @@ def initialize( @kw_arg = kw_arg end - def disasm - flag_names = [] - flag_names << :ARGS_SPLAT if flag?(CALL_ARGS_SPLAT) - flag_names << :ARGS_BLOCKARG if flag?(CALL_ARGS_BLOCKARG) - flag_names << :FCALL if flag?(CALL_FCALL) - flag_names << :VCALL if flag?(CALL_VCALL) - flag_names << :ARGS_SIMPLE if flag?(CALL_ARGS_SIMPLE) - flag_names << :BLOCKISEQ if flag?(CALL_BLOCKISEQ) - flag_names << :KWARG if flag?(CALL_KWARG) - flag_names << :KW_SPLAT if flag?(CALL_KW_SPLAT) - flag_names << :TAILCALL if flag?(CALL_TAILCALL) - flag_names << :SUPER if flag?(CALL_SUPER) - flag_names << :ZSUPER if flag?(CALL_ZSUPER) - flag_names << :OPT_SEND if flag?(CALL_OPT_SEND) - flag_names << :KW_SPLAT_MUT if flag?(CALL_KW_SPLAT_MUT) - - "" - end - def flag?(mask) (flags & mask) > 0 end @@ -102,6 +83,10 @@ def initialize(number) @number = number end + def disasm(fmt) + fmt.instruction("adjuststack", [fmt.object(number)]) + end + def to_a(_iseq) [:adjuststack, number] end @@ -146,6 +131,10 @@ def call(vm) # ~~~ # class AnyToString + def disasm(fmt) + fmt.instruction("anytostring") + end + def to_a(_iseq) [:anytostring] end @@ -200,6 +189,10 @@ def initialize(label) @label = label end + def disasm(fmt) + fmt.instruction("branchif", [fmt.label(label)]) + end + def to_a(_iseq) [:branchif, 
label.name] end @@ -249,6 +242,10 @@ def initialize(label) @label = label end + def disasm(fmt) + fmt.instruction("branchnil", [fmt.label(label)]) + end + def to_a(_iseq) [:branchnil, label.name] end @@ -297,6 +294,10 @@ def initialize(label) @label = label end + def disasm(fmt) + fmt.instruction("branchunless", [fmt.label(label)]) + end + def to_a(_iseq) [:branchunless, label.name] end @@ -349,6 +350,13 @@ def initialize(keyword_bits_index, keyword_index) @keyword_index = keyword_index end + def disasm(fmt) + fmt.instruction( + "checkkeyword", + [fmt.object(keyword_bits_index), fmt.object(keyword_index)] + ) + end + def to_a(iseq) [ :checkkeyword, @@ -401,6 +409,10 @@ def initialize(type) @type = type end + def disasm(fmt) + fmt.instruction("checkmatch", [fmt.object(type)]) + end + def to_a(_iseq) [:checkmatch, type] end @@ -468,6 +480,56 @@ def initialize(type) @type = type end + def disasm(fmt) + name = + case type + when TYPE_OBJECT + "T_OBJECT" + when TYPE_CLASS + "T_CLASS" + when TYPE_MODULE + "T_MODULE" + when TYPE_FLOAT + "T_FLOAT" + when TYPE_STRING + "T_STRING" + when TYPE_REGEXP + "T_REGEXP" + when TYPE_ARRAY + "T_ARRAY" + when TYPE_HASH + "T_HASH" + when TYPE_STRUCT + "T_STRUCT" + when TYPE_BIGNUM + "T_BIGNUM" + when TYPE_FILE + "T_FILE" + when TYPE_DATA + "T_DATA" + when TYPE_MATCH + "T_MATCH" + when TYPE_COMPLEX + "T_COMPLEX" + when TYPE_RATIONAL + "T_RATIONAL" + when TYPE_NIL + "T_NIL" + when TYPE_TRUE + "T_TRUE" + when TYPE_FALSE + "T_FALSE" + when TYPE_SYMBOL + "T_SYMBOL" + when TYPE_FIXNUM + "T_FIXNUM" + when TYPE_UNDEF + "T_UNDEF" + end + + fmt.instruction("checktype", [name]) + end + def to_a(_iseq) [:checktype, type] end @@ -559,6 +621,10 @@ def call(vm) # ~~~ # class ConcatArray + def disasm(fmt) + fmt.instruction("concatarray") + end + def to_a(_iseq) [:concatarray] end @@ -607,6 +673,10 @@ def initialize(number) @number = number end + def disasm(fmt) + fmt.instruction("concatstrings", [fmt.object(number)]) + end + def to_a(_iseq) 
[:concatstrings, number] end @@ -662,6 +732,14 @@ def initialize(name, class_iseq, flags) @flags = flags end + def disasm(fmt) + fmt.enqueue(class_iseq) + fmt.instruction( + "defineclass", + [fmt.object(name), class_iseq.name, fmt.object(flags)] + ) + end + def to_a(_iseq) [:defineclass, name, class_iseq.to_a, flags] end @@ -731,6 +809,51 @@ def initialize(type, name, message) @message = message end + def disasm(fmt) + type_name = + case type + when TYPE_NIL + "nil" + when TYPE_IVAR + "ivar" + when TYPE_LVAR + "lvar" + when TYPE_GVAR + "gvar" + when TYPE_CVAR + "cvar" + when TYPE_CONST + "const" + when TYPE_METHOD + "method" + when TYPE_YIELD + "yield" + when TYPE_ZSUPER + "zsuper" + when TYPE_SELF + "self" + when TYPE_TRUE + "true" + when TYPE_FALSE + "false" + when TYPE_ASGN + "asgn" + when TYPE_EXPR + "expr" + when TYPE_REF + "ref" + when TYPE_FUNC + "func" + when TYPE_CONST_FROM + "constant-from" + end + + fmt.instruction( + "defined", + [type_name, fmt.object(name), fmt.object(message)] + ) + end + def to_a(_iseq) [:defined, type, name, message] end @@ -809,6 +932,14 @@ def initialize(method_name, method_iseq) @method_iseq = method_iseq end + def disasm(fmt) + fmt.enqueue(method_iseq) + fmt.instruction( + "definemethod", + [fmt.object(method_name), method_iseq.name] + ) + end + def to_a(_iseq) [:definemethod, method_name, method_iseq.to_a] end @@ -863,6 +994,14 @@ def initialize(method_name, method_iseq) @method_iseq = method_iseq end + def disasm(fmt) + fmt.enqueue(method_iseq) + fmt.instruction( + "definesmethod", + [fmt.object(method_name), method_iseq.name] + ) + end + def to_a(_iseq) [:definesmethod, method_name, method_iseq.to_a] end @@ -906,6 +1045,10 @@ def call(vm) # ~~~ # class Dup + def disasm(fmt) + fmt.instruction("dup") + end + def to_a(_iseq) [:dup] end @@ -948,6 +1091,10 @@ def initialize(object) @object = object end + def disasm(fmt) + fmt.instruction("duparray", [fmt.object(object)]) + end + def to_a(_iseq) [:duparray, object] end @@ -990,6 
+1137,10 @@ def initialize(object) @object = object end + def disasm(fmt) + fmt.instruction("duphash", [fmt.object(object)]) + end + def to_a(_iseq) [:duphash, object] end @@ -1032,6 +1183,10 @@ def initialize(number) @number = number end + def disasm(fmt) + fmt.instruction("dupn", [fmt.object(number)]) + end + def to_a(_iseq) [:dupn, number] end @@ -1079,6 +1234,10 @@ def initialize(number, flags) @flags = flags end + def disasm(fmt) + fmt.instruction("expandarray", [fmt.object(number), fmt.object(flags)]) + end + def to_a(_iseq) [:expandarray, number, flags] end @@ -1129,6 +1288,10 @@ def initialize(index, level) @level = level end + def disasm(fmt) + fmt.instruction("getblockparam", [fmt.local(index, explicit: level)]) + end + def to_a(iseq) current = iseq level.times { current = iseq.parent_iseq } @@ -1179,6 +1342,13 @@ def initialize(index, level) @level = level end + def disasm(fmt) + fmt.instruction( + "getblockparamproxy", + [fmt.local(index, explicit: level)] + ) + end + def to_a(iseq) current = iseq level.times { current = iseq.parent_iseq } @@ -1226,6 +1396,13 @@ def initialize(name, cache) @cache = cache end + def disasm(fmt) + fmt.instruction( + "getclassvariable", + [fmt.object(name), fmt.inline_storage(cache)] + ) + end + def to_a(_iseq) [:getclassvariable, name, cache] end @@ -1272,6 +1449,10 @@ def initialize(name) @name = name end + def disasm(fmt) + fmt.instruction("getconstant", [fmt.object(name)]) + end + def to_a(_iseq) [:getconstant, name] end @@ -1324,6 +1505,10 @@ def initialize(name) @name = name end + def disasm(fmt) + fmt.instruction("getglobal", [fmt.object(name)]) + end + def to_a(_iseq) [:getglobal, name] end @@ -1376,6 +1561,13 @@ def initialize(name, cache) @cache = cache end + def disasm(fmt) + fmt.instruction( + "getinstancevariable", + [fmt.object(name), fmt.inline_storage(cache)] + ) + end + def to_a(_iseq) [:getinstancevariable, name, cache] end @@ -1424,6 +1616,10 @@ def initialize(index, level) @level = level end + def 
disasm(fmt) + fmt.instruction("getlocal", [fmt.local(index, explicit: level)]) + end + def to_a(iseq) current = iseq level.times { current = current.parent_iseq } @@ -1471,6 +1667,10 @@ def initialize(index) @index = index end + def disasm(fmt) + fmt.instruction("getlocal_WC_0", [fmt.local(index, implicit: 0)]) + end + def to_a(iseq) [:getlocal_WC_0, iseq.local_table.offset(index)] end @@ -1516,6 +1716,10 @@ def initialize(index) @index = index end + def disasm(fmt) + fmt.instruction("getlocal_WC_1", [fmt.local(index, implicit: 1)]) + end + def to_a(iseq) [:getlocal_WC_1, iseq.parent_iseq.local_table.offset(index)] end @@ -1548,7 +1752,7 @@ def call(vm) # ### Usage # # ~~~ruby - # [true] + # 1 if (a == 1) .. (b == 2) # ~~~ # class GetSpecial @@ -1563,6 +1767,10 @@ def initialize(key, type) @type = type end + def disasm(fmt) + fmt.instruction("getspecial", [fmt.object(key), fmt.object(type)]) + end + def to_a(_iseq) [:getspecial, key, type] end @@ -1607,6 +1815,10 @@ def call(vm) # ~~~ # class Intern + def disasm(fmt) + fmt.instruction("intern") + end + def to_a(_iseq) [:intern] end @@ -1653,6 +1865,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("invokeblock", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:invokeblock, calldata.to_h] end @@ -1700,6 +1916,14 @@ def initialize(calldata, block_iseq) @block_iseq = block_iseq end + def disasm(fmt) + fmt.enqueue(block_iseq) if block_iseq + fmt.instruction( + "invokesuper", + [fmt.calldata(calldata), block_iseq&.name || "nil"] + ) + end + def to_a(_iseq) [:invokesuper, calldata.to_h, block_iseq&.to_a] end @@ -1766,6 +1990,10 @@ def initialize(label) @label = label end + def disasm(fmt) + fmt.instruction("jump", [fmt.label(label)]) + end + def to_a(_iseq) [:jump, label.name] end @@ -1802,8 +2030,8 @@ def call(vm) # ~~~ # class Leave - def disasm(_iseq) - "leave" + def disasm(fmt) + fmt.instruction("leave") end def to_a(_iseq) @@ -1852,6 +2080,10 @@ def initialize(number) 
@number = number end + def disasm(fmt) + fmt.instruction("newarray", [fmt.object(number)]) + end + def to_a(_iseq) [:newarray, number] end @@ -1896,6 +2128,10 @@ def initialize(number) @number = number end + def disasm(fmt) + fmt.instruction("newarraykwsplat", [fmt.object(number)]) + end + def to_a(_iseq) [:newarraykwsplat, number] end @@ -1942,6 +2178,10 @@ def initialize(number) @number = number end + def disasm(fmt) + fmt.instruction("newhash", [fmt.object(number)]) + end + def to_a(_iseq) [:newhash, number] end @@ -1989,6 +2229,10 @@ def initialize(exclude_end) @exclude_end = exclude_end end + def disasm(fmt) + fmt.instruction("newrange", [fmt.object(exclude_end)]) + end + def to_a(_iseq) [:newrange, exclude_end] end @@ -2026,6 +2270,10 @@ def call(vm) # ~~~ # class Nop + def disasm(fmt) + fmt.instruction("nop") + end + def to_a(_iseq) [:nop] end @@ -2071,6 +2319,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("objtostring", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:objtostring, calldata.to_h] end @@ -2117,6 +2369,11 @@ def initialize(iseq, cache) @cache = cache end + def disasm(fmt) + fmt.enqueue(iseq) + fmt.instruction("once", [iseq.name, fmt.inline_storage(cache)]) + end + def to_a(_iseq) [:once, iseq.to_a, cache] end @@ -2164,6 +2421,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_and", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_and, calldata.to_h] end @@ -2208,6 +2469,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_aref", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_aref, calldata.to_h] end @@ -2254,6 +2519,13 @@ def initialize(object, calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction( + "opt_aref_with", + [fmt.object(object), fmt.calldata(calldata)] + ) + end + def to_a(_iseq) [:opt_aref_with, object, calldata.to_h] end @@ -2299,6 +2571,10 @@ def 
initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_aset", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_aset, calldata.to_h] end @@ -2344,6 +2620,13 @@ def initialize(object, calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction( + "opt_aset_with", + [fmt.object(object), fmt.calldata(calldata)] + ) + end + def to_a(_iseq) [:opt_aset_with, object, calldata.to_h] end @@ -2401,6 +2684,13 @@ def initialize(case_dispatch_hash, else_label) @else_label = else_label end + def disasm(fmt) + fmt.instruction( + "opt_case_dispatch", + ["", fmt.label(else_label)] + ) + end + def to_a(_iseq) [ :opt_case_dispatch, @@ -2450,6 +2740,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_div", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_div, calldata.to_h] end @@ -2494,6 +2788,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_empty_p", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_empty_p, calldata.to_h] end @@ -2539,6 +2837,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_eq", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_eq, calldata.to_h] end @@ -2584,6 +2886,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_ge", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_ge, calldata.to_h] end @@ -2628,6 +2934,11 @@ def initialize(names) @names = names end + def disasm(fmt) + cache = "" + fmt.instruction("opt_getconstant_path", [cache]) + end + def to_a(_iseq) [:opt_getconstant_path, names] end @@ -2680,6 +2991,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_gt", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_gt, calldata.to_h] end @@ -2725,6 +3040,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_le", 
[fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_le, calldata.to_h] end @@ -2770,6 +3089,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_length", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_length, calldata.to_h] end @@ -2815,6 +3138,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_lt", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_lt, calldata.to_h] end @@ -2860,6 +3187,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_ltlt", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_ltlt, calldata.to_h] end @@ -2906,6 +3237,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_minus", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_minus, calldata.to_h] end @@ -2951,6 +3286,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_mod", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_mod, calldata.to_h] end @@ -2996,8 +3335,8 @@ def initialize(calldata) @calldata = calldata end - def disasm(_iseq) - "%-38s %s" % ["opt_mult", calldata.disasm] + def disasm(fmt) + fmt.instruction("opt_mult", [fmt.calldata(calldata)]) end def to_a(_iseq) @@ -3048,6 +3387,13 @@ def initialize(eq_calldata, neq_calldata) @neq_calldata = neq_calldata end + def disasm(fmt) + fmt.instruction( + "opt_neq", + [fmt.calldata(eq_calldata), fmt.calldata(neq_calldata)] + ) + end + def to_a(_iseq) [:opt_neq, eq_calldata.to_h, neq_calldata.to_h] end @@ -3083,7 +3429,7 @@ def call(vm) # ### Usage # # ~~~ruby - # [1, 2, 3].max + # [a, b, c].max # ~~~ # class OptNewArrayMax @@ -3093,6 +3439,10 @@ def initialize(number) @number = number end + def disasm(fmt) + fmt.instruction("opt_newarray_max", [fmt.object(number)]) + end + def to_a(_iseq) [:opt_newarray_max, number] end @@ -3127,7 +3477,7 @@ def call(vm) # ### Usage # # ~~~ruby - # [1, 
2, 3].min + # [a, b, c].min # ~~~ # class OptNewArrayMin @@ -3137,6 +3487,10 @@ def initialize(number) @number = number end + def disasm(fmt) + fmt.instruction("opt_newarray_min", [fmt.object(number)]) + end + def to_a(_iseq) [:opt_newarray_min, number] end @@ -3182,6 +3536,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_nil_p", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_nil_p, calldata.to_h] end @@ -3225,6 +3583,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_not", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_not, calldata.to_h] end @@ -3270,6 +3632,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_or", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_or, calldata.to_h] end @@ -3315,8 +3681,8 @@ def initialize(calldata) @calldata = calldata end - def disasm(iseq) - "%-38s %s" % ["opt_plus", calldata.disasm] + def disasm(fmt) + fmt.instruction("opt_plus", [fmt.calldata(calldata)]) end def to_a(_iseq) @@ -3363,6 +3729,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_regexpmatch2", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_regexpmatch2, calldata.to_h] end @@ -3407,6 +3777,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_send_without_block", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_send_without_block, calldata.to_h] end @@ -3452,6 +3826,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_size", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_size, calldata.to_h] end @@ -3497,6 +3875,13 @@ def initialize(object, calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction( + "opt_str_freeze", + [fmt.object(object), fmt.calldata(calldata)] + ) + end + def to_a(_iseq) [:opt_str_freeze, object, calldata.to_h] end 
@@ -3542,6 +3927,13 @@ def initialize(object, calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction( + "opt_str_uminus", + [fmt.object(object), fmt.calldata(calldata)] + ) + end + def to_a(_iseq) [:opt_str_uminus, object, calldata.to_h] end @@ -3587,6 +3979,10 @@ def initialize(calldata) @calldata = calldata end + def disasm(fmt) + fmt.instruction("opt_succ", [fmt.calldata(calldata)]) + end + def to_a(_iseq) [:opt_succ, calldata.to_h] end @@ -3623,6 +4019,10 @@ def call(vm) # ~~~ # class Pop + def disasm(fmt) + fmt.instruction("pop") + end + def to_a(_iseq) [:pop] end @@ -3659,6 +4059,10 @@ def call(vm) # ~~~ # class PutNil + def disasm(fmt) + fmt.instruction("putnil") + end + def to_a(_iseq) [:putnil] end @@ -3701,8 +4105,8 @@ def initialize(object) @object = object end - def disasm(_iseq) - "%-38s %s" % ["putobject", object.inspect] + def disasm(fmt) + fmt.instruction("putobject", [fmt.object(object)]) end def to_a(_iseq) @@ -3743,8 +4147,8 @@ def call(vm) # ~~~ # class PutObjectInt2Fix0 - def disasm(_iseq) - "putobject_INT2FIX_0_" + def disasm(fmt) + fmt.instruction("putobject_INT2FIX_0_") end def to_a(_iseq) @@ -3785,8 +4189,8 @@ def call(vm) # ~~~ # class PutObjectInt2Fix1 - def disasm(_iseq) - "putobject_INT2FIX_1_" + def disasm(fmt) + fmt.instruction("putobject_INT2FIX_1_") end def to_a(_iseq) @@ -3825,6 +4229,10 @@ def call(vm) # ~~~ # class PutSelf + def disasm(fmt) + fmt.instruction("putself") + end + def to_a(_iseq) [:putself] end @@ -3873,6 +4281,10 @@ def initialize(object) @object = object end + def disasm(fmt) + fmt.instruction("putspecialobject", [fmt.object(object)]) + end + def to_a(_iseq) [:putspecialobject, object] end @@ -3924,6 +4336,10 @@ def initialize(object) @object = object end + def disasm(fmt) + fmt.instruction("putstring", [fmt.object(object)]) + end + def to_a(_iseq) [:putstring, object] end @@ -3970,6 +4386,14 @@ def initialize(calldata, block_iseq) @block_iseq = block_iseq end + def disasm(fmt) + 
fmt.enqueue(block_iseq) if block_iseq + fmt.instruction( + "send", + [fmt.calldata(calldata), block_iseq&.name || "nil"] + ) + end + def to_a(_iseq) [:send, calldata.to_h, block_iseq&.to_a] end @@ -4038,6 +4462,10 @@ def initialize(index, level) @level = level end + def disasm(fmt) + fmt.instruction("setblockparam", [fmt.local(index, explicit: level)]) + end + def to_a(iseq) current = iseq level.times { current = current.parent_iseq } @@ -4086,6 +4514,13 @@ def initialize(name, cache) @cache = cache end + def disasm(fmt) + fmt.instruction( + "setclassvariable", + [fmt.object(name), fmt.inline_storage(cache)] + ) + end + def to_a(_iseq) [:setclassvariable, name, cache] end @@ -4131,6 +4566,10 @@ def initialize(name) @name = name end + def disasm(fmt) + fmt.instruction("setconstant", [fmt.object(name)]) + end + def to_a(_iseq) [:setconstant, name] end @@ -4175,6 +4614,10 @@ def initialize(name) @name = name end + def disasm(fmt) + fmt.instruction("setglobal", [fmt.object(name)]) + end + def to_a(_iseq) [:setglobal, name] end @@ -4226,6 +4669,13 @@ def initialize(name, cache) @cache = cache end + def disasm(fmt) + fmt.instruction( + "setinstancevariable", + [fmt.object(name), fmt.inline_storage(cache)] + ) + end + def to_a(_iseq) [:setinstancevariable, name, cache] end @@ -4274,6 +4724,10 @@ def initialize(index, level) @level = level end + def disasm(fmt) + fmt.instruction("setlocal", [fmt.local(index, explicit: level)]) + end + def to_a(iseq) current = iseq level.times { current = current.parent_iseq } @@ -4321,6 +4775,10 @@ def initialize(index) @index = index end + def disasm(fmt) + fmt.instruction("setlocal_WC_0", [fmt.local(index, implicit: 0)]) + end + def to_a(iseq) [:setlocal_WC_0, iseq.local_table.offset(index)] end @@ -4366,6 +4824,10 @@ def initialize(index) @index = index end + def disasm(fmt) + fmt.instruction("setlocal_WC_1", [fmt.local(index, implicit: 1)]) + end + def to_a(iseq) [:setlocal_WC_1, iseq.parent_iseq.local_table.offset(index)] end @@ 
-4409,6 +4871,10 @@ def initialize(number) @number = number end + def disasm(fmt) + fmt.instruction("setn", [fmt.object(number)]) + end + def to_a(_iseq) [:setn, number] end @@ -4453,6 +4919,10 @@ def initialize(key) @key = key end + def disasm(fmt) + fmt.instruction("setspecial", [fmt.object(key)]) + end + def to_a(_iseq) [:setspecial, key] end @@ -4504,6 +4974,10 @@ def initialize(flag) @flag = flag end + def disasm(fmt) + fmt.instruction("splatarray", [fmt.object(flag)]) + end + def to_a(_iseq) [:splatarray, flag] end @@ -4544,6 +5018,10 @@ def call(vm) # ~~~ # class Swap + def disasm(fmt) + fmt.instruction("swap") + end + def to_a(_iseq) [:swap] end @@ -4599,6 +5077,10 @@ def initialize(type) @type = type end + def disasm(fmt) + fmt.instruction("throw", [fmt.object(type)]) + end + def to_a(_iseq) [:throw, type] end @@ -4645,6 +5127,10 @@ def initialize(number) @number = number end + def disasm(fmt) + fmt.instruction("topn", [fmt.object(number)]) + end + def to_a(_iseq) [:topn, number] end @@ -4689,6 +5175,10 @@ def initialize(options, length) @length = length end + def disasm(fmt) + fmt.instruction("toregexp", [fmt.object(options), fmt.object(length)]) + end + def to_a(_iseq) [:toregexp, options, length] end diff --git a/lib/syntax_tree/yarv/legacy.rb b/lib/syntax_tree/yarv/legacy.rb index 93c4e4c3..30a95437 100644 --- a/lib/syntax_tree/yarv/legacy.rb +++ b/lib/syntax_tree/yarv/legacy.rb @@ -26,6 +26,10 @@ def initialize(name) @name = name end + def disasm(fmt) + fmt.instruction("getclassvariable", [fmt.object(name)]) + end + def to_a(_iseq) [:getclassvariable, name] end @@ -67,6 +71,13 @@ def initialize(label, cache) @cache = cache end + def disasm(fmt) + fmt.instruction( + "opt_getinlinecache", + [fmt.label(label), fmt.inline_storage(cache)] + ) + end + def to_a(_iseq) [:opt_getinlinecache, label.name, cache] end @@ -110,6 +121,10 @@ def initialize(cache) @cache = cache end + def disasm(fmt) + fmt.instruction("opt_setinlinecache", [fmt.inline_storage(cache)]) 
+ end + def to_a(_iseq) [:opt_setinlinecache, cache] end @@ -152,6 +167,10 @@ def initialize(name) @name = name end + def disasm(fmt) + fmt.instruction("setclassvariable", [fmt.object(name)]) + end + def to_a(_iseq) [:setclassvariable, name] end diff --git a/lib/syntax_tree/yarv/local_table.rb b/lib/syntax_tree/yarv/local_table.rb index 5eac346c..54cc55ad 100644 --- a/lib/syntax_tree/yarv/local_table.rb +++ b/lib/syntax_tree/yarv/local_table.rb @@ -44,6 +44,10 @@ def initialize @locals = [] end + def empty? + locals.empty? + end + def find(name, level = 0) index = locals.index { |local| local.name == name } Lookup.new(locals[index], index, level) if index @@ -57,6 +61,10 @@ def names locals.map(&:name) end + def name_at(index) + locals[index].name + end + def size locals.length end diff --git a/test/compiler_test.rb b/test/compiler_test.rb index 1f4a5299..1922f8c6 100644 --- a/test/compiler_test.rb +++ b/test/compiler_test.rb @@ -449,6 +449,10 @@ class CompilerTest < Minitest::Test define_method(:"test_loads_#{source}_(#{suffix})") do assert_loads(source, options) end + + define_method(:"test_disasms_#{source}_(#{suffix})") do + assert_disasms(source, options) + end end end @@ -507,6 +511,13 @@ def assert_loads(source, options) ) end + # Check that we can successfully disasm the compiled instruction sequence. 
+ def assert_disasms(source, options) + compiled = RubyVM::InstructionSequence.compile(source, **options) + yarv = YARV::InstructionSequence.from(compiled.to_a, options) + assert_kind_of String, yarv.disasm + end + def assert_evaluates(expected, source) assert_equal expected, YARV.compile(source).eval end From 67463fb645bfa2df195662df583d366e4a220fd7 Mon Sep 17 00:00:00 2001 From: Andy Waite Date: Wed, 7 Dec 2022 13:31:52 -0500 Subject: [PATCH 082/104] Some README typo fixes --- README.md | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index 0f1b626a..0d9e8856 100644 --- a/README.md +++ b/README.md @@ -328,7 +328,7 @@ Syntax Tree can be used as a library to access the syntax tree underlying Ruby s ### SyntaxTree.read(filepath) -This function takes a filepath and returns a string associated with the content of that file. It is similar in functionality to `File.read`, except htat it takes into account Ruby-level file encoding (through magic comments at the top of the file). +This function takes a filepath and returns a string associated with the content of that file. It is similar in functionality to `File.read`, except that it takes into account Ruby-level file encoding (through magic comments at the top of the file). ### SyntaxTree.parse(source) @@ -570,7 +570,7 @@ SyntaxTree::Formatter.format(source, program.accept(visitor)) ### WithEnvironment The `WithEnvironment` module can be included in visitors to automatically keep track of local variables and arguments -defined inside each environment. A `current_environment` accessor is made availble to the request, allowing it to find +defined inside each environment. A `current_environment` accessor is made available to the request, allowing it to find all usages and definitions of a local. ```ruby @@ -611,7 +611,7 @@ The language server also responds to the relatively new inlay hints request. 
Thi 1 + 2 * 3 ``` -Implicity, the `2 * 3` is going to be executed first because the `*` operator has higher precedence than the `+` operator. To ease mental overhead, our language server includes small parentheses to make this explicit, as in: +Implicitly, the `2 * 3` is going to be executed first because the `*` operator has higher precedence than the `+` operator. To ease mental overhead, our language server includes small parentheses to make this explicit, as in: ```ruby 1 + ₍2 * 3₎ @@ -686,7 +686,7 @@ Below are listed all of the "official" language plugins hosted under the same Gi ## Integration -Syntax Tree's goal is to seemlessly integrate into your workflow. To this end, it provides a couple of additional tools beyond the CLI and the Ruby library. +Syntax Tree's goal is to seamlessly integrate into your workflow. To this end, it provides a couple of additional tools beyond the CLI and the Ruby library. ### Rake From ad671c49fea40240fc25c49546acc7f1f0b0945f Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 7 Dec 2022 22:24:36 -0500 Subject: [PATCH 083/104] Ruby 3.2 argument forwarding --- lib/syntax_tree/yarv/compiler.rb | 54 ++++++++++++++++++++++++++------ 1 file changed, 44 insertions(+), 10 deletions(-) diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index 3ea6d22a..046fb438 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -650,14 +650,36 @@ def visit_call(node) flag |= CallData::CALL_ARGS_SPLAT visit(arg_part) when ArgsForward - flag |= CallData::CALL_ARGS_SPLAT - flag |= CallData::CALL_ARGS_BLOCKARG flag |= CallData::CALL_TAILCALL if options.tailcall_optimization? 
- lookup = iseq.local_table.find(:*) - iseq.getlocal(lookup.index, lookup.level) - iseq.splatarray(arg_parts.length != 1) + if RUBY_VERSION < "3.2" + flag |= CallData::CALL_ARGS_SPLAT + lookup = iseq.local_table.find(:*) + iseq.getlocal(lookup.index, lookup.level) + iseq.splatarray(arg_parts.length != 1) + else + flag |= CallData::CALL_ARGS_SPLAT + lookup = iseq.local_table.find(:*) + iseq.getlocal(lookup.index, lookup.level) + iseq.splatarray(true) + + flag |= CallData::CALL_KW_SPLAT + iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) + iseq.newhash(0) + lookup = iseq.local_table.find(:**) + iseq.getlocal(lookup.index, lookup.level) + iseq.send( + YARV.calldata( + :"core#hash_merge_kwd", + 2, + CallData::CALL_ARGS_SIMPLE + ) + ) + iseq.newarray(1) + iseq.concatarray + end + flag |= CallData::CALL_ARGS_BLOCKARG lookup = iseq.local_table.find(:&) iseq.getblockparamproxy(lookup.index, lookup.level) when BareAssocHash @@ -1304,13 +1326,25 @@ def visit_params(node) end if node.keyword_rest.is_a?(ArgsForward) - iseq.local_table.plain(:*) - iseq.local_table.plain(:&) + if RUBY_VERSION >= "3.2" + iseq.local_table.plain(:*) + iseq.local_table.plain(:**) + iseq.local_table.plain(:&) + + iseq.argument_options[:rest_start] = iseq.argument_size + iseq.argument_options[:block_start] = iseq.argument_size + 2 + iseq.argument_options[:kwrest] = iseq.argument_size + 1 - iseq.argument_options[:rest_start] = iseq.argument_size - iseq.argument_options[:block_start] = iseq.argument_size + 1 + iseq.argument_size += 3 + else + iseq.local_table.plain(:*) + iseq.local_table.plain(:&) + + iseq.argument_options[:rest_start] = iseq.argument_size + iseq.argument_options[:block_start] = iseq.argument_size + 1 - iseq.argument_size += 2 + iseq.argument_size += 2 + end elsif node.keyword_rest visit(node.keyword_rest) end From 8dcb19cafcb17f58ede60cd0b08c3bed24df9f49 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 8 Dec 2022 
17:07:06 +0000 Subject: [PATCH 084/104] Bump rubocop from 1.39.0 to 1.40.0 Bumps [rubocop](https://github.com/rubocop/rubocop) from 1.39.0 to 1.40.0. - [Release notes](https://github.com/rubocop/rubocop/releases) - [Changelog](https://github.com/rubocop/rubocop/blob/master/CHANGELOG.md) - [Commits](https://github.com/rubocop/rubocop/compare/v1.39.0...v1.40.0) --- updated-dependencies: - dependency-name: rubocop dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- Gemfile.lock | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index 0e81e5ff..05e482bf 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -9,17 +9,17 @@ GEM specs: ast (2.4.2) docile (1.4.0) - json (2.6.2) + json (2.6.3) minitest (5.16.3) parallel (1.22.1) - parser (3.1.2.1) + parser (3.1.3.0) ast (~> 2.4.1) prettier_print (1.1.0) rainbow (3.1.1) rake (13.0.6) - regexp_parser (2.6.0) + regexp_parser (2.6.1) rexml (3.2.5) - rubocop (1.39.0) + rubocop (1.40.0) json (~> 2.3) parallel (~> 1.10) parser (>= 3.1.2.1) @@ -29,7 +29,7 @@ GEM rubocop-ast (>= 1.23.0, < 2.0) ruby-progressbar (~> 1.7) unicode-display_width (>= 1.4.0, < 3.0) - rubocop-ast (1.23.0) + rubocop-ast (1.24.0) parser (>= 3.1.1.0) ruby-progressbar (1.11.0) simplecov (0.21.2) From 24b62e256b57271b6c21c62bb0f0fb509fae2442 Mon Sep 17 00:00:00 2001 From: Andy Waite Date: Mon, 12 Dec 2022 15:42:43 -0500 Subject: [PATCH 085/104] Add link to docs --- README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/README.md b/README.md index 0f1b626a..70050619 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,8 @@ Syntax Tree is a suite of tools built on top of the internal CRuby parser. It pr It is built with only standard library dependencies. 
It additionally ships with a plugin system so that you can build your own syntax trees from other languages and incorporate these tools. +[RDoc Documentation](https://ruby-syntax-tree.github.io/syntax_tree/) + - [Installation](#installation) - [CLI](#cli) - [ast](#ast) From 9136254c32f726f17ccf7c60a92afcc7a3e6b621 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Tue, 13 Dec 2022 22:54:45 -0500 Subject: [PATCH 086/104] Move documentation --- README.md | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/README.md b/README.md index fb3556c9..7a943ca8 100644 --- a/README.md +++ b/README.md @@ -11,8 +11,6 @@ Syntax Tree is a suite of tools built on top of the internal CRuby parser. It pr It is built with only standard library dependencies. It additionally ships with a plugin system so that you can build your own syntax trees from other languages and incorporate these tools. -[RDoc Documentation](https://ruby-syntax-tree.github.io/syntax_tree/) - - [Installation](#installation) - [CLI](#cli) - [ast](#ast) @@ -326,7 +324,7 @@ stree write "**/{[!schema]*,*}.rb" ## Library -Syntax Tree can be used as a library to access the syntax tree underlying Ruby source code. +Syntax Tree can be used as a library to access the syntax tree underlying Ruby source code. The API is described below. For the full library documentation, see the [RDoc documentation](https://ruby-syntax-tree.github.io/syntax_tree/). 
### SyntaxTree.read(filepath) From aeafc84aae49687ea2607dfad648a41132f913cb Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 14 Dec 2022 15:09:18 -0500 Subject: [PATCH 087/104] Rename YARV classes for consistency --- lib/syntax_tree.rb | 2 +- lib/syntax_tree/yarv/decompiler.rb | 254 ++++++++++++ lib/syntax_tree/yarv/disasm_formatter.rb | 211 ---------- lib/syntax_tree/yarv/disassembler.rb | 389 +++++++++---------- lib/syntax_tree/yarv/instruction_sequence.rb | 6 +- test/yarv_test.rb | 8 +- 6 files changed, 435 insertions(+), 435 deletions(-) create mode 100644 lib/syntax_tree/yarv/decompiler.rb delete mode 100644 lib/syntax_tree/yarv/disasm_formatter.rb diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index eadb485d..2e2d2a42 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -30,7 +30,7 @@ require_relative "syntax_tree/yarv" require_relative "syntax_tree/yarv/bf" require_relative "syntax_tree/yarv/compiler" -require_relative "syntax_tree/yarv/disasm_formatter" +require_relative "syntax_tree/yarv/decompiler" require_relative "syntax_tree/yarv/disassembler" require_relative "syntax_tree/yarv/instruction_sequence" require_relative "syntax_tree/yarv/instructions" diff --git a/lib/syntax_tree/yarv/decompiler.rb b/lib/syntax_tree/yarv/decompiler.rb new file mode 100644 index 00000000..a6a567fb --- /dev/null +++ b/lib/syntax_tree/yarv/decompiler.rb @@ -0,0 +1,254 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + # This class is responsible for taking a compiled instruction sequence and + # walking through it to generate equivalent Ruby code. + class Decompiler + # When we're decompiling, we use a looped case statement to emulate + # jumping around in the same way the virtual machine would. This class + # provides convenience methods for generating the AST nodes that have to + # do with that label. 
+ class BlockLabel + include DSL + attr_reader :name + + def initialize(name) + @name = name + end + + def field + VarField(Ident(name)) + end + + def ref + VarRef(Ident(name)) + end + end + + include DSL + attr_reader :iseq, :block_label + + def initialize(iseq) + @iseq = iseq + @block_label = BlockLabel.new("__block_label") + end + + def to_ruby + Program(decompile(iseq)) + end + + private + + def node_for(value) + case value + when Integer + Int(value.to_s) + when Symbol + SymbolLiteral(Ident(value.to_s)) + end + end + + def decompile(iseq) + label = :label_0 + clauses = {} + clause = [] + + iseq.insns.each do |insn| + case insn + when InstructionSequence::Label + unless clause.last.is_a?(Next) + clause << Assign(block_label.field, node_for(insn.name)) + end + + clauses[label] = clause + clause = [] + label = insn.name + when BranchUnless + body = [ + Assign(block_label.field, node_for(insn.label.name)), + Next(Args([])) + ] + + clause << IfNode(clause.pop, Statements(body), nil) + when Dup + clause << clause.last + when DupHash + assocs = + insn.object.map do |key, value| + Assoc(node_for(key), node_for(value)) + end + + clause << HashLiteral(LBrace("{"), assocs) + when GetGlobal + clause << VarRef(GVar(insn.name.to_s)) + when GetLocalWC0 + local = iseq.local_table.locals[insn.index] + clause << VarRef(Ident(local.name.to_s)) + when Jump + clause << Assign(block_label.field, node_for(insn.label.name)) + clause << Next(Args([])) + when Leave + value = Args([clause.pop]) + clause << (iseq.type == :top ? 
Break(value) : ReturnNode(value)) + when OptAnd, OptDiv, OptEq, OptGE, OptGT, OptLE, OptLT, OptLTLT, + OptMinus, OptMod, OptMult, OptOr, OptPlus + left, right = clause.pop(2) + clause << Binary(left, insn.calldata.method, right) + when OptAref + collection, arg = clause.pop(2) + clause << ARef(collection, Args([arg])) + when OptAset + collection, arg, value = clause.pop(3) + + clause << if value.is_a?(Binary) && value.left.is_a?(ARef) && + collection === value.left.collection && + arg === value.left.index.parts[0] + OpAssign( + ARefField(collection, Args([arg])), + Op("#{value.operator}="), + value.right + ) + else + Assign(ARefField(collection, Args([arg])), value) + end + when OptNEq + left, right = clause.pop(2) + clause << Binary(left, :"!=", right) + when OptSendWithoutBlock + method = insn.calldata.method.to_s + argc = insn.calldata.argc + + if insn.calldata.flag?(CallData::CALL_FCALL) + if argc == 0 + clause.pop + clause << CallNode(nil, nil, Ident(method), Args([])) + elsif argc == 1 && method.end_with?("=") + _receiver, argument = clause.pop(2) + clause << Assign( + CallNode(nil, nil, Ident(method[0..-2]), nil), + argument + ) + else + _receiver, *arguments = clause.pop(argc + 1) + clause << CallNode( + nil, + nil, + Ident(method), + ArgParen(Args(arguments)) + ) + end + else + if argc == 0 + clause << CallNode(clause.pop, Period("."), Ident(method), nil) + elsif argc == 1 && method.end_with?("=") + receiver, argument = clause.pop(2) + clause << Assign( + CallNode(receiver, Period("."), Ident(method[0..-2]), nil), + argument + ) + else + receiver, *arguments = clause.pop(argc + 1) + clause << CallNode( + receiver, + Period("."), + Ident(method), + ArgParen(Args(arguments)) + ) + end + end + when PutObject + case insn.object + when Float + clause << FloatLiteral(insn.object.inspect) + when Integer + clause << Int(insn.object.inspect) + else + raise "Unknown object type: #{insn.object.class.name}" + end + when PutObjectInt2Fix0 + clause << Int("0") + when 
PutObjectInt2Fix1 + clause << Int("1") + when PutSelf + clause << VarRef(Kw("self")) + when SetGlobal + target = GVar(insn.name.to_s) + value = clause.pop + + clause << if value.is_a?(Binary) && VarRef(target) === value.left + OpAssign(VarField(target), Op("#{value.operator}="), value.right) + else + Assign(VarField(target), value) + end + when SetLocalWC0 + target = Ident(local_name(insn.index, 0)) + value = clause.pop + + clause << if value.is_a?(Binary) && VarRef(target) === value.left + OpAssign(VarField(target), Op("#{value.operator}="), value.right) + else + Assign(VarField(target), value) + end + else + raise "Unknown instruction #{insn}" + end + end + + # If there's only one clause, then we don't need a case statement, and + # we can just disassemble the first clause. + clauses[label] = clause + return Statements(clauses.values.first) if clauses.size == 1 + + # Here we're going to build up a big case statement that will handle all + # of the different labels. + current = nil + clauses.reverse_each do |current_label, current_clause| + current = + When( + Args([node_for(current_label)]), + Statements(current_clause), + current + ) + end + switch = Case(Kw("case"), block_label.ref, current) + + # Here we're going to make sure that any locals that were established in + # the label_0 block are initialized so that scoping rules work + # correctly. + stack = [] + locals = [block_label.name] + + clauses[:label_0].each do |node| + if node.is_a?(Assign) && node.target.is_a?(VarField) && + node.target.value.is_a?(Ident) + value = node.target.value.value + next if locals.include?(value) + + stack << Assign(node.target, VarRef(Kw("nil"))) + locals << value + end + end + + # Finally, we'll set up the initial label and loop the entire case + # statement. 
+ stack << Assign(block_label.field, node_for(:label_0)) + stack << MethodAddBlock( + CallNode(nil, nil, Ident("loop"), Args([])), + BlockNode( + Kw("do"), + nil, + BodyStmt(Statements([switch]), nil, nil, nil, nil) + ) + ) + Statements(stack) + end + + def local_name(index, level) + current = iseq + level.times { current = current.parent_iseq } + current.local_table.locals[index].name.to_s + end + end + end +end diff --git a/lib/syntax_tree/yarv/disasm_formatter.rb b/lib/syntax_tree/yarv/disasm_formatter.rb deleted file mode 100644 index 566bc8fd..00000000 --- a/lib/syntax_tree/yarv/disasm_formatter.rb +++ /dev/null @@ -1,211 +0,0 @@ -# frozen_string_literal: true - -module SyntaxTree - module YARV - class DisasmFormatter - attr_reader :output, :queue - attr_reader :current_prefix, :current_iseq - - def initialize - @output = StringIO.new - @queue = [] - - @current_prefix = "" - @current_iseq = nil - end - - ######################################################################## - # Helpers for various instructions - ######################################################################## - - def calldata(value) - flag_names = [] - flag_names << :ARGS_SPLAT if value.flag?(CallData::CALL_ARGS_SPLAT) - if value.flag?(CallData::CALL_ARGS_BLOCKARG) - flag_names << :ARGS_BLOCKARG - end - flag_names << :FCALL if value.flag?(CallData::CALL_FCALL) - flag_names << :VCALL if value.flag?(CallData::CALL_VCALL) - flag_names << :ARGS_SIMPLE if value.flag?(CallData::CALL_ARGS_SIMPLE) - flag_names << :BLOCKISEQ if value.flag?(CallData::CALL_BLOCKISEQ) - flag_names << :KWARG if value.flag?(CallData::CALL_KWARG) - flag_names << :KW_SPLAT if value.flag?(CallData::CALL_KW_SPLAT) - flag_names << :TAILCALL if value.flag?(CallData::CALL_TAILCALL) - flag_names << :SUPER if value.flag?(CallData::CALL_SUPER) - flag_names << :ZSUPER if value.flag?(CallData::CALL_ZSUPER) - flag_names << :OPT_SEND if value.flag?(CallData::CALL_OPT_SEND) - flag_names << :KW_SPLAT_MUT if 
value.flag?(CallData::CALL_KW_SPLAT_MUT) - - parts = [] - parts << "mid:#{value.method}" if value.method - parts << "argc:#{value.argc}" - parts << "kw:[#{value.kw_arg.join(", ")}]" if value.kw_arg - parts << flag_names.join("|") if flag_names.any? - - "" - end - - def enqueue(iseq) - queue << iseq - end - - def event(name) - case name - when :RUBY_EVENT_B_CALL - "Bc" - when :RUBY_EVENT_B_RETURN - "Br" - when :RUBY_EVENT_CALL - "Ca" - when :RUBY_EVENT_CLASS - "Cl" - when :RUBY_EVENT_END - "En" - when :RUBY_EVENT_LINE - "Li" - when :RUBY_EVENT_RETURN - "Re" - else - raise "Unknown event: #{name}" - end - end - - def inline_storage(cache) - "" - end - - def instruction(name, operands = []) - operands.empty? ? name : "%-38s %s" % [name, operands.join(", ")] - end - - def label(value) - value.name["label_".length..] - end - - def local(index, explicit: nil, implicit: nil) - current = current_iseq - (explicit || implicit).times { current = current.parent_iseq } - - value = "#{current.local_table.name_at(index)}@#{index}" - value << ", #{explicit}" if explicit - value - end - - def object(value) - value.inspect - end - - ######################################################################## - # Main entrypoint - ######################################################################## - - def format! - while (@current_iseq = queue.shift) - output << "\n" if output.pos > 0 - format_iseq(@current_iseq) - end - - output.string - end - - private - - def format_iseq(iseq) - output << "#{current_prefix}== disasm: " - output << "#:1 " - - location = iseq.location - output << "(#{location.start_line},#{location.start_column})-" - output << "(#{location.end_line},#{location.end_column})" - output << "> " - - if iseq.catch_table.any? 
- output << "(catch: TRUE)\n" - output << "#{current_prefix}== catch table\n" - - with_prefix("#{current_prefix}| ") do - iseq.catch_table.each do |entry| - case entry - when InstructionSequence::CatchBreak - output << "#{current_prefix}catch type: break\n" - format_iseq(entry.iseq) - when InstructionSequence::CatchNext - output << "#{current_prefix}catch type: next\n" - when InstructionSequence::CatchRedo - output << "#{current_prefix}catch type: redo\n" - when InstructionSequence::CatchRescue - output << "#{current_prefix}catch type: rescue\n" - format_iseq(entry.iseq) - end - end - end - - output << "#{current_prefix}|#{"-" * 72}\n" - else - output << "(catch: FALSE)\n" - end - - if (local_table = iseq.local_table) && !local_table.empty? - output << "#{current_prefix}local table (size: #{local_table.size})\n" - - locals = - local_table.locals.each_with_index.map do |local, index| - "[%2d] %s@%d" % [local_table.offset(index), local.name, index] - end - - output << "#{current_prefix}#{locals.join(" ")}\n" - end - - length = 0 - events = [] - lines = [] - - iseq.insns.each do |insn| - case insn - when Integer - lines << insn - when Symbol - events << event(insn) - when InstructionSequence::Label - # skip - else - output << "#{current_prefix}%04d " % length - - disasm = insn.disasm(self) - output << disasm - - if lines.any? - output << " " * (65 - disasm.length) if disasm.length < 65 - elsif events.any? - output << " " * (39 - disasm.length) if disasm.length < 39 - end - - if lines.any? - output << "(%4d)" % lines.last - lines.clear - end - - if events.any? 
- output << "[#{events.join}]" - events.clear - end - - output << "\n" - length += insn.length - end - end - end - - def with_prefix(value) - previous = @current_prefix - - begin - @current_prefix = value - yield - ensure - @current_prefix = previous - end - end - end - end -end diff --git a/lib/syntax_tree/yarv/disassembler.rb b/lib/syntax_tree/yarv/disassembler.rb index af325c31..033b6d3d 100644 --- a/lib/syntax_tree/yarv/disassembler.rb +++ b/lib/syntax_tree/yarv/disassembler.rb @@ -2,252 +2,209 @@ module SyntaxTree module YARV - # This class is responsible for taking a compiled instruction sequence and - # walking through it to generate equivalent Ruby code. class Disassembler - # When we're disassmebling, we use a looped case statement to emulate - # jumping around in the same way the virtual machine would. This class - # provides convenience methods for generating the AST nodes that have to - # do with that label. - class DisasmLabel - include DSL - attr_reader :name - - def initialize(name) - @name = name - end + attr_reader :output, :queue + attr_reader :current_prefix, :current_iseq + + def initialize + @output = StringIO.new + @queue = [] + + @current_prefix = "" + @current_iseq = nil + end + + ######################################################################## + # Helpers for various instructions + ######################################################################## - def field - VarField(Ident(name)) + def calldata(value) + flag_names = [] + flag_names << :ARGS_SPLAT if value.flag?(CallData::CALL_ARGS_SPLAT) + if value.flag?(CallData::CALL_ARGS_BLOCKARG) + flag_names << :ARGS_BLOCKARG end + flag_names << :FCALL if value.flag?(CallData::CALL_FCALL) + flag_names << :VCALL if value.flag?(CallData::CALL_VCALL) + flag_names << :ARGS_SIMPLE if value.flag?(CallData::CALL_ARGS_SIMPLE) + flag_names << :BLOCKISEQ if value.flag?(CallData::CALL_BLOCKISEQ) + flag_names << :KWARG if value.flag?(CallData::CALL_KWARG) + flag_names << :KW_SPLAT if 
value.flag?(CallData::CALL_KW_SPLAT) + flag_names << :TAILCALL if value.flag?(CallData::CALL_TAILCALL) + flag_names << :SUPER if value.flag?(CallData::CALL_SUPER) + flag_names << :ZSUPER if value.flag?(CallData::CALL_ZSUPER) + flag_names << :OPT_SEND if value.flag?(CallData::CALL_OPT_SEND) + flag_names << :KW_SPLAT_MUT if value.flag?(CallData::CALL_KW_SPLAT_MUT) + + parts = [] + parts << "mid:#{value.method}" if value.method + parts << "argc:#{value.argc}" + parts << "kw:[#{value.kw_arg.join(", ")}]" if value.kw_arg + parts << flag_names.join("|") if flag_names.any? + + "" + end - def ref - VarRef(Ident(name)) + def enqueue(iseq) + queue << iseq + end + + def event(name) + case name + when :RUBY_EVENT_B_CALL + "Bc" + when :RUBY_EVENT_B_RETURN + "Br" + when :RUBY_EVENT_CALL + "Ca" + when :RUBY_EVENT_CLASS + "Cl" + when :RUBY_EVENT_END + "En" + when :RUBY_EVENT_LINE + "Li" + when :RUBY_EVENT_RETURN + "Re" + else + raise "Unknown event: #{name}" end end - include DSL - attr_reader :iseq, :disasm_label + def inline_storage(cache) + "" + end - def initialize(iseq) - @iseq = iseq - @disasm_label = DisasmLabel.new("__disasm_label") + def instruction(name, operands = []) + operands.empty? ? name : "%-38s %s" % [name, operands.join(", ")] end - def to_ruby - Program(disassemble(iseq)) + def label(value) + value.name["label_".length..] end - private + def local(index, explicit: nil, implicit: nil) + current = current_iseq + (explicit || implicit).times { current = current.parent_iseq } + + value = "#{current.local_table.name_at(index)}@#{index}" + value << ", #{explicit}" if explicit + value + end - def node_for(value) - case value - when Integer - Int(value.to_s) - when Symbol - SymbolLiteral(Ident(value.to_s)) + def object(value) + value.inspect + end + + ######################################################################## + # Main entrypoint + ######################################################################## + + def format! 
+ while (@current_iseq = queue.shift) + output << "\n" if output.pos > 0 + format_iseq(@current_iseq) end + + output.string end - def disassemble(iseq) - label = :label_0 - clauses = {} - clause = [] + private + + def format_iseq(iseq) + output << "#{current_prefix}== disasm: " + output << "#:1 " + + location = iseq.location + output << "(#{location.start_line},#{location.start_column})-" + output << "(#{location.end_line},#{location.end_column})" + output << "> " + + if iseq.catch_table.any? + output << "(catch: TRUE)\n" + output << "#{current_prefix}== catch table\n" + + with_prefix("#{current_prefix}| ") do + iseq.catch_table.each do |entry| + case entry + when InstructionSequence::CatchBreak + output << "#{current_prefix}catch type: break\n" + format_iseq(entry.iseq) + when InstructionSequence::CatchNext + output << "#{current_prefix}catch type: next\n" + when InstructionSequence::CatchRedo + output << "#{current_prefix}catch type: redo\n" + when InstructionSequence::CatchRescue + output << "#{current_prefix}catch type: rescue\n" + format_iseq(entry.iseq) + end + end + end + + output << "#{current_prefix}|#{"-" * 72}\n" + else + output << "(catch: FALSE)\n" + end + + if (local_table = iseq.local_table) && !local_table.empty? 
+ output << "#{current_prefix}local table (size: #{local_table.size})\n" + + locals = + local_table.locals.each_with_index.map do |local, index| + "[%2d] %s@%d" % [local_table.offset(index), local.name, index] + end + + output << "#{current_prefix}#{locals.join(" ")}\n" + end + + length = 0 + events = [] + lines = [] iseq.insns.each do |insn| case insn + when Integer + lines << insn + when Symbol + events << event(insn) when InstructionSequence::Label - unless clause.last.is_a?(Next) - clause << Assign(disasm_label.field, node_for(insn.name)) - end + # skip + else + output << "#{current_prefix}%04d " % length - clauses[label] = clause - clause = [] - label = insn.name - when BranchUnless - body = [ - Assign(disasm_label.field, node_for(insn.label.name)), - Next(Args([])) - ] - - clause << IfNode(clause.pop, Statements(body), nil) - when Dup - clause << clause.last - when DupHash - assocs = - insn.object.map do |key, value| - Assoc(node_for(key), node_for(value)) - end + disasm = insn.disasm(self) + output << disasm - clause << HashLiteral(LBrace("{"), assocs) - when GetGlobal - clause << VarRef(GVar(insn.name.to_s)) - when GetLocalWC0 - local = iseq.local_table.locals[insn.index] - clause << VarRef(Ident(local.name.to_s)) - when Jump - clause << Assign(disasm_label.field, node_for(insn.label.name)) - clause << Next(Args([])) - when Leave - value = Args([clause.pop]) - clause << (iseq.type == :top ? 
Break(value) : ReturnNode(value)) - when OptAnd, OptDiv, OptEq, OptGE, OptGT, OptLE, OptLT, OptLTLT, - OptMinus, OptMod, OptMult, OptOr, OptPlus - left, right = clause.pop(2) - clause << Binary(left, insn.calldata.method, right) - when OptAref - collection, arg = clause.pop(2) - clause << ARef(collection, Args([arg])) - when OptAset - collection, arg, value = clause.pop(3) - - clause << if value.is_a?(Binary) && value.left.is_a?(ARef) && - collection === value.left.collection && - arg === value.left.index.parts[0] - OpAssign( - ARefField(collection, Args([arg])), - Op("#{value.operator}="), - value.right - ) - else - Assign(ARefField(collection, Args([arg])), value) - end - when OptNEq - left, right = clause.pop(2) - clause << Binary(left, :"!=", right) - when OptSendWithoutBlock - method = insn.calldata.method.to_s - argc = insn.calldata.argc - - if insn.calldata.flag?(CallData::CALL_FCALL) - if argc == 0 - clause.pop - clause << CallNode(nil, nil, Ident(method), Args([])) - elsif argc == 1 && method.end_with?("=") - _receiver, argument = clause.pop(2) - clause << Assign( - CallNode(nil, nil, Ident(method[0..-2]), nil), - argument - ) - else - _receiver, *arguments = clause.pop(argc + 1) - clause << CallNode( - nil, - nil, - Ident(method), - ArgParen(Args(arguments)) - ) - end - else - if argc == 0 - clause << CallNode(clause.pop, Period("."), Ident(method), nil) - elsif argc == 1 && method.end_with?("=") - receiver, argument = clause.pop(2) - clause << Assign( - CallNode(receiver, Period("."), Ident(method[0..-2]), nil), - argument - ) - else - receiver, *arguments = clause.pop(argc + 1) - clause << CallNode( - receiver, - Period("."), - Ident(method), - ArgParen(Args(arguments)) - ) - end - end - when PutObject - case insn.object - when Float - clause << FloatLiteral(insn.object.inspect) - when Integer - clause << Int(insn.object.inspect) - else - raise "Unknown object type: #{insn.object.class.name}" + if lines.any? 
+ output << " " * (65 - disasm.length) if disasm.length < 65 + elsif events.any? + output << " " * (39 - disasm.length) if disasm.length < 39 end - when PutObjectInt2Fix0 - clause << Int("0") - when PutObjectInt2Fix1 - clause << Int("1") - when PutSelf - clause << VarRef(Kw("self")) - when SetGlobal - target = GVar(insn.name.to_s) - value = clause.pop - - clause << if value.is_a?(Binary) && VarRef(target) === value.left - OpAssign(VarField(target), Op("#{value.operator}="), value.right) - else - Assign(VarField(target), value) + + if lines.any? + output << "(%4d)" % lines.last + lines.clear end - when SetLocalWC0 - target = Ident(local_name(insn.index, 0)) - value = clause.pop - - clause << if value.is_a?(Binary) && VarRef(target) === value.left - OpAssign(VarField(target), Op("#{value.operator}="), value.right) - else - Assign(VarField(target), value) + + if events.any? + output << "[#{events.join}]" + events.clear end - else - raise "Unknown instruction #{insn}" - end - end - # If there's only one clause, then we don't need a case statement, and - # we can just disassemble the first clause. - clauses[label] = clause - return Statements(clauses.values.first) if clauses.size == 1 - - # Here we're going to build up a big case statement that will handle all - # of the different labels. - current = nil - clauses.reverse_each do |current_label, current_clause| - current = - When( - Args([node_for(current_label)]), - Statements(current_clause), - current - ) - end - switch = Case(Kw("case"), disasm_label.ref, current) - - # Here we're going to make sure that any locals that were established in - # the label_0 block are initialized so that scoping rules work - # correctly. 
- stack = [] - locals = [disasm_label.name] - - clauses[:label_0].each do |node| - if node.is_a?(Assign) && node.target.is_a?(VarField) && - node.target.value.is_a?(Ident) - value = node.target.value.value - next if locals.include?(value) - - stack << Assign(node.target, VarRef(Kw("nil"))) - locals << value + output << "\n" + length += insn.length end end - - # Finally, we'll set up the initial label and loop the entire case - # statement. - stack << Assign(disasm_label.field, node_for(:label_0)) - stack << MethodAddBlock( - CallNode(nil, nil, Ident("loop"), Args([])), - BlockNode( - Kw("do"), - nil, - BodyStmt(Statements([switch]), nil, nil, nil, nil) - ) - ) - Statements(stack) end - def local_name(index, level) - current = iseq - level.times { current = current.parent_iseq } - current.local_table.locals[index].name.to_s + def with_prefix(value) + previous = @current_prefix + + begin + @current_prefix = value + yield + ensure + @current_prefix = previous + end end end end diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index ee5390a1..93b5018e 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -272,9 +272,9 @@ def to_a end def disasm - formatter = DisasmFormatter.new - formatter.enqueue(self) - formatter.format! + disassembler = Disassembler.new + disassembler.enqueue(self) + disassembler.format! end # This method converts our linked list of instructions into a final array diff --git a/test/yarv_test.rb b/test/yarv_test.rb index 02514a93..f8e0ffdb 100644 --- a/test/yarv_test.rb +++ b/test/yarv_test.rb @@ -31,7 +31,7 @@ class YARVTest < Minitest::Test CASES.each do |source, expected| define_method("test_disassemble_#{source}") do - assert_disassembles(expected, source) + assert_decompiles(expected, source) end end @@ -41,13 +41,13 @@ def test_bf ">>.>---.+++++++..+++.>>.<-.<.+++.------.--------.>>+.>++." 
iseq = YARV::Bf.new(hello_world).compile - Formatter.format(hello_world, YARV::Disassembler.new(iseq).to_ruby) + Formatter.format(hello_world, YARV::Decompiler.new(iseq).to_ruby) end private - def assert_disassembles(expected, source) - ruby = YARV::Disassembler.new(YARV.compile(source)).to_ruby + def assert_decompiles(expected, source) + ruby = YARV::Decompiler.new(YARV.compile(source)).to_ruby actual = Formatter.format(source, ruby) assert_equal expected, actual end From 9d57b6a7b8592e4a00a5a1b90db89fa2988b45b1 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 16 Dec 2022 08:23:05 -0500 Subject: [PATCH 088/104] Assembler --- lib/syntax_tree.rb | 1 + lib/syntax_tree/yarv/assembler.rb | 244 +++++++++++++++++++ lib/syntax_tree/yarv/compiler.rb | 13 +- lib/syntax_tree/yarv/instruction_sequence.rb | 4 +- 4 files changed, 259 insertions(+), 3 deletions(-) create mode 100644 lib/syntax_tree/yarv/assembler.rb diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index 2e2d2a42..41a33a78 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -30,6 +30,7 @@ require_relative "syntax_tree/yarv" require_relative "syntax_tree/yarv/bf" require_relative "syntax_tree/yarv/compiler" +require_relative "syntax_tree/yarv/assembler" require_relative "syntax_tree/yarv/decompiler" require_relative "syntax_tree/yarv/disassembler" require_relative "syntax_tree/yarv/instruction_sequence" diff --git a/lib/syntax_tree/yarv/assembler.rb b/lib/syntax_tree/yarv/assembler.rb new file mode 100644 index 00000000..b5df37b8 --- /dev/null +++ b/lib/syntax_tree/yarv/assembler.rb @@ -0,0 +1,244 @@ +# frozen_string_literal: true + +module SyntaxTree + module YARV + class Assembler + class ObjectVisitor < Compiler::RubyVisitor + def visit_dyna_symbol(node) + if node.parts.empty? 
+ :"" + else + raise CompilationError + end + end + + def visit_string_literal(node) + case node.parts.length + when 0 + "" + when 1 + raise CompilationError unless node.parts.first.is_a?(TStringContent) + node.parts.first.value + else + raise CompilationError + end + end + end + + attr_reader :filepath + + def initialize(filepath) + @filepath = filepath + end + + def assemble + iseq = InstructionSequence.new(:top, "
", nil, Location.default) + labels = {} + + File.foreach(filepath, chomp: true) do |line| + case line.strip + when "" + # skip over blank lines + next + when /^;/ + # skip over comments + next + when /^(\w+):$/ + # create labels + iseq.push(labels[$1] = iseq.label) + next + end + + insn, operands = line.split(" ", 2) + + case insn + when "adjuststack" + iseq.adjuststack(parse_number(operands)) + when "anytostring" + iseq.anytostring + when "checkmatch" + iseq.checkmatch(parse_number(operands)) + when "checktype" + iseq.checktype(parse_number(operands)) + when "concatarray" + iseq.concatarray + when "concatstrings" + iseq.concatstrings(parse_number(operands)) + when "dup" + iseq.dup + when "dupn" + iseq.dupn(parse_number(operands)) + when "duparray" + object = parse(operands) + raise unless object.is_a?(Array) + + iseq.duparray(object) + when "duphash" + object = parse(operands) + raise unless object.is_a?(Hash) + + iseq.duphash(object) + when "getinstancevariable" + object = parse(operands) + raise unless object.is_a?(Symbol) + + iseq.getinstancevariable(object) + when "intern" + iseq.intern + when "leave" + iseq.leave + when "newarray" + iseq.newarray(parse_number(operands)) + when "newrange" + object = parse(operands) + raise if object != 0 && object != 1 + + iseq.newrange(operands.to_i) + when "nop" + iseq.nop + when "objtostring" + iseq.objtostring( + YARV.calldata( + :to_s, + 0, + CallData::CALL_ARGS_SIMPLE | CallData::CALL_FCALL + ) + ) + when "opt_and" + iseq.send(YARV.calldata(:&, 1)) + when "opt_aref" + iseq.send(YARV.calldata(:[], 1)) + when "opt_aref_with" + object = parse(operands) + raise unless object.is_a?(String) + + iseq.opt_aref_with(object, YARV.calldata(:[], 1)) + when "opt_div" + iseq.send(YARV.calldata(:/, 1)) + when "opt_empty_p" + iseq.send( + YARV.calldata( + :empty?, + 0, + CallData::CALL_ARGS_SIMPLE | CallData::CALL_FCALL + ) + ) + when "opt_eqeq" + iseq.send(YARV.calldata(:==, 1)) + when "opt_ge" + iseq.send(YARV.calldata(:>=, 1)) + when 
"opt_getconstant_path" + object = parse(operands) + raise unless object.is_a?(Array) + + iseq.opt_getconstant_path(object) + when "opt_ltlt" + iseq.send(YARV.calldata(:<<, 1)) + when "opt_minus" + iseq.send(YARV.calldata(:-, 1)) + when "opt_mult" + iseq.send(YARV.calldata(:*, 1)) + when "opt_or" + iseq.send(YARV.calldata(:|, 1)) + when "opt_plus" + iseq.send(YARV.calldata(:+, 1)) + when "pop" + iseq.pop + when "putnil" + iseq.putnil + when "putobject" + iseq.putobject(parse(operands)) + when "putself" + iseq.putself + when "putstring" + object = parse(operands) + raise unless object.is_a?(String) + + iseq.putstring(object) + when "send" + iseq.send(calldata(operands)) + when "setinstancevariable" + object = parse(operands) + raise unless object.is_a?(Symbol) + + iseq.setinstancevariable(object) + when "swap" + iseq.swap + when "toregexp" + options, length = operands.split(", ") + iseq.toregexp(parse_number(options), parse_number(length)) + else + raise "Could not understand: #{line}" + end + end + + iseq.compile! 
+ iseq + end + + def self.assemble(filepath) + new(filepath).assemble + end + + private + + def parse(value) + program = SyntaxTree.parse(value) + raise if program.statements.body.length != 1 + + program.statements.body.first.accept(ObjectVisitor.new) + end + + def parse_number(value) + object = parse(value) + raise unless object.is_a?(Integer) + + object + end + + def calldata(value) + message, argc_value, flags_value = value.split + flags = + if flags_value + flags_value + .split("|") + .map do |flag| + case flag + when "ARGS_SPLAT" + CallData::CALL_ARGS_SPLAT + when "ARGS_BLOCKARG" + CallData::CALL_ARGS_BLOCKARG + when "FCALL" + CallData::CALL_FCALL + when "VCALL" + CallData::CALL_VCALL + when "ARGS_SIMPLE" + CallData::CALL_ARGS_SIMPLE + when "BLOCKISEQ" + CallData::CALL_BLOCKISEQ + when "KWARG" + CallData::CALL_KWARG + when "KW_SPLAT" + CallData::CALL_KW_SPLAT + when "TAILCALL" + CallData::CALL_TAILCALL + when "SUPER" + CallData::CALL_SUPER + when "ZSUPER" + CallData::CALL_ZSUPER + when "OPT_SEND" + CallData::CALL_OPT_SEND + when "KW_SPLAT_MUT" + CallData::CALL_KW_SPLAT_MUT + end + end + .inject(:|) + else + CallData::CALL_ARGS_SIMPLE + end + + YARV.calldata(message.to_sym, argc_value&.to_i || 0, flags) + end + end + end +end diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index 046fb438..4bb5d654 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -148,7 +148,18 @@ def visit_imaginary(node) end def visit_int(node) - node.value.to_i + case (value = node.value) + when /^0b/ + value[2..].to_i(2) + when /^0o/ + value[2..].to_i(8) + when /^0d/ + value[2..].to_i + when /^0x/ + value[2..].to_i(16) + else + value.to_i + end end def visit_label(node) diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index 93b5018e..0f1eadd0 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -223,8 +223,8 
@@ def eval compiled = to_a # Temporary hack until we get these working. - compiled[4][:node_id] = 11 - compiled[4][:node_ids] = [1, 0, 3, 2, 6, 7, 9, -1] + compiled[4][:node_id] = -1 + compiled[4][:node_ids] = [-1] * insns.length Fiddle.dlunwrap(ISEQ_LOAD.call(Fiddle.dlwrap(compiled), 0, nil)).eval end From 7e1b2a8176c37786323e334e477da8bd216f6ad6 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 16 Dec 2022 09:12:21 -0500 Subject: [PATCH 089/104] Fix Ruby head build --- lib/syntax_tree/yarv/compiler.rb | 37 ++++++-------------------------- 1 file changed, 7 insertions(+), 30 deletions(-) diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index 4bb5d654..496c2075 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -663,32 +663,10 @@ def visit_call(node) when ArgsForward flag |= CallData::CALL_TAILCALL if options.tailcall_optimization? - if RUBY_VERSION < "3.2" - flag |= CallData::CALL_ARGS_SPLAT - lookup = iseq.local_table.find(:*) - iseq.getlocal(lookup.index, lookup.level) - iseq.splatarray(arg_parts.length != 1) - else - flag |= CallData::CALL_ARGS_SPLAT - lookup = iseq.local_table.find(:*) - iseq.getlocal(lookup.index, lookup.level) - iseq.splatarray(true) - - flag |= CallData::CALL_KW_SPLAT - iseq.putspecialobject(PutSpecialObject::OBJECT_VMCORE) - iseq.newhash(0) - lookup = iseq.local_table.find(:**) - iseq.getlocal(lookup.index, lookup.level) - iseq.send( - YARV.calldata( - :"core#hash_merge_kwd", - 2, - CallData::CALL_ARGS_SIMPLE - ) - ) - iseq.newarray(1) - iseq.concatarray - end + flag |= CallData::CALL_ARGS_SPLAT + lookup = iseq.local_table.find(:*) + iseq.getlocal(lookup.index, lookup.level) + iseq.splatarray(arg_parts.length != 1) flag |= CallData::CALL_ARGS_BLOCKARG lookup = iseq.local_table.find(:&) @@ -1339,14 +1317,13 @@ def visit_params(node) if node.keyword_rest.is_a?(ArgsForward) if RUBY_VERSION >= "3.2" iseq.local_table.plain(:*) - iseq.local_table.plain(:**) 
iseq.local_table.plain(:&) + iseq.local_table.plain(:"...") iseq.argument_options[:rest_start] = iseq.argument_size - iseq.argument_options[:block_start] = iseq.argument_size + 2 - iseq.argument_options[:kwrest] = iseq.argument_size + 1 + iseq.argument_options[:block_start] = iseq.argument_size + 1 - iseq.argument_size += 3 + iseq.argument_size += 2 else iseq.local_table.plain(:*) iseq.local_table.plain(:&) From 83d21fde2fb5f87e219d689486ea5edd911338b0 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 16 Dec 2022 18:22:52 -0500 Subject: [PATCH 090/104] Assemble other instructions --- lib/syntax_tree.rb | 2 +- lib/syntax_tree/yarv/assembler.rb | 361 ++++++++++++++----- lib/syntax_tree/yarv/compiler.rb | 2 +- lib/syntax_tree/yarv/instruction_sequence.rb | 12 +- lib/syntax_tree/yarv/instructions.rb | 2 +- 5 files changed, 269 insertions(+), 110 deletions(-) diff --git a/lib/syntax_tree.rb b/lib/syntax_tree.rb index 41a33a78..1357e95f 100644 --- a/lib/syntax_tree.rb +++ b/lib/syntax_tree.rb @@ -30,13 +30,13 @@ require_relative "syntax_tree/yarv" require_relative "syntax_tree/yarv/bf" require_relative "syntax_tree/yarv/compiler" -require_relative "syntax_tree/yarv/assembler" require_relative "syntax_tree/yarv/decompiler" require_relative "syntax_tree/yarv/disassembler" require_relative "syntax_tree/yarv/instruction_sequence" require_relative "syntax_tree/yarv/instructions" require_relative "syntax_tree/yarv/legacy" require_relative "syntax_tree/yarv/local_table" +require_relative "syntax_tree/yarv/assembler" # Syntax Tree is a suite of tools built on top of the internal CRuby parser. It # provides the ability to generate a syntax tree from source, as well as the diff --git a/lib/syntax_tree/yarv/assembler.rb b/lib/syntax_tree/yarv/assembler.rb index b5df37b8..c3a874e9 100644 --- a/lib/syntax_tree/yarv/assembler.rb +++ b/lib/syntax_tree/yarv/assembler.rb @@ -33,20 +33,37 @@ def initialize(filepath) def assemble iseq = InstructionSequence.new(:top, "
", nil, Location.default) - labels = {} + assemble_iseq(iseq, File.readlines(filepath, chomp: true)) + + iseq.compile! + iseq + end + + def self.assemble(filepath) + new(filepath).assemble + end + + private + + def assemble_iseq(iseq, lines) + labels = Hash.new { |hash, name| hash[name] = iseq.label } + line_index = 0 + + while line_index < lines.length + line = lines[line_index] + line_index += 1 - File.foreach(filepath, chomp: true) do |line| case line.strip - when "" - # skip over blank lines - next - when /^;/ - # skip over comments + when "", /^;/ + # skip over blank lines and comments next when /^(\w+):$/ # create labels - iseq.push(labels[$1] = iseq.label) + iseq.push(labels[$1]) next + when /^__END__/ + # skip over the rest of the file when we hit __END__ + return end insn, operands = line.split(" ", 2) @@ -56,6 +73,12 @@ def assemble iseq.adjuststack(parse_number(operands)) when "anytostring" iseq.anytostring + when "branchif" + iseq.branchif(labels[operands]) + when "branchnil" + iseq.branchnil(labels[operands]) + when "branchunless" + iseq.branchunless(labels[operands]) when "checkmatch" iseq.checkmatch(parse_number(operands)) when "checktype" @@ -64,84 +87,200 @@ def assemble iseq.concatarray when "concatstrings" iseq.concatstrings(parse_number(operands)) + when "defineclass" + body = parse_nested(lines[line_index..]) + line_index += body.length + + name_value, flags_value = operands.split(/,\s*/) + name = parse_symbol(name_value) + flags = parse_number(flags_value) + + class_iseq = iseq.class_child_iseq(name.to_s, Location.default) + assemble_iseq(class_iseq, body) + iseq.defineclass(name, class_iseq, flags) + when "definemethod" + body = parse_nested(lines[line_index..]) + line_index += body.length + + name = parse_symbol(operands) + method_iseq = iseq.method_child_iseq(name.to_s, Location.default) + assemble_iseq(method_iseq, body) + + iseq.definemethod(name, method_iseq) + when "definesmethod" + body = parse_nested(lines[line_index..]) + line_index 
+= body.length + + name = parse_symbol(operands) + method_iseq = iseq.method_child_iseq(name.to_s, Location.default) + + assemble_iseq(method_iseq, body) + iseq.definesmethod(name, method_iseq) when "dup" iseq.dup when "dupn" iseq.dupn(parse_number(operands)) when "duparray" - object = parse(operands) - raise unless object.is_a?(Array) - - iseq.duparray(object) + iseq.duparray(parse_type(operands, Array)) when "duphash" - object = parse(operands) - raise unless object.is_a?(Hash) - - iseq.duphash(object) + iseq.duphash(parse_type(operands, Hash)) + when "expandarray" + number, flags = operands.split(/,\s*/) + iseq.expandarray(parse_number(number), parse_number(flags)) + when "getclassvariable" + iseq.getclassvariable(parse_symbol(operands)) + when "getconstant" + iseq.getconstant(parse_symbol(operands)) + when "getglobal" + iseq.getglobal(parse_symbol(operands)) when "getinstancevariable" - object = parse(operands) - raise unless object.is_a?(Symbol) + iseq.getinstancevariable(parse_symbol(operands)) + when "getlocal" + name_string, level_string = operands.split(/,\s*/) + name = name_string.to_sym + level = level_string&.to_i || 0 - iseq.getinstancevariable(object) + iseq.local_table.plain(name) + lookup = iseq.local_table.find(name, level) + iseq.getlocal(lookup.index, lookup.level) + when "getspecial" + key, type = operands.split(/,\s*/) + iseq.getspecial(parse_number(key), parse_number(type)) when "intern" iseq.intern + when "invokesuper" + cdata = + if operands + calldata(operands) + else + YARV.calldata( + nil, + 0, + CallData::CALL_FCALL | CallData::CALL_ARGS_SIMPLE | + CallData::CALL_SUPER + ) + end + + block_iseq = + if lines[line_index].start_with?(" ") + body = parse_nested(lines[line_index..]) + line_index += body.length + + block_iseq = iseq.block_child_iseq(Location.default) + assemble_iseq(block_iseq, body) + block_iseq + end + + iseq.invokesuper(cdata, block_iseq) + when "jump" + iseq.jump(labels[operands]) when "leave" iseq.leave when "newarray" 
iseq.newarray(parse_number(operands)) + when "newarraykwsplat" + iseq.newarraykwsplat(parse_number(operands)) + when "newhash" + iseq.newhash(parse_number(operands)) when "newrange" - object = parse(operands) - raise if object != 0 && object != 1 - - iseq.newrange(operands.to_i) + iseq.newrange(parse_options(operands, [0, 1])) when "nop" iseq.nop when "objtostring" - iseq.objtostring( - YARV.calldata( - :to_s, - 0, - CallData::CALL_ARGS_SIMPLE | CallData::CALL_FCALL - ) - ) + iseq.objtostring(YARV.calldata(:to_s)) + when "once" + block_iseq = + if lines[line_index].start_with?(" ") + body = parse_nested(lines[line_index..]) + line_index += body.length + + block_iseq = iseq.block_child_iseq(Location.default) + assemble_iseq(block_iseq, body) + block_iseq + end + + iseq.once(block_iseq, iseq.inline_storage) when "opt_and" iseq.send(YARV.calldata(:&, 1)) when "opt_aref" iseq.send(YARV.calldata(:[], 1)) when "opt_aref_with" - object = parse(operands) - raise unless object.is_a?(String) + iseq.opt_aref_with(parse_string(operands), YARV.calldata(:[], 1)) + when "opt_aset" + iseq.send(YARV.calldata(:[]=, 2)) + when "opt_aset_with" + iseq.opt_aset_with(parse_string(operands), YARV.calldata(:[]=, 2)) + when "opt_case_dispatch" + cdhash_value, else_label_value = operands.split(/\s*\},\s*/) + cdhash_value.sub!(/\A\{/, "") + + pairs = + cdhash_value + .split(/\s*,\s*/) + .map! 
{ |pair| pair.split(/\s*=>\s*/) } + + cdhash = pairs.to_h { |value, nm| [parse(value), labels[nm]] } + else_label = labels[else_label_value] - iseq.opt_aref_with(object, YARV.calldata(:[], 1)) + iseq.opt_case_dispatch(cdhash, else_label) when "opt_div" iseq.send(YARV.calldata(:/, 1)) when "opt_empty_p" - iseq.send( - YARV.calldata( - :empty?, - 0, - CallData::CALL_ARGS_SIMPLE | CallData::CALL_FCALL - ) - ) - when "opt_eqeq" + iseq.send(YARV.calldata(:empty?)) + when "opt_eq" iseq.send(YARV.calldata(:==, 1)) when "opt_ge" iseq.send(YARV.calldata(:>=, 1)) + when "opt_gt" + iseq.send(YARV.calldata(:>, 1)) when "opt_getconstant_path" - object = parse(operands) - raise unless object.is_a?(Array) - - iseq.opt_getconstant_path(object) + iseq.opt_getconstant_path(parse_type(operands, Array)) + when "opt_le" + iseq.send(YARV.calldata(:<=, 1)) + when "opt_length" + iseq.send(YARV.calldata(:length)) + when "opt_lt" + iseq.send(YARV.calldata(:<, 1)) when "opt_ltlt" iseq.send(YARV.calldata(:<<, 1)) when "opt_minus" iseq.send(YARV.calldata(:-, 1)) + when "opt_mod" + iseq.send(YARV.calldata(:%, 1)) when "opt_mult" iseq.send(YARV.calldata(:*, 1)) + when "opt_neq" + iseq.send(YARV.calldata(:!=, 1)) + when "opt_newarray_max" + iseq.newarray(parse_number(operands)) + iseq.send(YARV.calldata(:max)) + when "opt_newarray_min" + iseq.newarray(parse_number(operands)) + iseq.send(YARV.calldata(:min)) + when "opt_nil_p" + iseq.send(YARV.calldata(:nil?)) + when "opt_not" + iseq.send(YARV.calldata(:!)) when "opt_or" iseq.send(YARV.calldata(:|, 1)) when "opt_plus" iseq.send(YARV.calldata(:+, 1)) + when "opt_regexpmatch2" + iseq.send(YARV.calldata(:=~, 1)) + when "opt_reverse" + iseq.send(YARV.calldata(:reverse)) + when "opt_send_without_block" + iseq.send(calldata(operands)) + when "opt_size" + iseq.send(YARV.calldata(:size)) + when "opt_str_freeze" + iseq.putstring(parse_string(operands)) + iseq.send(YARV.calldata(:freeze)) + when "opt_str_uminus" + iseq.putstring(parse_string(operands)) + 
iseq.send(YARV.calldata(:-@)) + when "opt_succ" + iseq.send(YARV.calldata(:succ)) when "pop" iseq.pop when "putnil" @@ -150,38 +289,60 @@ def assemble iseq.putobject(parse(operands)) when "putself" iseq.putself + when "putspecialobject" + iseq.putspecialobject(parse_options(operands, [1, 2, 3])) when "putstring" - object = parse(operands) - raise unless object.is_a?(String) - - iseq.putstring(object) + iseq.putstring(parse_string(operands)) when "send" - iseq.send(calldata(operands)) - when "setinstancevariable" - object = parse(operands) - raise unless object.is_a?(Symbol) + block_iseq = + if lines[line_index].start_with?(" ") + body = parse_nested(lines[line_index..]) + line_index += body.length + + block_iseq = iseq.block_child_iseq(Location.default) + assemble_iseq(block_iseq, body) + block_iseq + end + + iseq.send(calldata(operands), block_iseq) + when "setconstant" + iseq.setconstant(parse_symbol(operands)) + when "setglobal" + iseq.setglobal(parse_symbol(operands)) + when "setlocal" + name_string, level_string = operands.split(/,\s*/) + name = name_string.to_sym + level = level_string&.to_i || 0 - iseq.setinstancevariable(object) + iseq.local_table.plain(name) + lookup = iseq.local_table.find(name, level) + iseq.setlocal(lookup.index, lookup.level) + when "setn" + iseq.setn(parse_number(operands)) + when "setclassvariable" + iseq.setclassvariable(parse_symbol(operands)) + when "setinstancevariable" + iseq.setinstancevariable(parse_symbol(operands)) + when "setspecial" + iseq.setspecial(parse_number(operands)) + when "splatarray" + iseq.splatarray(parse_options(operands, [true, false])) when "swap" iseq.swap + when "topn" + iseq.topn(parse_number(operands)) when "toregexp" options, length = operands.split(", ") iseq.toregexp(parse_number(options), parse_number(length)) + when "ARG_REQ" + iseq.argument_size += 1 + iseq.local_table.plain(operands.to_sym) else raise "Could not understand: #{line}" end end - - iseq.compile! 
- iseq end - def self.assemble(filepath) - new(filepath).assemble - end - - private - def parse(value) program = SyntaxTree.parse(value) raise if program.statements.body.length != 1 @@ -189,50 +350,52 @@ def parse(value) program.statements.body.first.accept(ObjectVisitor.new) end + def parse_options(value, options) + parse(value).tap { raise unless options.include?(_1) } + end + + def parse_type(value, type) + parse(value).tap { raise unless _1.is_a?(type) } + end + def parse_number(value) - object = parse(value) - raise unless object.is_a?(Integer) + parse_type(value, Integer) + end + + def parse_string(value) + parse_type(value, String) + end - object + def parse_symbol(value) + parse_type(value, Symbol) end + def parse_nested(lines) + body = lines.take_while { |line| line.match?(/^($|;| )/) } + body.map! { |line| line.delete_prefix!(" ") || +"" } + end + + CALLDATA_FLAGS = { + "ARGS_SPLAT" => CallData::CALL_ARGS_SPLAT, + "ARGS_BLOCKARG" => CallData::CALL_ARGS_BLOCKARG, + "FCALL" => CallData::CALL_FCALL, + "VCALL" => CallData::CALL_VCALL, + "ARGS_SIMPLE" => CallData::CALL_ARGS_SIMPLE, + "BLOCKISEQ" => CallData::CALL_BLOCKISEQ, + "KWARG" => CallData::CALL_KWARG, + "KW_SPLAT" => CallData::CALL_KW_SPLAT, + "TAILCALL" => CallData::CALL_TAILCALL, + "SUPER" => CallData::CALL_SUPER, + "ZSUPER" => CallData::CALL_ZSUPER, + "OPT_SEND" => CallData::CALL_OPT_SEND, + "KW_SPLAT_MUT" => CallData::CALL_KW_SPLAT_MUT + }.freeze + def calldata(value) message, argc_value, flags_value = value.split flags = if flags_value - flags_value - .split("|") - .map do |flag| - case flag - when "ARGS_SPLAT" - CallData::CALL_ARGS_SPLAT - when "ARGS_BLOCKARG" - CallData::CALL_ARGS_BLOCKARG - when "FCALL" - CallData::CALL_FCALL - when "VCALL" - CallData::CALL_VCALL - when "ARGS_SIMPLE" - CallData::CALL_ARGS_SIMPLE - when "BLOCKISEQ" - CallData::CALL_BLOCKISEQ - when "KWARG" - CallData::CALL_KWARG - when "KW_SPLAT" - CallData::CALL_KW_SPLAT - when "TAILCALL" - CallData::CALL_TAILCALL - when "SUPER" 
- CallData::CALL_SUPER - when "ZSUPER" - CallData::CALL_ZSUPER - when "OPT_SEND" - CallData::CALL_OPT_SEND - when "KW_SPLAT_MUT" - CallData::CALL_KW_SPLAT_MUT - end - end - .inject(:|) + flags_value.split("|").map(&CALLDATA_FLAGS).inject(:|) else CallData::CALL_ARGS_SIMPLE end diff --git a/lib/syntax_tree/yarv/compiler.rb b/lib/syntax_tree/yarv/compiler.rb index 496c2075..4af5d6f0 100644 --- a/lib/syntax_tree/yarv/compiler.rb +++ b/lib/syntax_tree/yarv/compiler.rb @@ -125,7 +125,7 @@ def self.compile(node) end def visit_array(node) - visit_all(node.contents.parts) + node.contents ? visit_all(node.contents.parts) : [] end def visit_bare_assoc_hash(node) diff --git a/lib/syntax_tree/yarv/instruction_sequence.rb b/lib/syntax_tree/yarv/instruction_sequence.rb index 0f1eadd0..48305be6 100644 --- a/lib/syntax_tree/yarv/instruction_sequence.rb +++ b/lib/syntax_tree/yarv/instruction_sequence.rb @@ -220,13 +220,7 @@ def length def eval raise "Unsupported platform" if ISEQ_LOAD.nil? - compiled = to_a - - # Temporary hack until we get these working. 
- compiled[4][:node_id] = -1 - compiled[4][:node_ids] = [-1] * insns.length - - Fiddle.dlunwrap(ISEQ_LOAD.call(Fiddle.dlwrap(compiled), 0, nil)).eval + Fiddle.dlunwrap(ISEQ_LOAD.call(Fiddle.dlwrap(to_a), 0, nil)).eval end def to_a @@ -257,7 +251,9 @@ def to_a { arg_size: argument_size, local_size: local_table.size, - stack_max: stack.maximum_size + stack_max: stack.maximum_size, + node_id: -1, + node_ids: [-1] * insns.length }, name, "", diff --git a/lib/syntax_tree/yarv/instructions.rb b/lib/syntax_tree/yarv/instructions.rb index 772f1bb3..288edb16 100644 --- a/lib/syntax_tree/yarv/instructions.rb +++ b/lib/syntax_tree/yarv/instructions.rb @@ -2695,7 +2695,7 @@ def to_a(_iseq) [ :opt_case_dispatch, case_dispatch_hash.flat_map { |key, value| [key, value.name] }, - else_label + else_label.name ] end From 13c07cfaf67a2fc9a26ae477c0382bcad7773855 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Sat, 17 Dec 2022 13:13:51 -0500 Subject: [PATCH 091/104] Assemble other instructions --- lib/syntax_tree/yarv/assembler.rb | 49 +++++++++++++++++++++++-------- 1 file changed, 37 insertions(+), 12 deletions(-) diff --git a/lib/syntax_tree/yarv/assembler.rb b/lib/syntax_tree/yarv/assembler.rb index c3a874e9..7d8a712f 100644 --- a/lib/syntax_tree/yarv/assembler.rb +++ b/lib/syntax_tree/yarv/assembler.rb @@ -79,6 +79,12 @@ def assemble_iseq(iseq, lines) iseq.branchnil(labels[operands]) when "branchunless" iseq.branchunless(labels[operands]) + when "checkkeyword" + kwbits_index, keyword_index = operands.split(/,\s*/) + iseq.checkkeyword( + parse_number(kwbits_index), + parse_number(keyword_index) + ) when "checkmatch" iseq.checkmatch(parse_number(operands)) when "checktype" @@ -98,6 +104,8 @@ def assemble_iseq(iseq, lines) class_iseq = iseq.class_child_iseq(name.to_s, Location.default) assemble_iseq(class_iseq, body) iseq.defineclass(name, class_iseq, flags) + when "defined" + raise NotImplementedError when "definemethod" body = parse_nested(lines[line_index..]) line_index += 
body.length @@ -127,6 +135,12 @@ def assemble_iseq(iseq, lines) when "expandarray" number, flags = operands.split(/,\s*/) iseq.expandarray(parse_number(number), parse_number(flags)) + when "getblockparam" + lookup = find_local(iseq, operands) + iseq.getblockparam(lookup.index, lookup.level) + when "getblockparamproxy" + lookup = find_local(iseq, operands) + iseq.getblockparamproxy(lookup.index, lookup.level) when "getclassvariable" iseq.getclassvariable(parse_symbol(operands)) when "getconstant" @@ -136,18 +150,16 @@ def assemble_iseq(iseq, lines) when "getinstancevariable" iseq.getinstancevariable(parse_symbol(operands)) when "getlocal" - name_string, level_string = operands.split(/,\s*/) - name = name_string.to_sym - level = level_string&.to_i || 0 - - iseq.local_table.plain(name) - lookup = iseq.local_table.find(name, level) + lookup = find_local(iseq, operands) iseq.getlocal(lookup.index, lookup.level) when "getspecial" key, type = operands.split(/,\s*/) iseq.getspecial(parse_number(key), parse_number(type)) when "intern" iseq.intern + when "invokeblock" + cdata = operands ? 
calldata(operands) : YARV.calldata(nil, 0) + iseq.invokeblock(cdata) when "invokesuper" cdata = if operands @@ -305,17 +317,15 @@ def assemble_iseq(iseq, lines) end iseq.send(calldata(operands), block_iseq) + when "setblockparam" + lookup = find_local(iseq, operands) + iseq.setblockparam(lookup.index, lookup.level) when "setconstant" iseq.setconstant(parse_symbol(operands)) when "setglobal" iseq.setglobal(parse_symbol(operands)) when "setlocal" - name_string, level_string = operands.split(/,\s*/) - name = name_string.to_sym - level = level_string&.to_i || 0 - - iseq.local_table.plain(name) - lookup = iseq.local_table.find(name, level) + lookup = find_local(iseq, operands) iseq.setlocal(lookup.index, lookup.level) when "setn" iseq.setn(parse_number(operands)) @@ -329,6 +339,8 @@ def assemble_iseq(iseq, lines) iseq.splatarray(parse_options(operands, [true, false])) when "swap" iseq.swap + when "throw" + iseq.throw(parse_number(operands)) when "topn" iseq.topn(parse_number(operands)) when "toregexp" @@ -337,12 +349,25 @@ def assemble_iseq(iseq, lines) when "ARG_REQ" iseq.argument_size += 1 iseq.local_table.plain(operands.to_sym) + when "ARG_BLOCK" + iseq.argument_options[:block_start] = iseq.argument_size + iseq.local_table.block(operands.to_sym) + iseq.argument_size += 1 else raise "Could not understand: #{line}" end end end + def find_local(iseq, operands) + name_string, level_string = operands.split(/,\s*/) + name = name_string.to_sym + level = level_string&.to_i || 0 + + iseq.local_table.plain(name) + iseq.local_table.find(name, level) + end + def parse(value) program = SyntaxTree.parse(value) raise if program.statements.body.length != 1 From 9749bc02afca46e3616df88d069d1c5af58bb5ec Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Sun, 18 Dec 2022 15:00:41 -0500 Subject: [PATCH 092/104] Assemble the defined instruction --- lib/syntax_tree/yarv/assembler.rb | 77 +++++++++++++++++++++---------- 1 file changed, 52 insertions(+), 25 deletions(-) diff --git 
a/lib/syntax_tree/yarv/assembler.rb b/lib/syntax_tree/yarv/assembler.rb index 7d8a712f..efb179c1 100644 --- a/lib/syntax_tree/yarv/assembler.rb +++ b/lib/syntax_tree/yarv/assembler.rb @@ -25,6 +25,43 @@ def visit_string_literal(node) end end + CALLDATA_FLAGS = { + "ARGS_SPLAT" => CallData::CALL_ARGS_SPLAT, + "ARGS_BLOCKARG" => CallData::CALL_ARGS_BLOCKARG, + "FCALL" => CallData::CALL_FCALL, + "VCALL" => CallData::CALL_VCALL, + "ARGS_SIMPLE" => CallData::CALL_ARGS_SIMPLE, + "BLOCKISEQ" => CallData::CALL_BLOCKISEQ, + "KWARG" => CallData::CALL_KWARG, + "KW_SPLAT" => CallData::CALL_KW_SPLAT, + "TAILCALL" => CallData::CALL_TAILCALL, + "SUPER" => CallData::CALL_SUPER, + "ZSUPER" => CallData::CALL_ZSUPER, + "OPT_SEND" => CallData::CALL_OPT_SEND, + "KW_SPLAT_MUT" => CallData::CALL_KW_SPLAT_MUT + }.freeze + + DEFINED_TYPES = [ + nil, + "nil", + "instance-variable", + "local-variable", + "global-variable", + "class variable", + "constant", + "method", + "yield", + "super", + "self", + "true", + "false", + "assignment", + "expression", + "ref", + "func", + "constant-from" + ].freeze + attr_reader :filepath def initialize(filepath) @@ -105,7 +142,12 @@ def assemble_iseq(iseq, lines) assemble_iseq(class_iseq, body) iseq.defineclass(name, class_iseq, flags) when "defined" - raise NotImplementedError + type, object, message = operands.split(/,\s*/) + iseq.defined( + DEFINED_TYPES.index(type), + parse_symbol(object), + parse_string(message) + ) when "definemethod" body = parse_nested(lines[line_index..]) line_index += body.length @@ -158,12 +200,13 @@ def assemble_iseq(iseq, lines) when "intern" iseq.intern when "invokeblock" - cdata = operands ? calldata(operands) : YARV.calldata(nil, 0) - iseq.invokeblock(cdata) + iseq.invokeblock( + operands ? 
parse_calldata(operands) : YARV.calldata(nil, 0) + ) when "invokesuper" - cdata = + calldata = if operands - calldata(operands) + parse_calldata(operands) else YARV.calldata( nil, @@ -183,7 +226,7 @@ def assemble_iseq(iseq, lines) block_iseq end - iseq.invokesuper(cdata, block_iseq) + iseq.invokesuper(calldata, block_iseq) when "jump" iseq.jump(labels[operands]) when "leave" @@ -282,7 +325,7 @@ def assemble_iseq(iseq, lines) when "opt_reverse" iseq.send(YARV.calldata(:reverse)) when "opt_send_without_block" - iseq.send(calldata(operands)) + iseq.send(parse_calldata(operands)) when "opt_size" iseq.send(YARV.calldata(:size)) when "opt_str_freeze" @@ -316,7 +359,7 @@ def assemble_iseq(iseq, lines) block_iseq end - iseq.send(calldata(operands), block_iseq) + iseq.send(parse_calldata(operands), block_iseq) when "setblockparam" lookup = find_local(iseq, operands) iseq.setblockparam(lookup.index, lookup.level) @@ -400,23 +443,7 @@ def parse_nested(lines) body.map! { |line| line.delete_prefix!(" ") || +"" } end - CALLDATA_FLAGS = { - "ARGS_SPLAT" => CallData::CALL_ARGS_SPLAT, - "ARGS_BLOCKARG" => CallData::CALL_ARGS_BLOCKARG, - "FCALL" => CallData::CALL_FCALL, - "VCALL" => CallData::CALL_VCALL, - "ARGS_SIMPLE" => CallData::CALL_ARGS_SIMPLE, - "BLOCKISEQ" => CallData::CALL_BLOCKISEQ, - "KWARG" => CallData::CALL_KWARG, - "KW_SPLAT" => CallData::CALL_KW_SPLAT, - "TAILCALL" => CallData::CALL_TAILCALL, - "SUPER" => CallData::CALL_SUPER, - "ZSUPER" => CallData::CALL_ZSUPER, - "OPT_SEND" => CallData::CALL_OPT_SEND, - "KW_SPLAT_MUT" => CallData::CALL_KW_SPLAT_MUT - }.freeze - - def calldata(value) + def parse_calldata(value) message, argc_value, flags_value = value.split flags = if flags_value From dc82220cb4239ca28c703f26ac07c089b7f7c911 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 20 Dec 2022 17:04:47 +0000 Subject: [PATCH 093/104] Bump rubocop from 1.40.0 to 1.41.0 Bumps 
[rubocop](https://github.com/rubocop/rubocop) from 1.40.0 to 1.41.0. - [Release notes](https://github.com/rubocop/rubocop/releases) - [Changelog](https://github.com/rubocop/rubocop/blob/master/CHANGELOG.md) - [Commits](https://github.com/rubocop/rubocop/compare/v1.40.0...v1.41.0) --- updated-dependencies: - dependency-name: rubocop dependency-type: direct:development update-type: version-update:semver-minor ... Signed-off-by: dependabot[bot] --- Gemfile.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Gemfile.lock b/Gemfile.lock index 05e482bf..4f8dfc06 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -19,7 +19,7 @@ GEM rake (13.0.6) regexp_parser (2.6.1) rexml (3.2.5) - rubocop (1.40.0) + rubocop (1.41.0) json (~> 2.3) parallel (~> 1.10) parser (>= 3.1.2.1) From 14a1f5bdfaef1fb64abf636c51c83ec5c8f9619b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 22 Dec 2022 17:05:18 +0000 Subject: [PATCH 094/104] Bump rubocop from 1.41.0 to 1.41.1 Bumps [rubocop](https://github.com/rubocop/rubocop) from 1.41.0 to 1.41.1. - [Release notes](https://github.com/rubocop/rubocop/releases) - [Changelog](https://github.com/rubocop/rubocop/blob/master/CHANGELOG.md) - [Commits](https://github.com/rubocop/rubocop/compare/v1.41.0...v1.41.1) --- updated-dependencies: - dependency-name: rubocop dependency-type: direct:development update-type: version-update:semver-patch ... 
Signed-off-by: dependabot[bot] --- Gemfile.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Gemfile.lock b/Gemfile.lock index 4f8dfc06..cddd3f21 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -19,7 +19,7 @@ GEM rake (13.0.6) regexp_parser (2.6.1) rexml (3.2.5) - rubocop (1.41.0) + rubocop (1.41.1) json (~> 2.3) parallel (~> 1.10) parser (>= 3.1.2.1) From b9ec70019d0628c8836e087d828b0a4439d7a69c Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Fri, 23 Dec 2022 12:31:56 -0500 Subject: [PATCH 095/104] BodyStmt location We were previously relying on #bind to set up the bounds for bodystmt but that isn't sufficient. This PR fixes that. --- lib/syntax_tree/parser.rb | 9 +++++---- test/node_test.rb | 2 +- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/lib/syntax_tree/parser.rb b/lib/syntax_tree/parser.rb index 5b093a87..85f6661e 100644 --- a/lib/syntax_tree/parser.rb +++ b/lib/syntax_tree/parser.rb @@ -820,13 +820,13 @@ def on_begin(bodystmt) end bodystmt.bind( - keyword.location.end_char, + find_next_statement_start(keyword.location.end_char), keyword.location.end_column, end_location.end_char, end_location.end_column ) - location = keyword.location.to(bodystmt.location) + location = keyword.location.to(end_location) Begin.new(bodystmt: bodystmt, location: location) end end @@ -905,14 +905,15 @@ def on_blockarg(name) # (nil | Ensure) ensure_clause # ) -> BodyStmt def on_bodystmt(statements, rescue_clause, else_clause, ensure_clause) + parts = [statements, rescue_clause, else_clause, ensure_clause].compact + BodyStmt.new( statements: statements, rescue_clause: rescue_clause, else_keyword: else_clause && consume_keyword(:else), else_clause: else_clause, ensure_clause: ensure_clause, - location: - Location.fixed(line: lineno, char: char_pos, column: current_column) + location: parts.first.location.to(parts.last.location) ) end diff --git a/test/node_test.rb b/test/node_test.rb index 15826be0..3d700e73 100644 --- a/test/node_test.rb +++ 
b/test/node_test.rb @@ -268,7 +268,7 @@ def test_bodystmt end SOURCE - at = location(lines: 9..9, chars: 5..64) + at = location(lines: 2..9, chars: 5..64) assert_node(BodyStmt, source, at: at, &:bodystmt) end From bedf6348601e7bdb43f3e15af82ba663d42fea12 Mon Sep 17 00:00:00 2001 From: Wei Zhe Heng Date: Sun, 25 Dec 2022 02:20:16 +0800 Subject: [PATCH 096/104] Add and ignore textDocument/documentColor --- lib/syntax_tree/language_server.rb | 2 ++ 1 file changed, 2 insertions(+) diff --git a/lib/syntax_tree/language_server.rb b/lib/syntax_tree/language_server.rb index c2265c32..a7b23664 100644 --- a/lib/syntax_tree/language_server.rb +++ b/lib/syntax_tree/language_server.rb @@ -111,6 +111,8 @@ def run write(id: request[:id], result: PP.pp(SyntaxTree.parse(store[uri]), +"")) when Request[method: %r{\$/.+}] # ignored + when Request[method: "textDocument/documentColor", params: { textDocument: { uri: :any } }] + # ignored else raise ArgumentError, "Unhandled: #{request}" end From 11cbcc9fdbd9123844eba3f66acada66b5b09d31 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 26 Dec 2022 17:13:24 +0000 Subject: [PATCH 097/104] Bump simplecov from 0.21.2 to 0.22.0 Bumps [simplecov](https://github.com/simplecov-ruby/simplecov) from 0.21.2 to 0.22.0. - [Release notes](https://github.com/simplecov-ruby/simplecov/releases) - [Changelog](https://github.com/simplecov-ruby/simplecov/blob/main/CHANGELOG.md) - [Commits](https://github.com/simplecov-ruby/simplecov/compare/v0.21.2...v0.22.0) --- updated-dependencies: - dependency-name: simplecov dependency-type: direct:development update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- Gemfile.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Gemfile.lock b/Gemfile.lock index cddd3f21..206f71d9 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -32,7 +32,7 @@ GEM rubocop-ast (1.24.0) parser (>= 3.1.1.0) ruby-progressbar (1.11.0) - simplecov (0.21.2) + simplecov (0.22.0) docile (~> 1.1) simplecov-html (~> 0.11) simplecov_json_formatter (~> 0.1) From c68552ccc472c95248ea005924041f173c68951b Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 26 Dec 2022 17:13:36 +0000 Subject: [PATCH 098/104] Bump prettier_print from 1.1.0 to 1.2.0 Bumps [prettier_print](https://github.com/ruby-syntax-tree/prettier_print) from 1.1.0 to 1.2.0. - [Release notes](https://github.com/ruby-syntax-tree/prettier_print/releases) - [Changelog](https://github.com/ruby-syntax-tree/prettier_print/blob/main/CHANGELOG.md) - [Commits](https://github.com/ruby-syntax-tree/prettier_print/compare/v1.1.0...v1.2.0) --- updated-dependencies: - dependency-name: prettier_print dependency-type: direct:production update-type: version-update:semver-minor ... 
Signed-off-by: dependabot[bot] --- Gemfile.lock | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Gemfile.lock b/Gemfile.lock index cddd3f21..638f83ca 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -14,7 +14,7 @@ GEM parallel (1.22.1) parser (3.1.3.0) ast (~> 2.4.1) - prettier_print (1.1.0) + prettier_print (1.2.0) rainbow (3.1.1) rake (13.0.6) regexp_parser (2.6.1) From e5dde653f9cbe9ede5f183d9adb5f0efb01e8816 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 28 Dec 2022 14:04:18 -0500 Subject: [PATCH 099/104] Fix #234 --- lib/syntax_tree/node.rb | 10 +++++----- test/fixtures/if.rb | 7 +++++++ 2 files changed, 12 insertions(+), 5 deletions(-) diff --git a/lib/syntax_tree/node.rb b/lib/syntax_tree/node.rb index 53fb3905..e5b09044 100644 --- a/lib/syntax_tree/node.rb +++ b/lib/syntax_tree/node.rb @@ -6160,7 +6160,7 @@ def call(q, node) # want to force it to not be a ternary, like if the predicate is an # assignment because it's hard to read. case node.predicate - when Assign, Command, CommandCall, MAssign, OpAssign + when Assign, Binary, Command, CommandCall, MAssign, OpAssign return false when Not return false unless node.predicate.parentheses? @@ -6183,10 +6183,10 @@ def call(q, node) # and default instead to breaking them into multiple lines. def ternaryable?(statement) case statement - when AliasNode, Assign, Break, Command, CommandCall, Heredoc, IfNode, - IfOp, Lambda, MAssign, Next, OpAssign, RescueMod, ReturnNode, - Super, Undef, UnlessNode, UntilNode, VoidStmt, WhileNode, - YieldNode, ZSuper + when AliasNode, Assign, Break, Command, CommandCall, Defined, Heredoc, + IfNode, IfOp, Lambda, MAssign, Next, OpAssign, RescueMod, + ReturnNode, Super, Undef, UnlessNode, UntilNode, VoidStmt, + WhileNode, YieldNode, ZSuper # This is a list of nodes that should not be allowed to be a part of a # ternary clause. 
false diff --git a/test/fixtures/if.rb b/test/fixtures/if.rb index cfd6a882..b25386b9 100644 --- a/test/fixtures/if.rb +++ b/test/fixtures/if.rb @@ -67,3 +67,10 @@ if true # comment1 # comment2 end +% +result = + if false && val = 1 + "A" + else + "B" + end From 6a65136a8875339f1514ba1310c78bbb8ed6d1ce Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 28 Dec 2022 13:48:19 -0500 Subject: [PATCH 100/104] Fix for #235 --- lib/syntax_tree/parser.rb | 30 ++++++++++++++---------------- test/fixtures/rassign.rb | 6 ++++++ 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/lib/syntax_tree/parser.rb b/lib/syntax_tree/parser.rb index 85f6661e..fcefed30 100644 --- a/lib/syntax_tree/parser.rb +++ b/lib/syntax_tree/parser.rb @@ -995,22 +995,11 @@ def on_call(receiver, operator, message) # :call-seq: # on_case: (untyped value, untyped consequent) -> Case | RAssign def on_case(value, consequent) - if (keyword = find_keyword(:case)) - tokens.delete(keyword) - - Case.new( - keyword: keyword, - value: value, - consequent: consequent, - location: keyword.location.to(consequent.location) - ) - else - operator = - if (keyword = find_keyword(:in)) - tokens.delete(keyword) - else - consume_operator(:"=>") - end + if value && (operator = find_keyword(:in) || find_operator(:"=>")) && + (value.location.end_char...consequent.location.start_char).cover?( + operator.location.start_char + ) + tokens.delete(operator) node = RAssign.new( @@ -1022,6 +1011,15 @@ def on_case(value, consequent) PinVisitor.visit(node, tokens) node + else + keyword = consume_keyword(:case) + + Case.new( + keyword: keyword, + value: value, + consequent: consequent, + location: keyword.location.to(consequent.location) + ) end end diff --git a/test/fixtures/rassign.rb b/test/fixtures/rassign.rb index 3db52b18..3d357351 100644 --- a/test/fixtures/rassign.rb +++ b/test/fixtures/rassign.rb @@ -23,3 +23,9 @@ % a in Integer b => [Integer => c] +% +case [0] +when 0 + { a: 0 } => { a: } + puts a +end From 
73761bb701e7ea98eb15ad97fb1cd214a6ca4adb Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 28 Dec 2022 20:41:51 -0500 Subject: [PATCH 101/104] Update CHANGELOG --- CHANGELOG.md | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 20808e3b..b6b854d3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,9 +6,17 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a ## [Unreleased] +### Added + +- An experiment in working with instruction sequences has been added to Syntax Tree. This is subject to change, so it is not well documented or tested at the moment. It does not impact other functionality. + ### Changed - Support forwarding anonymous keyword arguments with `**`. +- The `BodyStmt` node now has a more correct location information. +- Ignore the `textDocument/documentColor` request coming into the language server to support clients that require that request be received. +- Do not attempt to convert `if..else` into ternaries if the predicate has a `Binary` node. +- Properly handle nested pattern matching when a rightward assignment is inside a `when` clause. ## [5.0.1] - 2022-11-10 From 83675f9bd5cc240ba70afc901312347971b1c38c Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 28 Dec 2022 20:44:36 -0500 Subject: [PATCH 102/104] This branch has no conflicts with the base branch. --- Gemfile.lock | 4 ++-- syntax_tree.gemspec | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Gemfile.lock b/Gemfile.lock index ffbdc5d1..5f7d8754 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -2,7 +2,7 @@ PATH remote: . 
specs: syntax_tree (5.0.1) - prettier_print (>= 1.1.0) + prettier_print (>= 1.2.0) GEM remote: https://rubygems.org/ @@ -14,7 +14,7 @@ GEM parallel (1.22.1) parser (3.1.2.1) ast (~> 2.4.1) - prettier_print (1.1.0) + prettier_print (1.2.0) rainbow (3.1.1) rake (13.0.6) regexp_parser (2.6.0) diff --git a/syntax_tree.gemspec b/syntax_tree.gemspec index 19f4ee97..f6c4a734 100644 --- a/syntax_tree.gemspec +++ b/syntax_tree.gemspec @@ -25,7 +25,7 @@ Gem::Specification.new do |spec| spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) } spec.require_paths = %w[lib] - spec.add_dependency "prettier_print", ">= 1.1.0" + spec.add_dependency "prettier_print", ">= 1.2.0" spec.add_development_dependency "bundler" spec.add_development_dependency "minitest" From 640f64127251068d08843fd469d2dea26f379a0e Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 28 Dec 2022 20:50:21 -0500 Subject: [PATCH 103/104] Update CHANGELOG --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index b6b854d3..71e66403 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a ### Added - An experiment in working with instruction sequences has been added to Syntax Tree. This is subject to change, so it is not well documented or tested at the moment. It does not impact other functionality. +- You can now format at a different base layer of indentation. This is an optional third argument to `SyntaxTree::format`. 
### Changed From 8ebd8da9fb023f2eb687b9b5ab125303d72a5247 Mon Sep 17 00:00:00 2001 From: Kevin Newton Date: Wed, 28 Dec 2022 20:55:23 -0500 Subject: [PATCH 104/104] Bump to version 5.1.0 --- CHANGELOG.md | 5 ++++- Gemfile.lock | 2 +- lib/syntax_tree/version.rb | 2 +- 3 files changed, 6 insertions(+), 3 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 71e66403..557fdf5c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a ## [Unreleased] +## [5.1.0] - 2022-12-28 + ### Added - An experiment in working with instruction sequences has been added to Syntax Tree. This is subject to change, so it is not well documented or tested at the moment. It does not impact other functionality. @@ -469,7 +471,8 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) a - 🎉 Initial release! 🎉 -[unreleased]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v5.0.1...HEAD +[unreleased]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v5.1.0...HEAD +[5.1.0]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v5.0.1...v5.1.0 [5.0.1]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v5.0.0...v5.0.1 [5.0.0]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v4.3.0...v5.0.0 [4.3.0]: https://github.com/ruby-syntax-tree/syntax_tree/compare/v4.2.0...v4.3.0 diff --git a/Gemfile.lock b/Gemfile.lock index 995fa74e..47d0c66b 100644 --- a/Gemfile.lock +++ b/Gemfile.lock @@ -1,7 +1,7 @@ PATH remote: . 
specs: - syntax_tree (5.0.1) + syntax_tree (5.1.0) prettier_print (>= 1.2.0) GEM diff --git a/lib/syntax_tree/version.rb b/lib/syntax_tree/version.rb index 340bbbdf..d9bbdfa4 100644 --- a/lib/syntax_tree/version.rb +++ b/lib/syntax_tree/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module SyntaxTree - VERSION = "5.0.1" + VERSION = "5.1.0" end