| author    | ydah <[email protected]>                        | 2025-01-20 17:30:30 +0900 |
|-----------|--------------------------------------------------|---------------------------|
| committer | Yuichiro Kaneko <[email protected]>            | 2025-01-21 10:16:05 +0900 |
| commit    | f33a76bfa93898d32841e972f006fd96bd17a1f9 (patch) |                           |
| tree      | a3b5c5619979c89691622fa5ea5373a257625ce2         |                           |
| parent    | 769cccba56e3a1b7336d0cfadcfb9e0689e8f01f (diff)  |                           |
Lrama v0.7.0
Notes:
Merged: https://github.com/ruby/ruby/pull/12595
26 files changed, 629 insertions, 65 deletions
diff --git a/tool/lrama/NEWS.md b/tool/lrama/NEWS.md
index 3c3cb1df45..a535332ec3 100644
--- a/tool/lrama/NEWS.md
+++ b/tool/lrama/NEWS.md
@@ -1,5 +1,65 @@
 # NEWS for Lrama
 
+## Lrama 0.7.0 (2025-01-21)
+
+### [EXPERIMENTAL] Support generation of the IELR(1) parser
+
+Add support for generating the IELR(1) parser described in the following paper:
+https://www.sciencedirect.com/science/article/pii/S0167642309001191
+
+To use the IELR(1) parser, write the following directive in your grammar file:
+
+```yacc
+%define lr.type ielr
+```
+
+Note that the IELR(1) parser is currently an experimental feature. If you find any bugs, please report them to us. Thank you.
+
+### Support `-t` option as an alias of the `--debug` option
+
+Support the `-t` option as an alias of the `--debug` option.
+This aligns with Bison's behavior.
+
+### Trace only explicit rules
+
+Add support for tracing only explicit rules.
+The output of `--trace=rules` also includes mid-rule actions. If you want to show only explicit rules, use the `--trace=only-explicit-rules` option.
+
+Example:
+
+```yacc
+%{
+%}
+%union {
+    int i;
+}
+%token <i> number
+%type <i> program
+%%
+program : number { printf("%d", $1); } number { $$ = $1 + $3; }
+        ;
+%%
+```
+
+Result of `--trace=rules`:
+
+```console
+$ exe/lrama --trace=rules sample.y
+Grammar rules:
+$accept -> program YYEOF
+$@1 -> ε
+program -> number $@1 number
+```
+
+Result of `--trace=only-explicit-rules`:
+
+```console
+$ exe/lrama --trace=only-explicit-rules sample.y
+Grammar rules:
+$accept -> program YYEOF
+program -> number number
+```
+
 ## Lrama 0.6.11 (2024-12-23)
 
 ### Add support for %type declarations using %nterm in Nonterminal Symbols
diff --git a/tool/lrama/lib/lrama/bitmap.rb b/tool/lrama/lib/lrama/bitmap.rb
index b2de0248c8..098c6e0b77 100644
--- a/tool/lrama/lib/lrama/bitmap.rb
+++ b/tool/lrama/lib/lrama/bitmap.rb
@@ -1,7 +1,9 @@
+# rbs_inline: enabled
 # frozen_string_literal: true
 
 module Lrama
   module Bitmap
+    # @rbs (Array[Integer] ary) -> Integer
     def self.from_array(ary)
       bit = 0
 
@@ -12,6 +14,7 @@ module Lrama
       bit
     end
 
+    # @rbs (Integer int) -> Array[Integer]
     def self.to_array(int)
       a = [] #: Array[Integer]
       i = 0
diff --git a/tool/lrama/lib/lrama/command.rb b/tool/lrama/lib/lrama/command.rb
index 0095c1a119..3ff39d578d 100644
--- a/tool/lrama/lib/lrama/command.rb
+++ b/tool/lrama/lib/lrama/command.rb
@@ -19,7 +19,7 @@ module Lrama
       text = options.y.read
       options.y.close if options.y != STDIN
       begin
-        grammar = Lrama::Parser.new(text, options.grammar_file, options.debug).parse
+        grammar = Lrama::Parser.new(text, options.grammar_file, options.debug, options.define).parse
         unless grammar.no_stdlib
           stdlib_grammar = Lrama::Parser.new(File.read(STDLIB_FILE_PATH), STDLIB_FILE_PATH, options.debug).parse
           grammar.insert_before_parameterizing_rules(stdlib_grammar.parameterizing_rules)
@@ -34,6 +34,7 @@ module Lrama
       end
       states = Lrama::States.new(grammar, trace_state: (options.trace_opts[:automaton] || options.trace_opts[:closure]))
       states.compute
+      states.compute_ielr if grammar.ielr_defined?
context = Lrama::Context.new(states) if options.report_file diff --git a/tool/lrama/lib/lrama/digraph.rb b/tool/lrama/lib/lrama/digraph.rb index d2bb88101a..2161f30474 100644 --- a/tool/lrama/lib/lrama/digraph.rb +++ b/tool/lrama/lib/lrama/digraph.rb @@ -1,23 +1,52 @@ +# rbs_inline: enabled # frozen_string_literal: true module Lrama # Algorithm Digraph of https://dl.acm.org/doi/pdf/10.1145/69622.357187 (P. 625) + # + # @rbs generic X < Object -- Type of a member of `sets` + # @rbs generic Y < _Or -- Type of sets assigned to a member of `sets` class Digraph + # TODO: rbs-inline 0.10.0 doesn't support instance variables. + # Move these type declarations above instance variable definitions, once it's supported. + # + # @rbs! + # interface _Or + # def |: (self) -> self + # end + # @sets: Array[X] + # @relation: Hash[X, Array[X]] + # @base_function: Hash[X, Y] + # @stack: Array[X] + # @h: Hash[X, (Integer|Float)?] + # @result: Hash[X, Y] + + # @rbs sets: Array[X] + # @rbs relation: Hash[X, Array[X]] + # @rbs base_function: Hash[X, Y] + # @rbs return: void def initialize(sets, relation, base_function) + # X in the paper @sets = sets + # R in the paper @relation = relation + # F' in the paper @base_function = base_function + # S in the paper @stack = [] + # N in the paper @h = Hash.new(0) + # F in the paper @result = {} end + # @rbs () -> Hash[X, Y] def compute @sets.each do |x| next if @h[x] != 0 @@ -29,6 +58,7 @@ module Lrama private + # @rbs (X x) -> void def traverse(x) @stack.push(x) d = @stack.count diff --git a/tool/lrama/lib/lrama/grammar.rb b/tool/lrama/lib/lrama/grammar.rb index 52625627bc..214ca1a3f2 100644 --- a/tool/lrama/lib/lrama/grammar.rb +++ b/tool/lrama/lib/lrama/grammar.rb @@ -28,14 +28,14 @@ module Lrama attr_reader :percent_codes, :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux, :parameterizing_rule_resolver attr_accessor :union, :expect, :printers, :error_tokens, :lex_param, :parse_param, :initial_action, :after_shift, :before_reduce, :after_reduce, :after_shift_error_token, :after_pop_stack, - :symbols_resolver, :types, :rules, :rule_builders, :sym_to_rules, :no_stdlib, :locations + :symbols_resolver, :types, :rules, :rule_builders, :sym_to_rules, :no_stdlib, :locations, :define def_delegators "@symbols_resolver", :symbols, :nterms, :terms, :add_nterm, :add_term, :find_term_by_s_value, :find_symbol_by_number!, :find_symbol_by_id!, :token_to_symbol, :find_symbol_by_s_value!, :fill_symbol_number, :fill_nterm_type, :fill_printer, :fill_destructor, :fill_error_token, :sort_by_number! - def initialize(rule_counter) + def initialize(rule_counter, define = {}) @rule_counter = rule_counter # Code defined by "%code" @@ -57,6 +57,7 @@ module Lrama @aux = Auxiliary.new @no_stdlib = false @locations = false + @define = define.map {|d| d.split('=') }.to_h append_special_symbols end @@ -171,6 +172,10 @@ module Lrama @sym_to_rules[sym.number] end + def ielr_defined? + @define.key?('lr.type') && @define['lr.type'] == 'ielr' + end + private def compute_nullable @@ -294,7 +299,7 @@ module Lrama end def resolve_inline_rules - while @rule_builders.any? {|r| r.has_inline_rules? } do + while @rule_builders.any?(&:has_inline_rules?) do @rule_builders = @rule_builders.flat_map do |builder| if builder.has_inline_rules? 
builder.resolve_inline_rules diff --git a/tool/lrama/lib/lrama/grammar/binding.rb b/tool/lrama/lib/lrama/grammar/binding.rb index 5e6e7c594b..2efb918a0b 100644 --- a/tool/lrama/lib/lrama/grammar/binding.rb +++ b/tool/lrama/lib/lrama/grammar/binding.rb @@ -1,34 +1,66 @@ +# rbs_inline: enabled # frozen_string_literal: true module Lrama class Grammar class Binding - attr_reader :actual_args, :count + # @rbs @actual_args: Array[Lexer::Token] + # @rbs @param_to_arg: Hash[String, Lexer::Token] - def initialize(parameterizing_rule, actual_args) - @parameters = parameterizing_rule.parameters + # @rbs (Array[Lexer::Token] params, Array[Lexer::Token] actual_args) -> void + def initialize(params, actual_args) @actual_args = actual_args - @parameter_to_arg = @parameters.zip(actual_args).map do |param, arg| - [param.s_value, arg] - end.to_h + @param_to_arg = map_params_to_args(params, @actual_args) end - def resolve_symbol(symbol) - if symbol.is_a?(Lexer::Token::InstantiateRule) - resolved_args = symbol.args.map { |arg| resolve_symbol(arg) } - Lrama::Lexer::Token::InstantiateRule.new(s_value: symbol.s_value, location: symbol.location, args: resolved_args, lhs_tag: symbol.lhs_tag) + # @rbs (Lexer::Token sym) -> Lexer::Token + def resolve_symbol(sym) + if sym.is_a?(Lexer::Token::InstantiateRule) + Lrama::Lexer::Token::InstantiateRule.new( + s_value: sym.s_value, location: sym.location, args: resolved_args(sym), lhs_tag: sym.lhs_tag + ) else - parameter_to_arg(symbol) || symbol + param_to_arg(sym) end end + # @rbs (Lexer::Token::InstantiateRule token) -> String + def concatenated_args_str(token) + "#{token.rule_name}_#{token_to_args_s_values(token).join('_')}" + end + private - def parameter_to_arg(symbol) - if (arg = @parameter_to_arg[symbol.s_value].dup) - arg.alias_name = symbol.alias_name + # @rbs (Array[Lexer::Token] params, Array[Lexer::Token] actual_args) -> Hash[String, Lexer::Token] + def map_params_to_args(params, actual_args) + params.zip(actual_args).map do |param, arg| + [param.s_value, arg] + end.to_h + end + + # @rbs (Lexer::Token::InstantiateRule sym) -> Array[Lexer::Token] + def resolved_args(sym) + sym.args.map { |arg| resolve_symbol(arg) } + end + + # @rbs (Lexer::Token sym) -> Lexer::Token + def param_to_arg(sym) + if (arg = @param_to_arg[sym.s_value].dup) + arg.alias_name = sym.alias_name + end + arg || sym + end + + # @rbs (Lexer::Token::InstantiateRule token) -> Array[String] + def token_to_args_s_values(token) + token.args.flat_map do |arg| + resolved = resolve_symbol(arg) + if resolved.is_a?(Lexer::Token::InstantiateRule) + [resolved.s_value] + resolved.args.map(&:s_value) + else + [resolved.s_value] + end end - arg end end end diff --git a/tool/lrama/lib/lrama/grammar/rule.rb b/tool/lrama/lib/lrama/grammar/rule.rb index 1f55bf8bfb..445752ae0d 100644 --- a/tool/lrama/lib/lrama/grammar/rule.rb +++ b/tool/lrama/lib/lrama/grammar/rule.rb @@ -21,6 +21,14 @@ module Lrama def display_name l = lhs.id.s_value r = empty_rule? ? "ε" : rhs.map {|r| r.id.s_value }.join(" ") + "#{l} -> #{r}" + end + + def display_name_without_action + l = lhs.id.s_value + r = empty_rule? ? "ε" : rhs.map do |r| + r.id.s_value if r.first_set.any? 
+ end.compact.join(" ") "#{l} -> #{r}" end diff --git a/tool/lrama/lib/lrama/grammar/rule_builder.rb b/tool/lrama/lib/lrama/grammar/rule_builder.rb index 06097eb71c..481a3780f4 100644 --- a/tool/lrama/lib/lrama/grammar/rule_builder.rb +++ b/tool/lrama/lib/lrama/grammar/rule_builder.rb @@ -73,7 +73,7 @@ module Lrama inline_rule.rhs_list.each do |inline_rhs| rule_builder = RuleBuilder.new(@rule_counter, @midrule_action_counter, @parameterizing_rule_resolver, lhs_tag: lhs_tag) if token.is_a?(Lexer::Token::InstantiateRule) - resolve_inline_rhs(rule_builder, inline_rhs, i, Binding.new(inline_rule, token.args)) + resolve_inline_rhs(rule_builder, inline_rhs, i, Binding.new(inline_rule.parameters, token.args)) else resolve_inline_rhs(rule_builder, inline_rhs, i) end @@ -135,8 +135,8 @@ module Lrama parameterizing_rule = @parameterizing_rule_resolver.find_rule(token) raise "Unexpected token. #{token}" unless parameterizing_rule - bindings = Binding.new(parameterizing_rule, token.args) - lhs_s_value = lhs_s_value(token, bindings) + bindings = Binding.new(parameterizing_rule.parameters, token.args) + lhs_s_value = bindings.concatenated_args_str(token) if (created_lhs = @parameterizing_rule_resolver.created_lhs(lhs_s_value)) @replaced_rhs << created_lhs else @@ -174,18 +174,6 @@ module Lrama end end - def lhs_s_value(token, bindings) - s_values = token.args.map do |arg| - resolved = bindings.resolve_symbol(arg) - if resolved.is_a?(Lexer::Token::InstantiateRule) - [resolved.s_value, resolved.args.map(&:s_value)] - else - resolved.s_value - end - end - "#{token.rule_name}_#{s_values.join('_')}" - end - def resolve_inline_rhs(rule_builder, inline_rhs, index, bindings = nil) rhs.each_with_index do |token, i| if index == i diff --git a/tool/lrama/lib/lrama/lexer.rb b/tool/lrama/lib/lrama/lexer.rb index 8146628656..c50af82ae4 100644 --- a/tool/lrama/lib/lrama/lexer.rb +++ b/tool/lrama/lib/lrama/lexer.rb @@ -169,12 +169,11 @@ module Lrama def lex_comment until @scanner.eos? do case - when @scanner.scan(/\n/) - newline - when @scanner.scan(/\*\//) + when @scanner.scan_until(/[\s\S]*?\*\//) + @scanner.matched.count("\n").times { newline } return - else - @scanner.getch + when @scanner.scan_until(/\n/) + newline end end end diff --git a/tool/lrama/lib/lrama/lexer/grammar_file.rb b/tool/lrama/lib/lrama/lexer/grammar_file.rb index 45c3122975..37e82ff18d 100644 --- a/tool/lrama/lib/lrama/lexer/grammar_file.rb +++ b/tool/lrama/lib/lrama/lexer/grammar_file.rb @@ -1,30 +1,37 @@ +# rbs_inline: enabled # frozen_string_literal: true module Lrama class Lexer class GrammarFile class Text < String + # @rbs () -> String def inspect length <= 50 ? 
super : "#{self[0..47]}...".inspect end end - attr_reader :path, :text + attr_reader :path #: String + attr_reader :text #: String + # @rbs (String path, String text) -> void def initialize(path, text) @path = path @text = Text.new(text).freeze end + # @rbs () -> String def inspect "<#{self.class}: @path=#{path}, @text=#{text.inspect}>" end + # @rbs (GrammarFile other) -> bool def ==(other) self.class == other.class && self.path == other.path end + # @rbs () -> Array[String] def lines @lines ||= text.split("\n") end diff --git a/tool/lrama/lib/lrama/lexer/location.rb b/tool/lrama/lib/lrama/lexer/location.rb index bf8f4f7e3e..defdbf8a0b 100644 --- a/tool/lrama/lib/lrama/lexer/location.rb +++ b/tool/lrama/lib/lrama/lexer/location.rb @@ -1,10 +1,16 @@ +# rbs_inline: enabled # frozen_string_literal: true module Lrama class Lexer class Location - attr_reader :grammar_file, :first_line, :first_column, :last_line, :last_column + attr_reader :grammar_file #: GrammarFile + attr_reader :first_line #: Integer + attr_reader :first_column #: Integer + attr_reader :last_line #: Integer + attr_reader :last_column #: Integer + # @rbs (grammar_file: GrammarFile, first_line: Integer, first_column: Integer, last_line: Integer, last_column: Integer) -> void def initialize(grammar_file:, first_line:, first_column:, last_line:, last_column:) @grammar_file = grammar_file @first_line = first_line @@ -13,6 +19,7 @@ module Lrama @last_column = last_column end + # @rbs (Location other) -> bool def ==(other) self.class == other.class && self.grammar_file == other.grammar_file && @@ -22,6 +29,7 @@ module Lrama self.last_column == other.last_column end + # @rbs (Integer left, Integer right) -> Location def partial_location(left, right) offset = -first_column new_first_line = -1 @@ -52,10 +60,12 @@ module Lrama ) end + # @rbs () -> String def to_s "#{path} (#{first_line},#{first_column})-(#{last_line},#{last_column})" end + # @rbs (String error_message) -> String def generate_error_message(error_message) <<~ERROR.chomp #{path}:#{first_line}:#{first_column}: #{error_message} @@ -63,6 +73,7 @@ module Lrama ERROR end + # @rbs () -> String def line_with_carets <<~TEXT #{text} @@ -72,22 +83,27 @@ module Lrama private + # @rbs () -> String def path grammar_file.path end + # @rbs () -> String def blanks (text[0...first_column] or raise "#{first_column} is invalid").gsub(/[^\t]/, ' ') end + # @rbs () -> String def carets blanks + '^' * (last_column - first_column) end + # @rbs () -> String def text @text ||= _text.join("\n") end + # @rbs () -> Array[String] def _text @_text ||=begin range = (first_line - 1)...last_line diff --git a/tool/lrama/lib/lrama/lexer/token.rb b/tool/lrama/lib/lrama/lexer/token.rb index 45a097f682..63da8be4a4 100644 --- a/tool/lrama/lib/lrama/lexer/token.rb +++ b/tool/lrama/lib/lrama/lexer/token.rb @@ -1,3 +1,4 @@ +# rbs_inline: enabled # frozen_string_literal: true require_relative 'token/char' @@ -9,9 +10,12 @@ require_relative 'token/user_code' module Lrama class Lexer class Token - attr_reader :s_value, :location - attr_accessor :alias_name, :referred + attr_reader :s_value #: String + attr_reader :location #: Location + attr_accessor :alias_name #: String + attr_accessor :referred #: bool + # @rbs (s_value: String, ?alias_name: String, ?location: Location) -> void def initialize(s_value:, alias_name: nil, location: nil) s_value.freeze @s_value = s_value @@ -19,36 +23,44 @@ module Lrama @location = location end + # @rbs () -> String def to_s "value: `#{s_value}`, location: #{location}" end + # @rbs 
(String string) -> bool def referred_by?(string) [self.s_value, self.alias_name].compact.include?(string) end + # @rbs (Token other) -> bool def ==(other) self.class == other.class && self.s_value == other.s_value end + # @rbs () -> Integer def first_line location.first_line end alias :line :first_line + # @rbs () -> Integer def first_column location.first_column end alias :column :first_column + # @rbs () -> Integer def last_line location.last_line end + # @rbs () -> Integer def last_column location.last_column end + # @rbs (Lrama::Grammar::Reference ref, String message) -> bot def invalid_ref(ref, message) location = self.location.partial_location(ref.first_column, ref.last_column) raise location.generate_error_message(message) diff --git a/tool/lrama/lib/lrama/lexer/token/char.rb b/tool/lrama/lib/lrama/lexer/token/char.rb index 9e21952c42..fcab7a588f 100644 --- a/tool/lrama/lib/lrama/lexer/token/char.rb +++ b/tool/lrama/lib/lrama/lexer/token/char.rb @@ -1,3 +1,4 @@ +# rbs_inline: enabled # frozen_string_literal: true module Lrama diff --git a/tool/lrama/lib/lrama/lexer/token/ident.rb b/tool/lrama/lib/lrama/lexer/token/ident.rb index 84835c00bc..8b1328a040 100644 --- a/tool/lrama/lib/lrama/lexer/token/ident.rb +++ b/tool/lrama/lib/lrama/lexer/token/ident.rb @@ -1,3 +1,4 @@ +# rbs_inline: enabled # frozen_string_literal: true module Lrama diff --git a/tool/lrama/lib/lrama/lexer/token/instantiate_rule.rb b/tool/lrama/lib/lrama/lexer/token/instantiate_rule.rb index db7e611c5f..37d412aa83 100644 --- a/tool/lrama/lib/lrama/lexer/token/instantiate_rule.rb +++ b/tool/lrama/lib/lrama/lexer/token/instantiate_rule.rb @@ -1,21 +1,26 @@ +# rbs_inline: enabled # frozen_string_literal: true module Lrama class Lexer class Token class InstantiateRule < Token - attr_reader :args, :lhs_tag + attr_reader :args #: Array[Lexer::Token] + attr_reader :lhs_tag #: Lexer::Token::Tag? + # @rbs (s_value: String, ?alias_name: String, ?location: Location, ?args: Array[Lexer::Token], ?lhs_tag: Lexer::Token::Tag?) 
-> void def initialize(s_value:, alias_name: nil, location: nil, args: [], lhs_tag: nil) super s_value: s_value, alias_name: alias_name, location: location @args = args @lhs_tag = lhs_tag end + # @rbs () -> String def rule_name s_value end + # @rbs () -> Integer def args_count args.count end diff --git a/tool/lrama/lib/lrama/lexer/token/tag.rb b/tool/lrama/lib/lrama/lexer/token/tag.rb index 52dcb50ce7..b346ef7c5c 100644 --- a/tool/lrama/lib/lrama/lexer/token/tag.rb +++ b/tool/lrama/lib/lrama/lexer/token/tag.rb @@ -1,11 +1,13 @@ +# rbs_inline: enabled # frozen_string_literal: true module Lrama class Lexer class Token class Tag < Token - # Omit "<>" + # @rbs () -> String def member + # Omit "<>" s_value[1..-2] or raise "Unexpected Tag format (#{s_value})" end end diff --git a/tool/lrama/lib/lrama/lexer/token/user_code.rb b/tool/lrama/lib/lrama/lexer/token/user_code.rb index c606200d7a..4ef40e6dc8 100644 --- a/tool/lrama/lib/lrama/lexer/token/user_code.rb +++ b/tool/lrama/lib/lrama/lexer/token/user_code.rb @@ -1,3 +1,4 @@ +# rbs_inline: enabled # frozen_string_literal: true require "strscan" @@ -6,14 +7,16 @@ module Lrama class Lexer class Token class UserCode < Token - attr_accessor :tag + attr_accessor :tag #: Lexer::Token::Tag + # @rbs () -> Array[Lrama::Grammar::Reference] def references @references ||= _references end private + # @rbs () -> Array[Lrama::Grammar::Reference] def _references scanner = StringScanner.new(s_value) references = [] #: Array[Grammar::Reference] @@ -32,6 +35,7 @@ module Lrama references end + # @rbs (StringScanner scanner) -> Lrama::Grammar::Reference? def scan_reference(scanner) start = scanner.pos case diff --git a/tool/lrama/lib/lrama/logger.rb b/tool/lrama/lib/lrama/logger.rb index e98eef0fa5..88bb920960 100644 --- a/tool/lrama/lib/lrama/logger.rb +++ b/tool/lrama/lib/lrama/logger.rb @@ -1,15 +1,19 @@ +# rbs_inline: enabled # frozen_string_literal: true module Lrama class Logger + # @rbs (IO out) -> void def initialize(out = STDERR) @out = out end + # @rbs (String message) -> void def warn(message) @out << message << "\n" end + # @rbs (String message) -> void def error(message) @out << message << "\n" end diff --git a/tool/lrama/lib/lrama/option_parser.rb b/tool/lrama/lib/lrama/option_parser.rb index 0727d1b37f..23988a5fbb 100644 --- a/tool/lrama/lib/lrama/option_parser.rb +++ b/tool/lrama/lib/lrama/option_parser.rb @@ -59,8 +59,8 @@ module Lrama o.separator '' o.separator 'Tuning the Parser:' o.on('-S', '--skeleton=FILE', 'specify the skeleton to use') {|v| @options.skeleton = v } - o.on('-t', 'reserved, do nothing') { } - o.on('--debug', 'display debugging outputs of internal parser') {|v| @options.debug = true } + o.on('-t', '--debug', 'display debugging outputs of internal parser') {|v| @options.debug = true } + o.on('-D', '--define=NAME[=VALUE]', Array, "similar to '%define NAME VALUE'") {|v| @options.define = v } o.separator '' o.separator 'Output:' o.on('-H', '--header=[FILE]', 'also produce a header file named FILE') {|v| @options.header = true; @options.header_file = v } @@ -86,6 +86,7 @@ module Lrama o.on_tail ' automaton display states' o.on_tail ' closure display states' o.on_tail ' rules display grammar rules' + o.on_tail ' only-explicit-rules display only explicit grammar rules' o.on_tail ' actions display grammar rules with actions' o.on_tail ' time display generation time' o.on_tail ' all include all the above traces' @@ -136,26 +137,27 @@ module Lrama VALID_TRACES = %w[ locations scan parse automaton bitsets closure - grammar rules actions 
resource sets muscles - tools m4-early m4 skeleton time ielr cex + grammar rules only-explicit-rules actions resource + sets muscles tools m4-early m4 skeleton time ielr cex ].freeze NOT_SUPPORTED_TRACES = %w[ locations scan parse bitsets grammar resource sets muscles tools m4-early m4 skeleton ielr cex ].freeze + SUPPORTED_TRACES = VALID_TRACES - NOT_SUPPORTED_TRACES def validate_trace(trace) h = {} return h if trace.empty? || trace == ['none'] - supported = VALID_TRACES - NOT_SUPPORTED_TRACES + all_traces = SUPPORTED_TRACES - %w[only-explicit-rules] if trace == ['all'] - supported.each { |t| h[t.to_sym] = true } + all_traces.each { |t| h[t.gsub(/-/, '_').to_sym] = true } return h end trace.each do |t| - if supported.include?(t) - h[t.to_sym] = true + if SUPPORTED_TRACES.include?(t) + h[t.gsub(/-/, '_').to_sym] = true else raise "Invalid trace option \"#{t}\"." end diff --git a/tool/lrama/lib/lrama/options.rb b/tool/lrama/lib/lrama/options.rb index ccd7680348..08f75a770f 100644 --- a/tool/lrama/lib/lrama/options.rb +++ b/tool/lrama/lib/lrama/options.rb @@ -7,10 +7,11 @@ module Lrama :report_file, :outfile, :error_recovery, :grammar_file, :trace_opts, :report_opts, - :diagnostic, :y, :debug + :diagnostic, :y, :debug, :define def initialize @skeleton = "bison/yacc.c" + @define = {} @header = false @header_file = nil @report_file = nil diff --git a/tool/lrama/lib/lrama/parser.rb b/tool/lrama/lib/lrama/parser.rb index 237a34d6a8..177e784e5c 100644 --- a/tool/lrama/lib/lrama/parser.rb +++ b/tool/lrama/lib/lrama/parser.rb @@ -658,17 +658,18 @@ module_eval(<<'...end parser.y/module_eval...', 'parser.y', 428) include Lrama::Report::Duration -def initialize(text, path, debug = false) +def initialize(text, path, debug = false, define = {}) @grammar_file = Lrama::Lexer::GrammarFile.new(path, text) @yydebug = debug @rule_counter = Lrama::Grammar::Counter.new(0) @midrule_action_counter = Lrama::Grammar::Counter.new(1) + @define = define end def parse report_duration(:parse) do @lexer = Lrama::Lexer.new(@grammar_file) - @grammar = Lrama::Grammar.new(@rule_counter) + @grammar = Lrama::Grammar.new(@rule_counter, @define) @precedence_number = 0 reset_precs do_parse @@ -914,7 +915,7 @@ racc_reduce_table = [ 2, 73, :_reduce_15, 1, 60, :_reduce_none, 2, 60, :_reduce_17, - 3, 60, :_reduce_none, + 3, 60, :_reduce_18, 2, 60, :_reduce_none, 2, 60, :_reduce_20, 2, 60, :_reduce_21, @@ -1328,7 +1329,12 @@ module_eval(<<'.,.,', 'parser.y', 26) end .,., -# reduce 18 omitted +module_eval(<<'.,.,', 'parser.y', 27) + def _reduce_18(val, _values, result) + @grammar.define[val[1].s_value] = val[2]&.s_value + result + end +.,., # reduce 19 omitted diff --git a/tool/lrama/lib/lrama/state.rb b/tool/lrama/lib/lrama/state.rb index c2623746aa..3008786ced 100644 --- a/tool/lrama/lib/lrama/state.rb +++ b/tool/lrama/lib/lrama/state.rb @@ -10,7 +10,7 @@ module Lrama class State attr_reader :id, :accessing_symbol, :kernels, :conflicts, :resolved_conflicts, :default_reduction_rule, :closure, :items - attr_accessor :shifts, :reduces + attr_accessor :shifts, :reduces, :ielr_isocores, :lalr_isocore def initialize(id, accessing_symbol, kernels) @id = id @@ -23,6 +23,12 @@ module Lrama @conflicts = [] @resolved_conflicts = [] @default_reduction_rule = nil + @predecessors = [] + @lalr_isocore = self + @ielr_isocores = [self] + @internal_dependencies = {} + @successor_dependencies = {} + @always_follows = {} end def closure=(closure) @@ -84,6 +90,18 @@ module Lrama @transitions ||= shifts.map {|shift| [shift, 
@items_to_state[shift.next_items]] } end + def update_transition(shift, next_state) + set_items_to_state(shift.next_items, next_state) + next_state.append_predecessor(self) + clear_transitions_cache + end + + def clear_transitions_cache + @nterm_transitions = nil + @term_transitions = nil + @transitions = nil + end + def selected_term_transitions term_transitions.reject do |shift, next_state| shift.not_selected @@ -142,5 +160,274 @@ module Lrama conflict.type == :reduce_reduce end end + + def propagate_lookaheads(next_state) + next_state.kernels.map {|item| + lookahead_sets = + if item.position == 1 + goto_follow_set(item.lhs) + else + kernel = kernels.find {|k| k.predecessor_item_of?(item) } + item_lookahead_set[kernel] + end + + [item, lookahead_sets & next_state.lookahead_set_filters[item]] + }.to_h + end + + def lookaheads_recomputed + !@item_lookahead_set.nil? + end + + def compatible_lookahead?(filtered_lookahead) + !lookaheads_recomputed || + @lalr_isocore.annotation_list.all? {|token, actions| + a = dominant_contribution(token, actions, item_lookahead_set) + b = dominant_contribution(token, actions, filtered_lookahead) + a.nil? || b.nil? || a == b + } + end + + def lookahead_set_filters + kernels.map {|kernel| + [kernel, + @lalr_isocore.annotation_list.select {|token, actions| + token.term? && actions.any? {|action, contributions| + !contributions.nil? && contributions.key?(kernel) && contributions[kernel] + } + }.map {|token, _| token } + ] + }.to_h + end + + def dominant_contribution(token, actions, lookaheads) + a = actions.select {|action, contributions| + contributions.nil? || contributions.any? {|item, contributed| contributed && lookaheads[item].include?(token) } + }.map {|action, _| action } + return nil if a.empty? + a.reject {|action| + if action.is_a?(State::Shift) + action.not_selected + elsif action.is_a?(State::Reduce) + action.not_selected_symbols.include?(token) + end + } + end + + def inadequacy_list + return @inadequacy_list if @inadequacy_list + + shift_contributions = shifts.map {|shift| + [shift.next_sym, [shift]] + }.to_h + reduce_contributions = reduces.map {|reduce| + (reduce.look_ahead || []).map {|sym| + [sym, [reduce]] + }.to_h + }.reduce(Hash.new([])) {|hash, cont| + hash.merge(cont) {|_, a, b| a | b } + } + + list = shift_contributions.merge(reduce_contributions) {|_, a, b| a | b } + @inadequacy_list = list.select {|token, actions| token.term? && actions.size > 1 } + end + + def annotation_list + return @annotation_list if @annotation_list + + @annotation_list = annotate_manifestation + @annotation_list = @items_to_state.values.map {|next_state| next_state.annotate_predecessor(self) } + .reduce(@annotation_list) {|result, annotations| + result.merge(annotations) {|_, actions_a, actions_b| + if actions_a.nil? || actions_b.nil? + actions_a || actions_b + else + actions_a.merge(actions_b) {|_, contributions_a, contributions_b| + if contributions_a.nil? || contributions_b.nil? + next contributions_a || contributions_b + end + + contributions_a.merge(contributions_b) {|_, contributed_a, contributed_b| + contributed_a || contributed_b + } + } + end + } + } + end + + def annotate_manifestation + inadequacy_list.transform_values {|actions| + actions.map {|action| + if action.is_a?(Shift) + [action, nil] + elsif action.is_a?(Reduce) + if action.rule.empty_rule? + [action, lhs_contributions(action.rule.lhs, inadequacy_list.key(actions))] + else + contributions = kernels.map {|kernel| [kernel, kernel.rule == action.rule && kernel.end_of_rule?] 
}.to_h + [action, contributions] + end + end + }.to_h + } + end + + def annotate_predecessor(predecessor) + annotation_list.transform_values {|actions| + token = annotation_list.key(actions) + actions.transform_values {|inadequacy| + next nil if inadequacy.nil? + lhs_adequacy = kernels.any? {|kernel| + inadequacy[kernel] && kernel.position == 1 && predecessor.lhs_contributions(kernel.lhs, token).nil? + } + if lhs_adequacy + next nil + else + predecessor.kernels.map {|pred_k| + [pred_k, kernels.any? {|k| + inadequacy[k] && ( + pred_k.predecessor_item_of?(k) && predecessor.item_lookahead_set[pred_k].include?(token) || + k.position == 1 && predecessor.lhs_contributions(k.lhs, token)[pred_k] + ) + }] + }.to_h + end + } + } + end + + def lhs_contributions(sym, token) + shift, next_state = nterm_transitions.find {|sh, _| sh.next_sym == sym } + if always_follows(shift, next_state).include?(token) + nil + else + kernels.map {|kernel| [kernel, follow_kernel_items(shift, next_state, kernel) && item_lookahead_set[kernel].include?(token)] }.to_h + end + end + + def follow_kernel_items(shift, next_state, kernel) + queue = [[self, shift, next_state]] + until queue.empty? + st, sh, next_st = queue.pop + return true if kernel.next_sym == sh.next_sym && kernel.symbols_after_transition.all?(&:nullable) + st.internal_dependencies(sh, next_st).each {|v| queue << v } + end + false + end + + def item_lookahead_set + return @item_lookahead_set if @item_lookahead_set + + kernels.map {|item| + value = + if item.lhs.accept_symbol? + [] + elsif item.position > 1 + prev_items = predecessors_with_item(item) + prev_items.map {|st, i| st.item_lookahead_set[i] }.reduce([]) {|acc, syms| acc |= syms } + elsif item.position == 1 + prev_state = @predecessors.find {|p| p.shifts.any? {|shift| shift.next_sym == item.lhs } } + shift, next_state = prev_state.nterm_transitions.find {|shift, _| shift.next_sym == item.lhs } + prev_state.goto_follows(shift, next_state) + end + [item, value] + }.to_h + end + + def item_lookahead_set=(k) + @item_lookahead_set = k + end + + def predecessors_with_item(item) + result = [] + @predecessors.each do |pre| + pre.items.each do |i| + result << [pre, i] if i.predecessor_item_of?(item) + end + end + result + end + + def append_predecessor(prev_state) + @predecessors << prev_state + @predecessors.uniq! + end + + def goto_follow_set(nterm_token) + return [] if nterm_token.accept_symbol? + shift, next_state = @lalr_isocore.nterm_transitions.find {|sh, _| sh.next_sym == nterm_token } + + @kernels + .select {|kernel| follow_kernel_items(shift, next_state, kernel) } + .map {|kernel| item_lookahead_set[kernel] } + .reduce(always_follows(shift, next_state)) {|result, terms| result |= terms } + end + + def goto_follows(shift, next_state) + queue = internal_dependencies(shift, next_state) + predecessor_dependencies(shift, next_state) + terms = always_follows(shift, next_state) + until queue.empty? + st, sh, next_st = queue.pop + terms |= st.always_follows(sh, next_st) + st.internal_dependencies(sh, next_st).each {|v| queue << v } + st.predecessor_dependencies(sh, next_st).each {|v| queue << v } + end + terms + end + + def always_follows(shift, next_state) + return @always_follows[[shift, next_state]] if @always_follows[[shift, next_state]] + + queue = internal_dependencies(shift, next_state) + successor_dependencies(shift, next_state) + terms = [] + until queue.empty? 
+ st, sh, next_st = queue.pop + terms |= next_st.term_transitions.map {|sh, _| sh.next_sym } + st.internal_dependencies(sh, next_st).each {|v| queue << v } + st.successor_dependencies(sh, next_st).each {|v| queue << v } + end + @always_follows[[shift, next_state]] = terms + end + + def internal_dependencies(shift, next_state) + return @internal_dependencies[[shift, next_state]] if @internal_dependencies[[shift, next_state]] + + syms = @items.select {|i| + i.next_sym == shift.next_sym && i.symbols_after_transition.all?(&:nullable) && i.position == 0 + }.map(&:lhs).uniq + @internal_dependencies[[shift, next_state]] = nterm_transitions.select {|sh, _| syms.include?(sh.next_sym) }.map {|goto| [self, *goto] } + end + + def successor_dependencies(shift, next_state) + return @successor_dependencies[[shift, next_state]] if @successor_dependencies[[shift, next_state]] + + @successor_dependencies[[shift, next_state]] = + next_state.nterm_transitions + .select {|next_shift, _| next_shift.next_sym.nullable } + .map {|transition| [next_state, *transition] } + end + + def predecessor_dependencies(shift, next_state) + state_items = [] + @kernels.select {|kernel| + kernel.next_sym == shift.next_sym && kernel.symbols_after_transition.all?(&:nullable) + }.each do |item| + queue = predecessors_with_item(item) + until queue.empty? + st, i = queue.pop + if i.position == 0 + state_items << [st, i] + else + st.predecessors_with_item(i).each {|v| queue << v } + end + end + end + + state_items.map {|state, item| + sh, next_st = state.nterm_transitions.find {|shi, _| shi.next_sym == item.lhs } + [state, sh, next_st] + } + end end end diff --git a/tool/lrama/lib/lrama/states.rb b/tool/lrama/lib/lrama/states.rb index 0ed4bff9c1..fd8ded905f 100644 --- a/tool/lrama/lib/lrama/states.rb +++ b/tool/lrama/lib/lrama/states.rb @@ -41,6 +41,8 @@ module Lrama # value is array of [state.id, nterm.token_id]. @reads_relation = {} + # `Read(p, A) =s DR(p, A) ∪ ∪{Read(r, C) | (p, A) reads (r, C)}` + # # `@read_sets` is a hash whose # key is [state.id, nterm.token_id], # value is bitmap of term. @@ -62,6 +64,8 @@ module Lrama # value is array of [state.id, nterm.token_id]. @lookback_relation = {} + # `Follow(p, A) =s Read(p, A) ∪ ∪{Follow(p', B) | (p, A) includes (p', B)}` + # # `@follow_sets` is a hash whose # key is [state.id, rule.id], # value is bitmap of term. 
@@ -92,6 +96,20 @@ module Lrama report_duration(:compute_default_reduction) { compute_default_reduction } end + def compute_ielr + report_duration(:split_states) { split_states } + report_duration(:compute_direct_read_sets) { compute_direct_read_sets } + report_duration(:compute_reads_relation) { compute_reads_relation } + report_duration(:compute_read_sets) { compute_read_sets } + report_duration(:compute_includes_relation) { compute_includes_relation } + report_duration(:compute_lookback_relation) { compute_lookback_relation } + report_duration(:compute_follow_sets) { compute_follow_sets } + report_duration(:compute_look_ahead_sets) { compute_look_ahead_sets } + report_duration(:compute_conflicts) { compute_conflicts } + + report_duration(:compute_default_reduction) { compute_default_reduction } + end + def reporter StatesReporter.new(self) end @@ -235,7 +253,7 @@ module Lrama # Trace previous = state.kernels.first.previous_sym trace_state do |out| - out << sprintf("state_list_append (state = %d, symbol = %d (%s))", + out << sprintf("state_list_append (state = %d, symbol = %d (%s))\n", @states.count, previous.number, previous.display_name) end @@ -265,7 +283,10 @@ module Lrama state.shifts.each do |shift| new_state, created = create_state(shift.next_sym, shift.next_items, states_created) state.set_items_to_state(shift.next_items, new_state) - enqueue_state(states, new_state) if created + if created + enqueue_state(states, new_state) + new_state.append_predecessor(state) + end end end end @@ -524,5 +545,51 @@ module Lrama end.first end end + + def split_states + @states.each do |state| + state.transitions.each do |shift, next_state| + compute_state(state, shift, next_state) + end + end + end + + def merge_lookaheads(state, filtered_lookaheads) + return if state.kernels.all? {|item| (filtered_lookaheads[item] - state.item_lookahead_set[item]).empty? } + + state.item_lookahead_set = state.item_lookahead_set.merge {|_, v1, v2| v1 | v2 } + state.transitions.each do |shift, next_state| + next if next_state.lookaheads_recomputed + compute_state(state, shift, next_state) + end + end + + def compute_state(state, shift, next_state) + filtered_lookaheads = state.propagate_lookaheads(next_state) + s = next_state.ielr_isocores.find {|st| st.compatible_lookahead?(filtered_lookaheads) } + + if s.nil? + s = next_state.ielr_isocores.last + new_state = State.new(@states.count, s.accessing_symbol, s.kernels) + new_state.closure = s.closure + new_state.compute_shifts_reduces + s.transitions.each do |sh, next_state| + new_state.set_items_to_state(sh.next_items, next_state) + end + @states << new_state + new_state.lalr_isocore = s + s.ielr_isocores << new_state + s.ielr_isocores.each do |st| + st.ielr_isocores = s.ielr_isocores + end + new_state.item_lookahead_set = filtered_lookaheads + state.update_transition(shift, new_state) + elsif(!s.lookaheads_recomputed) + s.item_lookahead_set = filtered_lookaheads + else + state.update_transition(shift, s) + merge_lookaheads(s, filtered_lookaheads) + end + end end end diff --git a/tool/lrama/lib/lrama/states/item.rb b/tool/lrama/lib/lrama/states/item.rb index 5074e943b7..e89cb9695b 100644 --- a/tool/lrama/lib/lrama/states/item.rb +++ b/tool/lrama/lib/lrama/states/item.rb @@ -64,6 +64,10 @@ module Lrama rhs[position..-1] end + def symbols_after_transition + rhs[position+1..-1] + end + def to_s "#{lhs.id.s_value}: #{display_name}" end @@ -78,6 +82,10 @@ module Lrama r = symbols_after_dot.map(&:display_name).join(" ") ". 
#{r} (rule #{rule_id})" end + + def predecessor_item_of?(other_item) + rule == other_item.rule && position == other_item.position - 1 + end end end end diff --git a/tool/lrama/lib/lrama/trace_reporter.rb b/tool/lrama/lib/lrama/trace_reporter.rb index 87c01a4c8a..bcf1ef1e50 100644 --- a/tool/lrama/lib/lrama/trace_reporter.rb +++ b/tool/lrama/lib/lrama/trace_reporter.rb @@ -1,27 +1,42 @@ +# rbs_inline: enabled # frozen_string_literal: true module Lrama class TraceReporter + # @rbs (Lrama::Grammar grammar) -> void def initialize(grammar) @grammar = grammar end + # @rbs (**Hash[Symbol, bool] options) -> void def report(**options) _report(**options) end private - def _report(rules: false, actions: false, **_) - report_rules if rules + # @rbs rules: (bool rules, bool actions, bool only_explicit_rules, **untyped _) -> void + def _report(rules: false, actions: false, only_explicit_rules: false, **_) + report_rules if rules && !only_explicit_rules + report_only_explicit_rules if only_explicit_rules report_actions if actions end + # @rbs () -> void def report_rules puts "Grammar rules:" @grammar.rules.each { |rule| puts rule.display_name } end + # @rbs () -> void + def report_only_explicit_rules + puts "Grammar rules:" + @grammar.rules.each do |rule| + puts rule.display_name_without_action if rule.lhs.first_set.any? + end + end + + # @rbs () -> void def report_actions puts "Grammar rules with actions:" @grammar.rules.each { |rule| puts rule.with_actions } diff --git a/tool/lrama/lib/lrama/version.rb b/tool/lrama/lib/lrama/version.rb index ff55e34b51..12ece5a8f2 100644 --- a/tool/lrama/lib/lrama/version.rb +++ b/tool/lrama/lib/lrama/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module Lrama - VERSION = "0.6.11".freeze + VERSION = "0.7.0".freeze end |
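Usage sketch (not part of the commit itself): the new `-D`/`--define` option added in `option_parser.rb` fills `Options#define`, which `Command` passes through `Parser` into `Grammar#define`, so `Grammar#ielr_defined?` can also be satisfied from the command line instead of a `%define` directive in the grammar. The grammar file name `sample.y` is taken from the NEWS examples above; the exact invocation is illustrative.

```console
# Illustrative invocation; assumes a grammar file named sample.y (as in the NEWS example).
$ exe/lrama -D lr.type=ielr sample.y
```

Since `Grammar#initialize` splits each `NAME=VALUE` pair on `=` into a hash (here `{"lr.type" => "ielr"}`), this should be equivalent to writing `%define lr.type ielr` in the grammar file, causing `Command` to run `states.compute_ielr` after the usual LALR computation.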