diff options
author | yui-knk <[email protected]> | 2023-08-16 21:31:54 +0900 |
---|---|---|
committer | Yuichiro Kaneko <[email protected]> | 2023-08-17 19:29:36 +0900 |
commit | 8c447cffe441e68d73d6d6b1235b574439c34546 (patch) | |
tree | cdadefd511db6180cf5c1bcfce0f0496a61ec18f | |
parent | d26b015e83e499e3bad61d6f2ef88eeca55fdedc (diff) |
Lrama v0.5.4
Notes
Notes:
Merged: https://github.com/ruby/ruby/pull/8236
27 files changed, 871 insertions, 168 deletions
diff --git a/tool/lrama/lib/lrama.rb b/tool/lrama/lib/lrama.rb index 19f579c330..12e635d8b6 100644 --- a/tool/lrama/lib/lrama.rb +++ b/tool/lrama/lib/lrama.rb @@ -1,6 +1,7 @@ require "lrama/bitmap" require "lrama/command" require "lrama/context" +require "lrama/counterexamples" require "lrama/digraph" require "lrama/grammar" require "lrama/lexer" @@ -10,5 +11,6 @@ require "lrama/report" require "lrama/state" require "lrama/states" require "lrama/states_reporter" +require "lrama/type" require "lrama/version" require "lrama/warning" diff --git a/tool/lrama/lib/lrama/command.rb b/tool/lrama/lib/lrama/command.rb index 2d37639d69..9fceb55fe0 100644 --- a/tool/lrama/lib/lrama/command.rb +++ b/tool/lrama/lib/lrama/command.rb @@ -67,7 +67,7 @@ module Lrama bison_list = %w[states itemsets lookaheads solved counterexamples cex all none] others = %w[verbose] list = bison_list + others - not_supported = %w[counterexamples cex none] + not_supported = %w[cex none] h = { grammar: true } report.each do |r| @@ -121,13 +121,13 @@ module Lrama # Output Files: opt.on('-h', '--header=[FILE]') {|v| @header = true; @header_file = v } opt.on('-d') { @header = true } - opt.on('-r', '--report=THINGS') {|v| @report = v.split(',') } + opt.on('-r', '--report=THINGS', Array) {|v| @report = v } opt.on('--report-file=FILE') {|v| @report_file = v } opt.on('-v') { } # Do nothing opt.on('-o', '--output=FILE') {|v| @outfile = v } # Hidden - opt.on('--trace=THINGS') {|v| @trace = v.split(',') } + opt.on('--trace=THINGS', Array) {|v| @trace = v } # Error Recovery opt.on('-e') {|v| @error_recovery = true } diff --git a/tool/lrama/lib/lrama/counterexamples.rb b/tool/lrama/lib/lrama/counterexamples.rb new file mode 100644 index 0000000000..a5d62a0c7c --- /dev/null +++ b/tool/lrama/lib/lrama/counterexamples.rb @@ -0,0 +1,285 @@ +require "set" + +require "lrama/counterexamples/derivation" +require "lrama/counterexamples/example" +require "lrama/counterexamples/path" +require "lrama/counterexamples/state_item" +require "lrama/counterexamples/triple" + +module Lrama + # See: https://www.cs.cornell.edu/andru/papers/cupex/cupex.pdf + # 4. Constructing Nonunifying Counterexamples + class Counterexamples + attr_reader :transitions, :productions + + def initialize(states) + @states = states + setup_transitions + setup_productions + end + + def to_s + "#<Counterexamples>" + end + alias :inspect :to_s + + def compute(conflict_state) + conflict_state.conflicts.flat_map do |conflict| + case conflict.type + when :shift_reduce + shift_reduce_example(conflict_state, conflict) + when :reduce_reduce + reduce_reduce_examples(conflict_state, conflict) + end + end.compact + end + + private + + def setup_transitions + # Hash [StateItem, Symbol] => StateItem + @transitions = {} + # Hash [StateItem, Symbol] => Set(StateItem) + @reverse_transitions = {} + + @states.states.each do |src_state| + trans = {} + + src_state.transitions.each do |shift, next_state| + trans[shift.next_sym] = next_state + end + + src_state.items.each do |src_item| + next if src_item.end_of_rule? + sym = src_item.next_sym + dest_state = trans[sym] + + dest_state.kernels.each do |dest_item| + next unless (src_item.rule == dest_item.rule) && (src_item.position + 1 == dest_item.position) + src_state_item = StateItem.new(src_state, src_item) + dest_state_item = StateItem.new(dest_state, dest_item) + + @transitions[[src_state_item, sym]] = dest_state_item + + key = [dest_state_item, sym] + @reverse_transitions[key] ||= Set.new + @reverse_transitions[key] << src_state_item + end + end + end + end + + def setup_productions + # Hash [StateItem] => Set(Item) + @productions = {} + # Hash [State, Symbol] => Set(Item). Symbol is nterm + @reverse_productions = {} + + @states.states.each do |state| + # LHS => Set(Item) + h = {} + + state.closure.each do |item| + sym = item.lhs + + h[sym] ||= Set.new + h[sym] << item + end + + state.items.each do |item| + next if item.end_of_rule? + next if item.next_sym.term? + + sym = item.next_sym + state_item = StateItem.new(state, item) + key = [state, sym] + + @productions[state_item] = h[sym] + + @reverse_productions[key] ||= Set.new + @reverse_productions[key] << item + end + end + end + + def shift_reduce_example(conflict_state, conflict) + conflict_symbol = conflict.symbols.first + shift_conflict_item = conflict_state.items.find { |item| item.next_sym == conflict_symbol } + path2 = shortest_path(conflict_state, conflict.reduce.item, conflict_symbol) + path1 = find_shift_conflict_shortest_path(path2, conflict_state, shift_conflict_item) + + Example.new(path1, path2, conflict, conflict_symbol, self) + end + + def reduce_reduce_examples(conflict_state, conflict) + conflict_symbol = conflict.symbols.first + path1 = shortest_path(conflict_state, conflict.reduce1.item, conflict_symbol) + path2 = shortest_path(conflict_state, conflict.reduce2.item, conflict_symbol) + + Example.new(path1, path2, conflict, conflict_symbol, self) + end + + def find_shift_conflict_shortest_path(reduce_path, conflict_state, conflict_item) + state_items = find_shift_conflict_shortest_state_items(reduce_path, conflict_state, conflict_item) + build_paths_from_state_items(state_items) + end + + def find_shift_conflict_shortest_state_items(reduce_path, conflict_state, conflict_item) + target_state_item = StateItem.new(conflict_state, conflict_item) + result = [target_state_item] + reversed_reduce_path = reduce_path.to_a.reverse + # Index for state_item + i = 0 + + while (path = reversed_reduce_path[i]) + # Index for prev_state_item + j = i + 1 + _j = j + + while (prev_path = reversed_reduce_path[j]) + if prev_path.production? + j += 1 + else + break + end + end + + state_item = path.to + prev_state_item = prev_path&.to + + if target_state_item == state_item || target_state_item.item.start_item? + result.concat(reversed_reduce_path[_j..-1].map(&:to)) + break + end + + if target_state_item.item.beginning_of_rule? + queue = [] + queue << [target_state_item] + + # Find reverse production + while (sis = queue.shift) + si = sis.last + + # Reach to start state + if si.item.start_item? + sis.shift + result.concat(sis) + target_state_item = si + break + end + + if !si.item.beginning_of_rule? + key = [si, si.item.previous_sym] + @reverse_transitions[key].each do |prev_target_state_item| + next if prev_target_state_item.state != prev_state_item.state + sis.shift + result.concat(sis) + result << prev_target_state_item + target_state_item = prev_target_state_item + i = j + queue.clear + break + end + else + key = [si.state, si.item.lhs] + @reverse_productions[key].each do |item| + state_item = StateItem.new(si.state, item) + queue << (sis + [state_item]) + end + end + end + else + # Find reverse transition + key = [target_state_item, target_state_item.item.previous_sym] + @reverse_transitions[key].each do |prev_target_state_item| + next if prev_target_state_item.state != prev_state_item.state + result << prev_target_state_item + target_state_item = prev_target_state_item + i = j + break + end + end + end + + result.reverse + end + + def build_paths_from_state_items(state_items) + paths = state_items.zip([nil] + state_items).map do |si, prev_si| + case + when prev_si.nil? + StartPath.new(si) + when si.item.beginning_of_rule? + ProductionPath.new(prev_si, si) + else + TransitionPath.new(prev_si, si) + end + end + + paths + end + + def shortest_path(conflict_state, conflict_reduce_item, conflict_term) + # queue: is an array of [Triple, [Path]] + queue = [] + visited = {} + start_state = @states.states.first + raise "BUG: Start state should be just one kernel." if start_state.kernels.count != 1 + + start = Triple.new(start_state, start_state.kernels.first, Set.new([@states.eof_symbol])) + + queue << [start, [StartPath.new(start.state_item)]] + + while true + triple, paths = queue.shift + + next if visited[triple] + visited[triple] = true + + # Found + if triple.state == conflict_state && triple.item == conflict_reduce_item && triple.l.include?(conflict_term) + return paths + end + + # transition + triple.state.transitions.each do |shift, next_state| + next unless triple.item.next_sym && triple.item.next_sym == shift.next_sym + next_state.kernels.each do |kernel| + next if kernel.rule != triple.item.rule + t = Triple.new(next_state, kernel, triple.l) + queue << [t, paths + [TransitionPath.new(triple.state_item, t.state_item)]] + end + end + + # production step + triple.state.closure.each do |item| + next unless triple.item.next_sym && triple.item.next_sym == item.lhs + l = follow_l(triple.item, triple.l) + t = Triple.new(triple.state, item, l) + queue << [t, paths + [ProductionPath.new(triple.state_item, t.state_item)]] + end + + break if queue.empty? + end + + return nil + end + + def follow_l(item, current_l) + # 1. follow_L (A -> X1 ... Xn-1 • Xn) = L + # 2. follow_L (A -> X1 ... Xk • Xk+1 Xk+2 ... Xn) = {Xk+2} if Xk+2 is a terminal + # 3. follow_L (A -> X1 ... Xk • Xk+1 Xk+2 ... Xn) = FIRST(Xk+2) if Xk+2 is a nonnullable nonterminal + # 4. follow_L (A -> X1 ... Xk • Xk+1 Xk+2 ... Xn) = FIRST(Xk+2) + follow_L (A -> X1 ... Xk+1 • Xk+2 ... Xn) if Xk+2 is a nullable nonterminal + case + when item.number_of_rest_symbols == 1 + current_l + when item.next_next_sym.term? + Set.new([item.next_next_sym]) + when !item.next_next_sym.nullable + item.next_next_sym.first_set + else + item.next_next_sym.first_set + follow_l(item.new_by_next_position, current_l) + end + end + end +end diff --git a/tool/lrama/lib/lrama/counterexamples/derivation.rb b/tool/lrama/lib/lrama/counterexamples/derivation.rb new file mode 100644 index 0000000000..691e935356 --- /dev/null +++ b/tool/lrama/lib/lrama/counterexamples/derivation.rb @@ -0,0 +1,63 @@ +module Lrama + class Counterexamples + class Derivation + attr_reader :item, :left, :right + attr_writer :right + + def initialize(item, left, right = nil) + @item = item + @left = left + @right = right + end + + def to_s + "#<Derivation(#{item.display_name})>" + end + alias :inspect :to_s + + def render_strings_for_report + result = [] + _render_for_report(self, 0, result, 0) + result.map(&:rstrip) + end + + def render_for_report + render_strings_for_report.join("\n") + end + + private + + def _render_for_report(derivation, offset, strings, index) + item = derivation.item + if strings[index] + strings[index] << " " * (offset - strings[index].length) + else + strings[index] = " " * offset + end + str = strings[index] + str << "#{item.rule_id}: #{item.symbols_before_dot.map(&:display_name).join(" ")} " + + if derivation.left + len = str.length + str << "#{item.next_sym.display_name}" + length = _render_for_report(derivation.left, len, strings, index + 1) + # I want String#ljust! + str << " " * (length - str.length) + else + str << " • #{item.symbols_after_dot.map(&:display_name).join(" ")} " + return str.length + end + + if derivation.right&.left + length = _render_for_report(derivation.right.left, str.length, strings, index + 1) + str << "#{item.symbols_after_dot[1..-1].map(&:display_name).join(" ")} " + str << " " * (length - str.length) if length > str.length + elsif item.next_next_sym + str << "#{item.symbols_after_dot[1..-1].map(&:display_name).join(" ")} " + end + + return str.length + end + end + end +end diff --git a/tool/lrama/lib/lrama/counterexamples/example.rb b/tool/lrama/lib/lrama/counterexamples/example.rb new file mode 100644 index 0000000000..8f02d71fa4 --- /dev/null +++ b/tool/lrama/lib/lrama/counterexamples/example.rb @@ -0,0 +1,124 @@ +module Lrama + class Counterexamples + class Example + attr_reader :path1, :path2, :conflict, :conflict_symbol + + # path1 is shift conflict when S/R conflict + # path2 is always reduce conflict + def initialize(path1, path2, conflict, conflict_symbol, counterexamples) + @path1 = path1 + @path2 = path2 + @conflict = conflict + @conflict_symbol = conflict_symbol + @counterexamples = counterexamples + end + + def type + @conflict.type + end + + def path1_item + @path1.last.to.item + end + + def path2_item + @path2.last.to.item + end + + def derivations1 + @derivations1 ||= _derivations(path1) + end + + def derivations2 + @derivations2 ||= _derivations(path2) + end + + private + + def _derivations(paths) + derivation = nil + current = :production + lookahead_sym = paths.last.to.item.end_of_rule? ? @conflict_symbol : nil + + paths.reverse.each do |path| + item = path.to.item + + case current + when :production + case path + when StartPath + derivation = Derivation.new(item, derivation) + current = :start + when TransitionPath + derivation = Derivation.new(item, derivation) + current = :transition + when ProductionPath + derivation = Derivation.new(item, derivation) + current = :production + end + + if lookahead_sym && item.next_next_sym && item.next_next_sym.first_set.include?(lookahead_sym) + state_item = @counterexamples.transitions[[path.to, item.next_sym]] + derivation2 = find_derivation_for_symbol(state_item, lookahead_sym) + derivation.right = derivation2 + lookahead_sym = nil + end + + when :transition + case path + when StartPath + derivation = Derivation.new(item, derivation) + current = :start + when TransitionPath + # ignore + current = :transition + when ProductionPath + # ignore + current = :production + end + else + raise "BUG: Unknown #{current}" + end + + break if current == :start + end + + derivation + end + + def find_derivation_for_symbol(state_item, sym) + queue = [] + queue << [state_item] + + while (sis = queue.shift) + si = sis.last + next_sym = si.item.next_sym + + if next_sym == sym + derivation = nil + + sis.reverse.each do |si| + derivation = Derivation.new(si.item, derivation) + end + + return derivation + end + + if next_sym.nterm? && next_sym.first_set.include?(sym) + @counterexamples.productions[si].each do |next_item| + next if next_item.empty_rule? + next_si = StateItem.new(si.state, next_item) + next if sis.include?(next_si) + queue << (sis + [next_si]) + end + + if next_sym.nullable + next_si = @counterexamples.transitions[[si, next_sym]] + queue << (sis + [next_si]) + end + end + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/counterexamples/path.rb b/tool/lrama/lib/lrama/counterexamples/path.rb new file mode 100644 index 0000000000..a4caecd765 --- /dev/null +++ b/tool/lrama/lib/lrama/counterexamples/path.rb @@ -0,0 +1,69 @@ +module Lrama + class Counterexamples + class Path + def initialize(from_state_item, to_state_item) + @from_state_item = from_state_item + @to_state_item = to_state_item + end + + def from + @from_state_item + end + + def to + @to_state_item + end + + def to_s + "#<Path(#{type})>" + end + alias :inspect :to_s + end + + class StartPath < Path + def initialize(to_state_item) + super nil, to_state_item + end + + def type + :start + end + + def transition? + false + end + + def production? + false + end + end + + class TransitionPath < Path + def type + :transition + end + + def transition? + true + end + + def production? + false + end + end + + class ProductionPath < Path + def type + :production + end + + def transition? + false + end + + def production? + true + end + end + end +end diff --git a/tool/lrama/lib/lrama/counterexamples/state_item.rb b/tool/lrama/lib/lrama/counterexamples/state_item.rb new file mode 100644 index 0000000000..930ff4a5f8 --- /dev/null +++ b/tool/lrama/lib/lrama/counterexamples/state_item.rb @@ -0,0 +1,6 @@ +module Lrama + class Counterexamples + class StateItem < Struct.new(:state, :item) + end + end +end diff --git a/tool/lrama/lib/lrama/counterexamples/triple.rb b/tool/lrama/lib/lrama/counterexamples/triple.rb new file mode 100644 index 0000000000..e802beccf4 --- /dev/null +++ b/tool/lrama/lib/lrama/counterexamples/triple.rb @@ -0,0 +1,21 @@ +module Lrama + class Counterexamples + # s: state + # itm: item within s + # l: precise lookahead set + class Triple < Struct.new(:s, :itm, :l) + alias :state :s + alias :item :itm + alias :precise_lookahead_set :l + + def state_item + StateItem.new(state, item) + end + + def inspect + "#{state.inspect}. #{item.display_name}. #{l.map(&:id).map(&:s_value)}" + end + alias :to_s :inspect + end + end +end diff --git a/tool/lrama/lib/lrama/digraph.rb b/tool/lrama/lib/lrama/digraph.rb index 28f26781b1..c48b3f4041 100644 --- a/tool/lrama/lib/lrama/digraph.rb +++ b/tool/lrama/lib/lrama/digraph.rb @@ -33,7 +33,7 @@ module Lrama @h[x] = d @result[x] = @base_function[x] # F x = F' x - @relation[x] && @relation[x].each do |y| + @relation[x]&.each do |y| traverse(y) if @h[y] == 0 @h[x] = [@h[x], @h[y]].min @result[x] |= @result[y] # F x = F x + F y @@ -43,9 +43,8 @@ module Lrama while true do z = @stack.pop @h[z] = Float::INFINITY - @result[z] = @result[x] # F (Top of S) = F x - break if z == x + @result[z] = @result[x] # F (Top of S) = F x end end end diff --git a/tool/lrama/lib/lrama/grammar.rb b/tool/lrama/lib/lrama/grammar.rb index a4f98ecc83..25f1a49170 100644 --- a/tool/lrama/lib/lrama/grammar.rb +++ b/tool/lrama/lib/lrama/grammar.rb @@ -1,3 +1,4 @@ +require "lrama/grammar/auxiliary" require "lrama/grammar/code" require "lrama/grammar/error_token" require "lrama/grammar/precedence" @@ -7,16 +8,13 @@ require "lrama/grammar/rule" require "lrama/grammar/symbol" require "lrama/grammar/union" require "lrama/lexer" +require "lrama/type" module Lrama - Type = Struct.new(:id, :tag, keyword_init: true) Token = Lrama::Lexer::Token # Grammar is the result of parsing an input grammar file class Grammar - # Grammar file information not used by States but by Output - Aux = Struct.new(:prologue_first_lineno, :prologue, :epilogue_first_lineno, :epilogue, keyword_init: true) - attr_reader :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux attr_accessor :union, :expect, :printers, :error_tokens, @@ -38,7 +36,7 @@ module Lrama @error_symbol = nil @undef_symbol = nil @accept_symbol = nil - @aux = Aux.new + @aux = Auxiliary.new append_special_symbols end @@ -48,7 +46,7 @@ module Lrama end def add_error_token(ident_or_tags:, code:, lineno:) - @error_tokens << ErrorToken.new(ident_or_tags, code, lineno) + @error_tokens << ErrorToken.new(ident_or_tags: ident_or_tags, code: code, lineno: lineno) end def add_term(id:, alias_name: nil, tag: nil, token_id: nil, replace: false) @@ -215,6 +213,41 @@ module Lrama end end + def compute_first_set + terms.each do |term| + term.first_set = Set.new([term]).freeze + term.first_set_bitmap = Lrama::Bitmap.from_array([term.number]) + end + + nterms.each do |nterm| + nterm.first_set = Set.new([]).freeze + nterm.first_set_bitmap = Lrama::Bitmap.from_array([]) + end + + while true do + changed = false + + @rules.each do |rule| + rule.rhs.each do |r| + if rule.lhs.first_set_bitmap | r.first_set_bitmap != rule.lhs.first_set_bitmap + changed = true + rule.lhs.first_set_bitmap = rule.lhs.first_set_bitmap | r.first_set_bitmap + end + + break unless r.nullable + end + end + + break unless changed + end + + nterms.each do |nterm| + nterm.first_set = Lrama::Bitmap.to_array(nterm.first_set_bitmap).map do |number| + find_symbol_by_number!(number) + end.to_set + end + end + def find_symbol_by_s_value(s_value) @symbols.find do |sym| sym.id.s_value == s_value diff --git a/tool/lrama/lib/lrama/grammar/auxiliary.rb b/tool/lrama/lib/lrama/grammar/auxiliary.rb new file mode 100644 index 0000000000..933574b0f6 --- /dev/null +++ b/tool/lrama/lib/lrama/grammar/auxiliary.rb @@ -0,0 +1,7 @@ +module Lrama + class Grammar + # Grammar file information not used by States but by Output + class Auxiliary < Struct.new(:prologue_first_lineno, :prologue, :epilogue_first_lineno, :epilogue, keyword_init: true) + end + end +end diff --git a/tool/lrama/lib/lrama/grammar/rule.rb b/tool/lrama/lib/lrama/grammar/rule.rb index 7ed5b3312c..c559388b62 100644 --- a/tool/lrama/lib/lrama/grammar/rule.rb +++ b/tool/lrama/lib/lrama/grammar/rule.rb @@ -17,6 +17,12 @@ module Lrama "#{l}: #{r}" end + # opt_nl: ε <-- empty_rule + # | '\n' <-- not empty_rule + def empty_rule? + rhs.empty? + end + def precedence precedence_sym&.precedence end diff --git a/tool/lrama/lib/lrama/grammar/symbol.rb b/tool/lrama/lib/lrama/grammar/symbol.rb index 28916eb54f..9df1c2f636 100644 --- a/tool/lrama/lib/lrama/grammar/symbol.rb +++ b/tool/lrama/lib/lrama/grammar/symbol.rb @@ -7,6 +7,7 @@ module Lrama class Grammar class Symbol < Struct.new(:id, :alias_name, :number, :tag, :term, :token_id, :nullable, :precedence, :printer, :error_token, keyword_init: true) + attr_accessor :first_set, :first_set_bitmap attr_writer :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol def term? @@ -34,11 +35,7 @@ module Lrama end def display_name - if alias_name - alias_name - else - id.s_value - end + alias_name || id.s_value end # name for yysymbol_kind_t @@ -51,11 +48,7 @@ module Lrama when eof_symbol? name = "YYEOF" when term? && id.type == Token::Char - if alias_name - name = number.to_s + alias_name - else - name = number.to_s + id.s_value - end + name = number.to_s + display_name when term? && id.type == Token::Ident name = id.s_value when nterm? && (id.s_value.include?("$") || id.s_value.include?("@")) @@ -66,7 +59,7 @@ module Lrama raise "Unexpected #{self}" end - "YYSYMBOL_" + name.gsub(/[^a-zA-Z_0-9]+/, "_") + "YYSYMBOL_" + name.gsub(/\W+/, "_") end # comment for yysymbol_kind_t diff --git a/tool/lrama/lib/lrama/lexer/token.rb b/tool/lrama/lib/lrama/lexer/token.rb index 29ce48b2fa..aa49bfe47d 100644 --- a/tool/lrama/lib/lrama/lexer/token.rb +++ b/tool/lrama/lib/lrama/lexer/token.rb @@ -1,7 +1,8 @@ +require 'lrama/lexer/token/type' + module Lrama class Lexer - class Token < Struct.new(:type, :s_value, :alias, keyword_init: true) - Type = Struct.new(:id, :name, keyword_init: true) + class Token attr_accessor :line, :column, :referred # For User_code diff --git a/tool/lrama/lib/lrama/lexer/token/type.rb b/tool/lrama/lib/lrama/lexer/token/type.rb new file mode 100644 index 0000000000..d58a56f40b --- /dev/null +++ b/tool/lrama/lib/lrama/lexer/token/type.rb @@ -0,0 +1,8 @@ +module Lrama + class Lexer + class Token < Struct.new(:type, :s_value, :alias, keyword_init: true) + class Type < Struct.new(:id, :name, keyword_init: true) + end + end + end +end diff --git a/tool/lrama/lib/lrama/output.rb b/tool/lrama/lib/lrama/output.rb index dd71814a11..757d16e25a 100644 --- a/tool/lrama/lib/lrama/output.rb +++ b/tool/lrama/lib/lrama/output.rb @@ -252,7 +252,7 @@ module Lrama end def extract_param_name(param) - /\A(.)+([a-zA-Z0-9_]+)\z/.match(param)[2] + /\A(\W*)([a-zA-Z0-9_]+)\z/.match(param.split.last)[2] end def parse_param_name diff --git a/tool/lrama/lib/lrama/parser.rb b/tool/lrama/lib/lrama/parser.rb index 4faa402d1b..a12c8bbb34 100644 --- a/tool/lrama/lib/lrama/parser.rb +++ b/tool/lrama/lib/lrama/parser.rb @@ -22,6 +22,7 @@ module Lrama process_epilogue(grammar, lexer) grammar.prepare grammar.compute_nullable + grammar.compute_first_set grammar.validate! grammar diff --git a/tool/lrama/lib/lrama/parser/token_scanner.rb b/tool/lrama/lib/lrama/parser/token_scanner.rb index 1ff67b30a3..992b5742d3 100644 --- a/tool/lrama/lib/lrama/parser/token_scanner.rb +++ b/tool/lrama/lib/lrama/parser/token_scanner.rb @@ -11,7 +11,7 @@ module Lrama end def current_type - current_token && current_token.type + current_token&.type end def previous_token @@ -26,9 +26,7 @@ module Lrama def consume(*token_types) if token_types.include?(current_type) - token = current_token - self.next - return token + return self.next end return nil @@ -42,8 +40,7 @@ module Lrama a = [] while token_types.include?(current_type) - a << current_token - self.next + a << self.next end raise "No token is consumed. #{token_types}" if a.empty? diff --git a/tool/lrama/lib/lrama/state.rb b/tool/lrama/lib/lrama/state.rb index b868035e1a..6632bafecc 100644 --- a/tool/lrama/lib/lrama/state.rb +++ b/tool/lrama/lib/lrama/state.rb @@ -1,11 +1,11 @@ require "lrama/state/reduce" -require "lrama/state/shift" +require "lrama/state/reduce_reduce_conflict" require "lrama/state/resolved_conflict" +require "lrama/state/shift" +require "lrama/state/shift_reduce_conflict" module Lrama class State - Conflict = Struct.new(:symbols, :reduce, :type, keyword_init: true) - attr_reader :id, :accessing_symbol, :kernels, :conflicts, :resolved_conflicts, :default_reduction_rule, :closure, :items attr_accessor :shifts, :reduces @@ -101,6 +101,10 @@ module Lrama @term_transitions end + def transitions + term_transitions + nterm_transitions + end + def selected_term_transitions term_transitions.select do |shift, next_state| !shift.not_selected @@ -144,6 +148,10 @@ module Lrama end end + def has_conflicts? + end + def sr_conflicts @conflicts.select do |conflict| conflict.type == :shift_reduce diff --git a/tool/lrama/lib/lrama/state/reduce_reduce_conflict.rb b/tool/lrama/lib/lrama/state/reduce_reduce_conflict.rb new file mode 100644 index 0000000000..0a0e4dc20a --- /dev/null +++ b/tool/lrama/lib/lrama/state/reduce_reduce_conflict.rb @@ -0,0 +1,9 @@ +module Lrama + class State + class ReduceReduceConflict < Struct.new(:symbols, :reduce1, :reduce2, keyword_init: true) + def type + :reduce_reduce + end + end + end +end diff --git a/tool/lrama/lib/lrama/state/shift_reduce_conflict.rb b/tool/lrama/lib/lrama/state/shift_reduce_conflict.rb new file mode 100644 index 0000000000..f80bd5f352 --- /dev/null +++ b/tool/lrama/lib/lrama/state/shift_reduce_conflict.rb @@ -0,0 +1,9 @@ +module Lrama + class State + class ShiftReduceConflict < Struct.new(:symbols, :shift, :reduce, keyword_init: true) + def type + :shift_reduce + end + end + end +end diff --git a/tool/lrama/lib/lrama/states.rb b/tool/lrama/lib/lrama/states.rb index cf26416a37..d27c5411ea 100644 --- a/tool/lrama/lib/lrama/states.rb +++ b/tool/lrama/lib/lrama/states.rb @@ -102,43 +102,27 @@ module Lrama end def direct_read_sets - h = {} - - @direct_read_sets.each do |k, v| - h[k] = bitmap_to_terms(v) + @direct_read_sets.transform_values do |v| + bitmap_to_terms(v) end - - return h end def read_sets - h = {} - - @read_sets.each do |k, v| - h[k] = bitmap_to_terms(v) + @read_sets.transform_values do |v| + bitmap_to_terms(v) end - - return h end def follow_sets - h = {} - - @follow_sets.each do |k, v| - h[k] = bitmap_to_terms(v) + @follow_sets.transform_values do |v| + bitmap_to_terms(v) end - - return h end def la - h = {} - - @la.each do |k, v| - h[k] = bitmap_to_terms(v) + @la.transform_values do |v| + bitmap_to_terms(v) end - - return h end private @@ -452,7 +436,7 @@ module Lrama # Can resolve only when both have prec unless shift_prec && reduce_prec - state.conflicts << State::Conflict.new(symbols: [sym], reduce: reduce, type: :shift_reduce) + state.conflicts << State::ShiftReduceConflict.new(symbols: [sym], shift: shift, reduce: reduce) next end @@ -501,16 +485,21 @@ module Lrama def compute_reduce_reduce_conflicts states.each do |state| - a = [] + count = state.reduces.count - state.reduces.each do |reduce| - next if reduce.look_ahead.nil? + for i in 0...count do + reduce1 = state.reduces[i] + next if reduce1.look_ahead.nil? - intersection = a & reduce.look_ahead - a += reduce.look_ahead + for j in (i+1)...count do + reduce2 = state.reduces[j] + next if reduce2.look_ahead.nil? - if !intersection.empty? - state.conflicts << State::Conflict.new(symbols: intersection.dup, reduce: reduce, type: :reduce_reduce) + intersection = reduce1.look_ahead & reduce2.look_ahead + + if !intersection.empty? + state.conflicts << State::ReduceReduceConflict.new(symbols: intersection, reduce1: reduce1, reduce2: reduce2) + end end end end diff --git a/tool/lrama/lib/lrama/states/item.rb b/tool/lrama/lib/lrama/states/item.rb index 5c3696cc7b..823ccc72e1 100644 --- a/tool/lrama/lib/lrama/states/item.rb +++ b/tool/lrama/lib/lrama/states/item.rb @@ -12,20 +12,56 @@ module Lrama rule.id end + def empty_rule? + rule.empty_rule? + end + + def number_of_rest_symbols + rule.rhs.count - position + end + + def lhs + rule.lhs + end + def next_sym rule.rhs[position] end + def next_next_sym + rule.rhs[position + 1] + end + + def previous_sym + rule.rhs[position - 1] + end + def end_of_rule? rule.rhs.count == position end + def beginning_of_rule? + position == 0 + end + + def start_item? + rule.id == 0 && position == 0 + end + def new_by_next_position Item.new(rule: rule, position: position + 1) end - def previous_sym - rule.rhs[position - 1] + def symbols_before_dot + rule.rhs[0...position] + end + + def symbols_after_dot + rule.rhs[position..-1] + end + + def to_s + "#{lhs.id.s_value}: #{display_name}" end def display_name diff --git a/tool/lrama/lib/lrama/states_reporter.rb b/tool/lrama/lib/lrama/states_reporter.rb index d3dbe6b1b4..0d805829eb 100644 --- a/tool/lrama/lib/lrama/states_reporter.rb +++ b/tool/lrama/lib/lrama/states_reporter.rb @@ -14,13 +14,13 @@ module Lrama private - def _report(io, grammar: false, states: false, itemsets: false, lookaheads: false, solved: false, verbose: false) + def _report(io, grammar: false, states: false, itemsets: false, lookaheads: false, solved: false, counterexamples: false, verbose: false) # TODO: Unused terms # TODO: Unused rules report_conflicts(io) report_grammar(io) if grammar - report_states(io, itemsets, lookaheads, solved, verbose) + report_states(io, itemsets, lookaheads, solved, counterexamples, verbose) end def report_conflicts(io) @@ -71,7 +71,11 @@ module Lrama io << "\n\n" end - def report_states(io, itemsets, lookaheads, solved, verbose) + def report_states(io, itemsets, lookaheads, solved, counterexamples, verbose) + if counterexamples + cex = Counterexamples.new(@states) + end + @states.states.each do |state| # Report State io << "State #{state.id}\n\n" @@ -194,6 +198,27 @@ module Lrama io << "\n" if !state.resolved_conflicts.empty? end + if counterexamples && state.has_conflicts? + # Report counterexamples + examples = cex.compute(state) + examples.each do |example| + label0 = example.type == :shift_reduce ? "shift/reduce" : "reduce/reduce" + label1 = example.type == :shift_reduce ? "Shift derivation" : "First Reduce derivation" + label2 = example.type == :shift_reduce ? "Reduce derivation" : "Second Reduce derivation" + + io << " #{label0} conflict on token #{example.conflict_symbol.id.s_value}:\n" + io << " #{example.path1_item.to_s}\n" + io << " #{example.path2_item.to_s}\n" + io << " #{label1}\n" + example.derivations1.render_strings_for_report.each do |str| + io << " #{str}\n" + end + io << " #{label2}\n" + example.derivations2.render_strings_for_report.each do |str| + io << " #{str}\n" + end + end + end if verbose # Report direct_read_sets diff --git a/tool/lrama/lib/lrama/type.rb b/tool/lrama/lib/lrama/type.rb new file mode 100644 index 0000000000..fe5ff74058 --- /dev/null +++ b/tool/lrama/lib/lrama/type.rb @@ -0,0 +1,4 @@ +module Lrama + class Type < Struct.new(:id, :tag, keyword_init: true) + end +end diff --git a/tool/lrama/lib/lrama/version.rb b/tool/lrama/lib/lrama/version.rb index 54eea75bfd..3f2eb3b9d4 100644 --- a/tool/lrama/lib/lrama/version.rb +++ b/tool/lrama/lib/lrama/version.rb @@ -1,3 +1,3 @@ module Lrama - VERSION = "0.5.3".freeze + VERSION = "0.5.4".freeze end diff --git a/tool/lrama/template/bison/yacc.c b/tool/lrama/template/bison/yacc.c index 61d23d506b..8f57984ef3 100644 --- a/tool/lrama/template/bison/yacc.c +++ b/tool/lrama/template/bison/yacc.c @@ -1220,26 +1220,30 @@ yydestruct (const char *yymsg, <%- if output.error_recovery -%> #ifndef YYMAXREPAIR -# define YYMAXREPAIR 3 +# define YYMAXREPAIR(<%= output.parse_param_name %>) (3) #endif -enum repair_type { +#ifndef YYERROR_RECOVERY_ENABLED +# define YYERROR_RECOVERY_ENABLED(<%= output.parse_param_name %>) (1) +#endif + +enum yy_repair_type { insert, delete, shift, }; -struct repair { - enum repair_type type; +struct yy_repair { + enum yy_repair_type type; yysymbol_kind_t term; }; -typedef struct repair repair; +typedef struct yy_repair yy_repair; -struct repairs { +struct yy_repairs { /* For debug */ int id; /* For breadth-first traversing */ - struct repairs *next; + struct yy_repairs *next; YYPTRDIFF_T stack_length; /* Bottom of states */ yy_state_t *states; @@ -1248,10 +1252,10 @@ struct repairs { /* repair length */ int repair_length; /* */ - struct repairs *prev_repair; - struct repair repair; + struct yy_repairs *prev_repair; + struct yy_repair repair; }; -typedef struct repairs repairs; +typedef struct yy_repairs yy_repairs; struct yy_term { yysymbol_kind_t kind; @@ -1260,12 +1264,12 @@ struct yy_term { }; typedef struct yy_term yy_term; -struct repair_terms { +struct yy_repair_terms { int id; int length; yy_term terms[]; }; -typedef struct repair_terms repair_terms; +typedef struct yy_repair_terms yy_repair_terms; static void yy_error_token_initialize (yysymbol_kind_t yykind, YYSTYPE * const yyvaluep, YYLTYPE * const yylocationp<%= output.user_formals %>) @@ -1280,11 +1284,11 @@ switch (yykind) YY_IGNORE_MAYBE_UNINITIALIZED_END } -static repair_terms * -yy_create_repair_terms(repairs *reps) +static yy_repair_terms * +yy_create_repair_terms(yy_repairs *reps<%= output.user_formals %>) { - repairs *r = reps; - repair_terms *rep_terms; + yy_repairs *r = reps; + yy_repair_terms *rep_terms; int count = 0; while (r->prev_repair) @@ -1293,7 +1297,7 @@ yy_create_repair_terms(repairs *reps) r = r->prev_repair; } - rep_terms = (repair_terms *) malloc (sizeof (repair_terms) + sizeof (yy_term) * count); + rep_terms = (yy_repair_terms *) YYMALLOC (sizeof (yy_repair_terms) + sizeof (yy_term) * count); rep_terms->id = reps->id; rep_terms->length = count; @@ -1309,46 +1313,46 @@ yy_create_repair_terms(repairs *reps) } static void -yy_print_repairs(repairs *reps) +yy_print_repairs(yy_repairs *reps<%= output.user_formals %>) { - repairs *r = reps; + yy_repairs *r = reps; - fprintf (stderr, + YYDPRINTF ((stderr, "id: %d, repair_length: %d, repair_state: %d, prev_repair_id: %d\n", - reps->id, reps->repair_length, *reps->state, reps->prev_repair->id); + reps->id, reps->repair_length, *reps->state, reps->prev_repair->id)); while (r->prev_repair) { - fprintf (stderr, "%s ", yysymbol_name (r->repair.term)); + YYDPRINTF ((stderr, "%s ", yysymbol_name (r->repair.term))); r = r->prev_repair; } - fprintf (stderr, "\n"); + YYDPRINTF ((stderr, "\n")); } static void -yy_print_repair_terms(repair_terms *rep_terms) +yy_print_repair_terms(yy_repair_terms *rep_terms<%= output.user_formals %>) { for (int i = 0; i < rep_terms->length; i++) - fprintf (stderr, "%s ", yysymbol_name (rep_terms->terms[i].kind)); + YYDPRINTF ((stderr, "%s ", yysymbol_name (rep_terms->terms[i].kind))); - fprintf (stderr, "\n"); + YYDPRINTF ((stderr, "\n")); } static void -yy_free_repairs(repairs *reps) +yy_free_repairs(yy_repairs *reps<%= output.user_formals %>) { while (reps) { - repairs *r = reps; + yy_repairs *r = reps; reps = reps->next; - free (r->states); - free (r); + YYFREE (r->states); + YYFREE (r); } } static int -yy_process_repairs(repairs *reps, yysymbol_kind_t token) +yy_process_repairs(yy_repairs *reps, yysymbol_kind_t token) { int yyn; int yystate = *reps->state; @@ -1417,22 +1421,22 @@ yyrecover_errlab: return 0; } -static repair_terms * -yyrecover(yy_state_t *yyss, yy_state_t *yyssp, int yychar) +static yy_repair_terms * +yyrecover(yy_state_t *yyss, yy_state_t *yyssp, int yychar<%= output.user_formals %>) { yysymbol_kind_t yytoken = YYTRANSLATE (yychar); - repair_terms *rep_terms = YY_NULLPTR; + yy_repair_terms *rep_terms = YY_NULLPTR; int count = 0; - repairs *head = (repairs *) malloc (sizeof (repairs)); - repairs *current = head; - repairs *tail = head; + yy_repairs *head = (yy_repairs *) YYMALLOC (sizeof (yy_repairs)); + yy_repairs *current = head; + yy_repairs *tail = head; YYPTRDIFF_T stack_length = yyssp - yyss + 1; head->id = count; head->next = 0; head->stack_length = stack_length; - head->states = (yy_state_t *) malloc (sizeof (yy_state_t) * (stack_length)); + head->states = (yy_state_t *) YYMALLOC (sizeof (yy_state_t) * (stack_length)); head->state = head->states + (yyssp - yyss); YYCOPY (head->states, yyss, stack_length); head->repair_length = 0; @@ -1456,14 +1460,14 @@ yyrecover(yy_state_t *yyss, yy_state_t *yyssp, int yychar) { if (yyx != YYSYMBOL_YYerror) { - if (current->repair_length + 1 > YYMAXREPAIR) + if (current->repair_length + 1 > YYMAXREPAIR(<%= output.parse_param_name %>)) continue; - repairs *new = (repairs *) malloc (sizeof (repairs)); + yy_repairs *new = (yy_repairs *) YYMALLOC (sizeof (yy_repairs)); new->id = count; new->next = 0; new->stack_length = stack_length; - new->states = (yy_state_t *) malloc (sizeof (yy_state_t) * (stack_length)); + new->states = (yy_state_t *) YYMALLOC (sizeof (yy_state_t) * (stack_length)); new->state = new->states + (current->state - current->states); YYCOPY (new->states, current->states, current->state - current->states + 1); new->repair_length = current->repair_length + 1; @@ -1474,7 +1478,7 @@ yyrecover(yy_state_t *yyss, yy_state_t *yyssp, int yychar) /* Process PDA assuming next token is yyx */ if (! yy_process_repairs (new, yyx)) { - free (new); + YYFREE (new); continue; } @@ -1484,18 +1488,18 @@ yyrecover(yy_state_t *yyss, yy_state_t *yyssp, int yychar) if (yyx == yytoken) { - rep_terms = yy_create_repair_terms (current); - fprintf (stderr, "repair_terms found. id: %d, length: %d\n", rep_terms->id, rep_terms->length); - yy_print_repairs (current); - yy_print_repair_terms (rep_terms); + rep_terms = yy_create_repair_terms (current<%= output.user_args %>); + YYDPRINTF ((stderr, "repair_terms found. id: %d, length: %d\n", rep_terms->id, rep_terms->length)); + yy_print_repairs (current<%= output.user_args %>); + yy_print_repair_terms (rep_terms<%= output.user_args %>); goto done; } - fprintf (stderr, + YYDPRINTF ((stderr, "New repairs is enqueued. count: %d, yystate: %d, yyx: %d\n", - count, yystate, yyx); - yy_print_repairs (new); + count, yystate, yyx)); + yy_print_repairs (new<%= output.user_args %>); } } } @@ -1505,11 +1509,11 @@ yyrecover(yy_state_t *yyss, yy_state_t *yyssp, int yychar) done: - yy_free_repairs(head); + yy_free_repairs(head<%= output.user_args %>); if (!rep_terms) { - fprintf (stderr, "repair_terms not found\n"); + YYDPRINTF ((stderr, "repair_terms not found\n")); } return rep_terms; @@ -1586,7 +1590,7 @@ YYLTYPE yylloc = yyloc_default; /* The locations where the error started and ended. */ YYLTYPE yyerror_range[3]; <%- if output.error_recovery -%> - repair_terms *rep_terms = 0; + yy_repair_terms *rep_terms = 0; yy_term term_backup; int rep_terms_index; int yychar_backup; @@ -1726,32 +1730,35 @@ yybackup: /* Not known => get a lookahead token if don't already have one. */ <%- if output.error_recovery -%> - if (yychar == YYEMPTY && rep_terms) + if (YYERROR_RECOVERY_ENABLED(<%= output.parse_param_name %>)) { - - if (rep_terms_index < rep_terms->length) - { - YYDPRINTF ((stderr, "An error recovery token is used\n")); - yy_term term = rep_terms->terms[rep_terms_index]; - yytoken = term.kind; - yylval = term.value; - yylloc = term.location; - yychar = yytranslate_inverted[yytoken]; - YY_SYMBOL_PRINT ("Next error recovery token is", yytoken, &yylval, &yylloc<%= output.user_args %>); - rep_terms_index++; - } - else + if (yychar == YYEMPTY && rep_terms) { - YYDPRINTF ((stderr, "Error recovery is completed\n")); - yytoken = term_backup.kind; - yylval = term_backup.value; - yylloc = term_backup.location; - yychar = yychar_backup; - YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc<%= output.user_args %>); - - free (rep_terms); - rep_terms = 0; - yychar_backup = 0; + + if (rep_terms_index < rep_terms->length) + { + YYDPRINTF ((stderr, "An error recovery token is used\n")); + yy_term term = rep_terms->terms[rep_terms_index]; + yytoken = term.kind; + yylval = term.value; + yylloc = term.location; + yychar = yytranslate_inverted[yytoken]; + YY_SYMBOL_PRINT ("Next error recovery token is", yytoken, &yylval, &yylloc<%= output.user_args %>); + rep_terms_index++; + } + else + { + YYDPRINTF ((stderr, "Error recovery is completed\n")); + yytoken = term_backup.kind; + yylval = term_backup.value; + yylloc = term_backup.location; + yychar = yychar_backup; + YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc<%= output.user_args %>); + + YYFREE (rep_terms); + rep_terms = 0; + yychar_backup = 0; + } } } <%- end -%> @@ -1980,27 +1987,28 @@ yyerrorlab: `-------------------------------------------------------------*/ yyerrlab1: <%- if output.error_recovery -%> - { - rep_terms = yyrecover (yyss, yyssp, yychar); - if (rep_terms) - { - for (int i = 0; i < rep_terms->length; i++) - { - yy_term *term = &rep_terms->terms[i]; - yy_error_token_initialize (term->kind, &term->value, &term->location<%= output.user_args %>); - } + if (YYERROR_RECOVERY_ENABLED(<%= output.parse_param_name %>)) + { + rep_terms = yyrecover (yyss, yyssp, yychar<%= output.user_args %>); + if (rep_terms) + { + for (int i = 0; i < rep_terms->length; i++) + { + yy_term *term = &rep_terms->terms[i]; + yy_error_token_initialize (term->kind, &term->value, &term->location<%= output.user_args %>); + } - yychar_backup = yychar; - /* Can be packed into (the tail of) rep_terms? */ - term_backup.kind = yytoken; - term_backup.value = yylval; - term_backup.location = yylloc; - rep_terms_index = 0; - yychar = YYEMPTY; + yychar_backup = yychar; + /* Can be packed into (the tail of) rep_terms? */ + term_backup.kind = yytoken; + term_backup.value = yylval; + term_backup.location = yylloc; + rep_terms_index = 0; + yychar = YYEMPTY; - goto yybackup; - } - } + goto yybackup; + } + } <%- end -%> yyerrstatus = 3; /* Each real token shifted decrements this. */ |