diff options
author | Hiroshi SHIBATA <[email protected]> | 2019-05-13 21:25:22 +0900 |
---|---|---|
committer | Hiroshi SHIBATA <[email protected]> | 2019-06-19 18:17:25 +0900 |
commit | 1a2546c2be839baa7d0a50dc056d4d6987d26852 (patch) | |
tree | 19fef5d8b8d96452a51ab68e8093ea895192ca27 /lib | |
parent | cbe06cd3501fdadd0e6e63094da2973484d70b0b (diff) |
Backport racc-1.4.15 from upstream.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/racc.rb | 6 | ||||
-rw-r--r-- | lib/racc/compat.rb | 32 | ||||
-rw-r--r-- | lib/racc/debugflags.rb | 59 | ||||
-rw-r--r-- | lib/racc/exception.rb | 13 | ||||
-rw-r--r-- | lib/racc/grammar.rb | 1113 | ||||
-rw-r--r-- | lib/racc/grammarfileparser.rb | 559 | ||||
-rw-r--r-- | lib/racc/info.rb | 14 | ||||
-rw-r--r-- | lib/racc/iset.rb | 91 | ||||
-rw-r--r-- | lib/racc/logfilegenerator.rb | 211 | ||||
-rw-r--r-- | lib/racc/parser-text.rb | 640 | ||||
-rw-r--r-- | lib/racc/parser.rb | 85 | ||||
-rw-r--r-- | lib/racc/parserfilegenerator.rb | 510 | ||||
-rw-r--r-- | lib/racc/pre-setup | 13 | ||||
-rw-r--r-- | lib/racc/sourcetext.rb | 34 | ||||
-rw-r--r-- | lib/racc/state.rb | 969 | ||||
-rw-r--r-- | lib/racc/statetransitiontable.rb | 316 | ||||
-rw-r--r-- | lib/racc/static.rb | 5 |
17 files changed, 4635 insertions, 35 deletions
diff --git a/lib/racc.rb b/lib/racc.rb new file mode 100644 index 0000000000..f6e4ac03a8 --- /dev/null +++ b/lib/racc.rb @@ -0,0 +1,6 @@ +require 'racc/compat' +require 'racc/debugflags' +require 'racc/grammar' +require 'racc/state' +require 'racc/exception' +require 'racc/info' diff --git a/lib/racc/compat.rb b/lib/racc/compat.rb new file mode 100644 index 0000000000..ccb033e2e0 --- /dev/null +++ b/lib/racc/compat.rb @@ -0,0 +1,32 @@ +# +# $Id: 14fa1118eb3a23e85265e4f7afe2d5a297d69f9c $ +# +# Copyright (c) 1999-2006 Minero Aoki +# +# This program is free software. +# You can distribute/modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. +# For details of the GNU LGPL, see the file "COPYING". +# + +unless Object.method_defined?(:__send) + class Object + alias __send __send__ + end +end + +unless Object.method_defined?(:__send!) + class Object + alias __send! __send__ + end +end + +unless Array.method_defined?(:map!) + class Array + if Array.method_defined?(:collect!) + alias map! collect! + else + alias map! filter + end + end +end diff --git a/lib/racc/debugflags.rb b/lib/racc/debugflags.rb new file mode 100644 index 0000000000..1b5d2fe54c --- /dev/null +++ b/lib/racc/debugflags.rb @@ -0,0 +1,59 @@ +# +# $Id: 74ff4369ce53c7f45cfc2644ce907785104ebf6e $ +# +# Copyright (c) 1999-2006 Minero Aoki +# +# This program is free software. +# You can distribute/modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. +# For details of LGPL, see the file "COPYING". +# + +module Racc + + class DebugFlags + def DebugFlags.parse_option_string(s) + parse = rule = token = state = la = prec = conf = false + s.split(//).each do |ch| + case ch + when 'p' then parse = true + when 'r' then rule = true + when 't' then token = true + when 's' then state = true + when 'l' then la = true + when 'c' then prec = true + when 'o' then conf = true + else + raise "unknown debug flag char: #{ch.inspect}" + end + end + new(parse, rule, token, state, la, prec, conf) + end + + def initialize(parse = false, rule = false, token = false, state = false, + la = false, prec = false, conf = false) + @parse = parse + @rule = rule + @token = token + @state = state + @la = la + @prec = prec + @any = (parse || rule || token || state || la || prec) + @status_logging = conf + end + + attr_reader :parse + attr_reader :rule + attr_reader :token + attr_reader :state + attr_reader :la + attr_reader :prec + + def any? + @any + end + + attr_reader :status_logging + end + +end diff --git a/lib/racc/exception.rb b/lib/racc/exception.rb new file mode 100644 index 0000000000..bd46fcb323 --- /dev/null +++ b/lib/racc/exception.rb @@ -0,0 +1,13 @@ +# +# $Id: d5858363d1d4a4b5a2ca8d193b5153a49312188e $ +# +# Copyright (c) 1999-2006 Minero Aoki +# +# This program is free software. +# You can distribute/modify this program under the same terms of ruby. +# see the file "COPYING". + +module Racc + class Error < StandardError; end + class CompileError < Error; end +end diff --git a/lib/racc/grammar.rb b/lib/racc/grammar.rb new file mode 100644 index 0000000000..e55785a3a0 --- /dev/null +++ b/lib/racc/grammar.rb @@ -0,0 +1,1113 @@ +# +# $Id: acc33b7e1fe05f28f2d271f1fb9f1c42e50705dc $ +# +# Copyright (c) 1999-2006 Minero Aoki +# +# This program is free software. +# You can distribute/modify this program under the same terms of ruby. +# see the file "COPYING". + +require 'racc/compat' +require 'racc/iset' +require 'racc/sourcetext' +require 'racc/logfilegenerator' +require 'racc/exception' +require 'forwardable' + +module Racc + + class Grammar + + def initialize(debug_flags = DebugFlags.new) + @symboltable = SymbolTable.new + @debug_symbol = debug_flags.token + @rules = [] # :: [Rule] + @start = nil + @n_expected_srconflicts = nil + @prec_table = [] + @prec_table_closed = false + @closed = false + @states = nil + end + + attr_reader :start + attr_reader :symboltable + attr_accessor :n_expected_srconflicts + + def [](x) + @rules[x] + end + + def each_rule(&block) + @rules.each(&block) + end + + alias each each_rule + + def each_index(&block) + @rules.each_index(&block) + end + + def each_with_index(&block) + @rules.each_with_index(&block) + end + + def size + @rules.size + end + + def to_s + "<Racc::Grammar>" + end + + extend Forwardable + + def_delegator "@symboltable", :each, :each_symbol + def_delegator "@symboltable", :each_terminal + def_delegator "@symboltable", :each_nonterminal + + def intern(value, dummy = false) + @symboltable.intern(value, dummy) + end + + def symbols + @symboltable.symbols + end + + def nonterminal_base + @symboltable.nt_base + end + + def useless_nonterminal_exist? + n_useless_nonterminals() != 0 + end + + def n_useless_nonterminals + @n_useless_nonterminals ||= + begin + n = 0 + @symboltable.each_nonterminal do |sym| + n += 1 if sym.useless? + end + n + end + end + + def useless_rule_exist? + n_useless_rules() != 0 + end + + def n_useless_rules + @n_useless_rules ||= + begin + n = 0 + each do |r| + n += 1 if r.useless? + end + n + end + end + + def nfa + (@states ||= States.new(self)).nfa + end + + def dfa + (@states ||= States.new(self)).dfa + end + + alias states dfa + + def state_transition_table + states().state_transition_table + end + + def parser_class + states = states() # cache + if $DEBUG + srcfilename = caller(1).first.slice(/\A(.*?):/, 1) + begin + write_log srcfilename + ".output" + rescue SystemCallError + end + report = lambda {|s| $stderr.puts "racc: #{srcfilename}: #{s}" } + if states.should_report_srconflict? + report["#{states.n_srconflicts} shift/reduce conflicts"] + end + if states.rrconflict_exist? + report["#{states.n_rrconflicts} reduce/reduce conflicts"] + end + g = states.grammar + if g.useless_nonterminal_exist? + report["#{g.n_useless_nonterminals} useless nonterminals"] + end + if g.useless_rule_exist? + report["#{g.n_useless_rules} useless rules"] + end + end + states.state_transition_table.parser_class + end + + def write_log(path) + File.open(path, 'w') {|f| + LogFileGenerator.new(states()).output f + } + end + + # + # Grammar Definition Interface + # + + def add(rule) + raise ArgumentError, "rule added after the Grammar closed" if @closed + @rules.push rule + end + + def added?(sym) + @rules.detect {|r| r.target == sym } + end + + def start_symbol=(s) + raise CompileError, "start symbol set twice'" if @start + @start = s + end + + def declare_precedence(assoc, syms) + raise CompileError, "precedence table defined twice" if @prec_table_closed + @prec_table.push [assoc, syms] + end + + def end_precedence_declaration(reverse) + @prec_table_closed = true + return if @prec_table.empty? + table = reverse ? @prec_table.reverse : @prec_table + table.each_with_index do |(assoc, syms), idx| + syms.each do |sym| + sym.assoc = assoc + sym.precedence = idx + end + end + end + + # + # Dynamic Generation Interface + # + + def Grammar.define(&block) + env = DefinitionEnv.new + env.instance_eval(&block) + env.grammar + end + + class DefinitionEnv + def initialize + @grammar = Grammar.new + @seqs = Hash.new(0) + @delayed = [] + end + + def grammar + flush_delayed + @grammar.each do |rule| + if rule.specified_prec + rule.specified_prec = @grammar.intern(rule.specified_prec) + end + end + @grammar.init + @grammar + end + + def precedence_table(&block) + env = PrecedenceDefinitionEnv.new(@grammar) + env.instance_eval(&block) + @grammar.end_precedence_declaration env.reverse + end + + def method_missing(mid, *args, &block) + unless mid.to_s[-1,1] == '=' + super # raises NoMethodError + end + target = @grammar.intern(mid.to_s.chop.intern) + unless args.size == 1 + raise ArgumentError, "too many arguments for #{mid} (#{args.size} for 1)" + end + _add target, args.first + end + + def _add(target, x) + case x + when Sym + @delayed.each do |rule| + rule.replace x, target if rule.target == x + end + @grammar.symboltable.delete x + else + x.each_rule do |r| + r.target = target + @grammar.add r + end + end + flush_delayed + end + + def _delayed_add(rule) + @delayed.push rule + end + + def _added?(sym) + @grammar.added?(sym) or @delayed.detect {|r| r.target == sym } + end + + def flush_delayed + return if @delayed.empty? + @delayed.each do |rule| + @grammar.add rule + end + @delayed.clear + end + + def seq(*list, &block) + Rule.new(nil, list.map {|x| _intern(x) }, UserAction.proc(block)) + end + + def null(&block) + seq(&block) + end + + def action(&block) + id = "@#{@seqs["action"] += 1}".intern + _delayed_add Rule.new(@grammar.intern(id), [], UserAction.proc(block)) + id + end + + alias _ action + + def option(sym, default = nil, &block) + _defmetasyntax("option", _intern(sym), block) {|target| + seq() { default } | seq(sym) + } + end + + def many(sym, &block) + _defmetasyntax("many", _intern(sym), block) {|target| + seq() { [] }\ + | seq(target, sym) {|list, x| list.push x; list } + } + end + + def many1(sym, &block) + _defmetasyntax("many1", _intern(sym), block) {|target| + seq(sym) {|x| [x] }\ + | seq(target, sym) {|list, x| list.push x; list } + } + end + + def separated_by(sep, sym, &block) + option(separated_by1(sep, sym), [], &block) + end + + def separated_by1(sep, sym, &block) + _defmetasyntax("separated_by1", _intern(sym), block) {|target| + seq(sym) {|x| [x] }\ + | seq(target, sep, sym) {|list, _, x| list.push x; list } + } + end + + def _intern(x) + case x + when Symbol, String + @grammar.intern(x) + when Racc::Sym + x + else + raise TypeError, "wrong type #{x.class} (expected Symbol/String/Racc::Sym)" + end + end + + private + + def _defmetasyntax(type, id, action, &block) + if action + idbase = "#{type}@#{id}-#{@seqs[type] += 1}" + target = _wrap(idbase, "#{idbase}-core", action) + _regist("#{idbase}-core", &block) + else + target = _regist("#{type}@#{id}", &block) + end + @grammar.intern(target) + end + + def _regist(target_name) + target = target_name.intern + unless _added?(@grammar.intern(target)) + yield(target).each_rule do |rule| + rule.target = @grammar.intern(target) + _delayed_add rule + end + end + target + end + + def _wrap(target_name, sym, block) + target = target_name.intern + _delayed_add Rule.new(@grammar.intern(target), + [@grammar.intern(sym.intern)], + UserAction.proc(block)) + target + end + end + + class PrecedenceDefinitionEnv + def initialize(g) + @grammar = g + @prechigh_seen = false + @preclow_seen = false + @reverse = false + end + + attr_reader :reverse + + def higher + if @prechigh_seen + raise CompileError, "prechigh used twice" + end + @prechigh_seen = true + end + + def lower + if @preclow_seen + raise CompileError, "preclow used twice" + end + if @prechigh_seen + @reverse = true + end + @preclow_seen = true + end + + def left(*syms) + @grammar.declare_precedence :Left, syms.map {|s| @grammar.intern(s) } + end + + def right(*syms) + @grammar.declare_precedence :Right, syms.map {|s| @grammar.intern(s) } + end + + def nonassoc(*syms) + @grammar.declare_precedence :Nonassoc, syms.map {|s| @grammar.intern(s)} + end + end + + # + # Computation + # + + def init + return if @closed + @closed = true + @start ||= @rules.map {|r| r.target }.detect {|sym| not sym.dummy? } + raise CompileError, 'no rule in input' if @rules.empty? + add_start_rule + @rules.freeze + fix_ident + compute_hash + compute_heads + determine_terminals + compute_nullable_0 + @symboltable.fix + compute_locate + @symboltable.each_nonterminal {|t| compute_expand t } + compute_nullable + compute_useless + end + + private + + def add_start_rule + r = Rule.new(@symboltable.dummy, + [@start, @symboltable.anchor, @symboltable.anchor], + UserAction.empty) + r.ident = 0 + r.hash = 0 + r.precedence = nil + @rules.unshift r + end + + # Rule#ident + # LocationPointer#ident + def fix_ident + @rules.each_with_index do |rule, idx| + rule.ident = idx + end + end + + # Rule#hash + def compute_hash + hash = 4 # size of dummy rule + @rules.each do |rule| + rule.hash = hash + hash += (rule.size + 1) + end + end + + # Sym#heads + def compute_heads + @rules.each do |rule| + rule.target.heads.push rule.ptrs[0] + end + end + + # Sym#terminal? + def determine_terminals + @symboltable.each do |s| + s.term = s.heads.empty? + end + end + + # Sym#self_null? + def compute_nullable_0 + @symboltable.each do |s| + if s.terminal? + s.snull = false + else + s.snull = s.heads.any? {|loc| loc.reduce? } + end + end + end + + # Sym#locate + def compute_locate + @rules.each do |rule| + t = nil + rule.ptrs.each do |ptr| + unless ptr.reduce? + tok = ptr.dereference + tok.locate.push ptr + t = tok if tok.terminal? + end + end + rule.precedence = t + end + end + + # Sym#expand + def compute_expand(t) + puts "expand> #{t.to_s}" if @debug_symbol + t.expand = _compute_expand(t, ISet.new, []) + puts "expand< #{t.to_s}: #{t.expand.to_s}" if @debug_symbol + end + + def _compute_expand(t, set, lock) + if tmp = t.expand + set.update tmp + return set + end + tok = nil + set.update_a t.heads + t.heads.each do |ptr| + tok = ptr.dereference + if tok and tok.nonterminal? + unless lock[tok.ident] + lock[tok.ident] = true + _compute_expand tok, set, lock + end + end + end + set + end + + # Sym#nullable?, Rule#nullable? + def compute_nullable + @rules.each {|r| r.null = false } + @symboltable.each {|t| t.null = false } + r = @rules.dup + s = @symboltable.nonterminals + begin + rs = r.size + ss = s.size + check_rules_nullable r + check_symbols_nullable s + end until rs == r.size and ss == s.size + end + + def check_rules_nullable(rules) + rules.delete_if do |rule| + rule.null = true + rule.symbols.each do |t| + unless t.nullable? + rule.null = false + break + end + end + rule.nullable? + end + end + + def check_symbols_nullable(symbols) + symbols.delete_if do |sym| + sym.heads.each do |ptr| + if ptr.rule.nullable? + sym.null = true + break + end + end + sym.nullable? + end + end + + # Sym#useless?, Rule#useless? + # FIXME: what means "useless"? + def compute_useless + @symboltable.each_terminal {|sym| sym.useless = false } + @symboltable.each_nonterminal {|sym| sym.useless = true } + @rules.each {|rule| rule.useless = true } + r = @rules.dup + s = @symboltable.nonterminals + begin + rs = r.size + ss = s.size + check_rules_useless r + check_symbols_useless s + end until r.size == rs and s.size == ss + end + + def check_rules_useless(rules) + rules.delete_if do |rule| + rule.useless = false + rule.symbols.each do |sym| + if sym.useless? + rule.useless = true + break + end + end + not rule.useless? + end + end + + def check_symbols_useless(s) + s.delete_if do |t| + t.heads.each do |ptr| + unless ptr.rule.useless? + t.useless = false + break + end + end + not t.useless? + end + end + + end # class Grammar + + + class Rule + + def initialize(target, syms, act) + @target = target + @symbols = syms + @action = act + @alternatives = [] + + @ident = nil + @hash = nil + @ptrs = nil + @precedence = nil + @specified_prec = nil + @null = nil + @useless = nil + end + + attr_accessor :target + attr_reader :symbols + attr_reader :action + + def |(x) + @alternatives.push x.rule + self + end + + def rule + self + end + + def each_rule(&block) + yield self + @alternatives.each(&block) + end + + attr_accessor :ident + + attr_reader :hash + attr_reader :ptrs + + def hash=(n) + @hash = n + ptrs = [] + @symbols.each_with_index do |sym, idx| + ptrs.push LocationPointer.new(self, idx, sym) + end + ptrs.push LocationPointer.new(self, @symbols.size, nil) + @ptrs = ptrs + end + + def precedence + @specified_prec || @precedence + end + + def precedence=(sym) + @precedence ||= sym + end + + def prec(sym, &block) + @specified_prec = sym + if block + unless @action.empty? + raise CompileError, 'both of rule action block and prec block given' + end + @action = UserAction.proc(block) + end + self + end + + attr_accessor :specified_prec + + def nullable?() @null end + def null=(n) @null = n end + + def useless?() @useless end + def useless=(u) @useless = u end + + def inspect + "#<Racc::Rule id=#{@ident} (#{@target})>" + end + + def ==(other) + other.kind_of?(Rule) and @ident == other.ident + end + + def [](idx) + @symbols[idx] + end + + def size + @symbols.size + end + + def empty? + @symbols.empty? + end + + def to_s + "#<rule#{@ident}>" + end + + def accept? + if tok = @symbols[-1] + tok.anchor? + else + false + end + end + + def each(&block) + @symbols.each(&block) + end + + def replace(src, dest) + @target = dest + @symbols = @symbols.map {|s| s == src ? dest : s } + end + + end # class Rule + + + class UserAction + + def UserAction.source_text(src) + new(src, nil) + end + + def UserAction.proc(pr = nil, &block) + if pr and block + raise ArgumentError, "both of argument and block given" + end + new(nil, pr || block) + end + + def UserAction.empty + new(nil, nil) + end + + private_class_method :new + + def initialize(src, proc) + @source = src + @proc = proc + end + + attr_reader :source + attr_reader :proc + + def source? + not @proc + end + + def proc? + not @source + end + + def empty? + not @proc and not @source + end + + def name + "{action type=#{@source || @proc || 'nil'}}" + end + + alias inspect name + + end + + + class OrMark + def initialize(lineno) + @lineno = lineno + end + + def name + '|' + end + + alias inspect name + + attr_reader :lineno + end + + + class Prec + def initialize(symbol, lineno) + @symbol = symbol + @lineno = lineno + end + + def name + "=#{@symbol}" + end + + alias inspect name + + attr_reader :symbol + attr_reader :lineno + end + + + # + # A set of rule and position in it's RHS. + # Note that the number of pointers is more than rule's RHS array, + # because pointer points right edge of the final symbol when reducing. + # + class LocationPointer + + def initialize(rule, i, sym) + @rule = rule + @index = i + @symbol = sym + @ident = @rule.hash + i + @reduce = sym.nil? + end + + attr_reader :rule + attr_reader :index + attr_reader :symbol + + alias dereference symbol + + attr_reader :ident + alias hash ident + attr_reader :reduce + alias reduce? reduce + + def to_s + sprintf('(%d,%d %s)', + @rule.ident, @index, (reduce?() ? '#' : @symbol.to_s)) + end + + alias inspect to_s + + def eql?(ot) + @hash == ot.hash + end + + alias == eql? + + def head? + @index == 0 + end + + def next + @rule.ptrs[@index + 1] or ptr_bug! + end + + alias increment next + + def before(len) + @rule.ptrs[@index - len] or ptr_bug! + end + + private + + def ptr_bug! + raise "racc: fatal: pointer not exist: self: #{to_s}" + end + + end # class LocationPointer + + + class SymbolTable + + include Enumerable + + def initialize + @symbols = [] # :: [Racc::Sym] + @cache = {} # :: {(String|Symbol) => Racc::Sym} + @dummy = intern(:$start, true) + @anchor = intern(false, true) # Symbol ID = 0 + @error = intern(:error, false) # Symbol ID = 1 + end + + attr_reader :dummy + attr_reader :anchor + attr_reader :error + + def [](id) + @symbols[id] + end + + def intern(val, dummy = false) + @cache[val] ||= + begin + sym = Sym.new(val, dummy) + @symbols.push sym + sym + end + end + + attr_reader :symbols + alias to_a symbols + + def delete(sym) + @symbols.delete sym + @cache.delete sym.value + end + + attr_reader :nt_base + + def nt_max + @symbols.size + end + + def each(&block) + @symbols.each(&block) + end + + def terminals(&block) + @symbols[0, @nt_base] + end + + def each_terminal(&block) + @terms.each(&block) + end + + def nonterminals + @symbols[@nt_base, @symbols.size - @nt_base] + end + + def each_nonterminal(&block) + @nterms.each(&block) + end + + def fix + terms, nterms = @symbols.partition {|s| s.terminal? } + @symbols = terms + nterms + @terms = terms + @nterms = nterms + @nt_base = terms.size + fix_ident + check_terminals + end + + private + + def fix_ident + @symbols.each_with_index do |t, i| + t.ident = i + end + end + + def check_terminals + return unless @symbols.any? {|s| s.should_terminal? } + @anchor.should_terminal + @error.should_terminal + each_terminal do |t| + t.should_terminal if t.string_symbol? + end + each do |s| + s.should_terminal if s.assoc + end + terminals().reject {|t| t.should_terminal? }.each do |t| + raise CompileError, "terminal #{t} not declared as terminal" + end + nonterminals().select {|n| n.should_terminal? }.each do |n| + raise CompileError, "symbol #{n} declared as terminal but is not terminal" + end + end + + end # class SymbolTable + + + # Stands terminal and nonterminal symbols. + class Sym + + def initialize(value, dummyp) + @ident = nil + @value = value + @dummyp = dummyp + + @term = nil + @nterm = nil + @should_terminal = false + @precedence = nil + case value + when Symbol + @to_s = value.to_s + @serialized = value.inspect + @string = false + when String + @to_s = value.inspect + @serialized = value.dump + @string = true + when false + @to_s = '$end' + @serialized = 'false' + @string = false + when ErrorSymbolValue + @to_s = 'error' + @serialized = 'Object.new' + @string = false + else + raise ArgumentError, "unknown symbol value: #{value.class}" + end + + @heads = [] + @locate = [] + @snull = nil + @null = nil + @expand = nil + @useless = nil + end + + class << self + def once_writer(nm) + nm = nm.id2name + module_eval(<<-EOS) + def #{nm}=(v) + raise 'racc: fatal: @#{nm} != nil' unless @#{nm}.nil? + @#{nm} = v + end + EOS + end + end + + once_writer :ident + attr_reader :ident + + alias hash ident + + attr_reader :value + + def dummy? + @dummyp + end + + def terminal? + @term + end + + def nonterminal? + @nterm + end + + def term=(t) + raise 'racc: fatal: term= called twice' unless @term.nil? + @term = t + @nterm = !t + end + + def should_terminal + @should_terminal = true + end + + def should_terminal? + @should_terminal + end + + def string_symbol? + @string + end + + def serialize + @serialized + end + + attr_writer :serialized + + attr_accessor :precedence + attr_accessor :assoc + + def to_s + @to_s.dup + end + + alias inspect to_s + + def |(x) + rule() | x.rule + end + + def rule + Rule.new(nil, [self], UserAction.empty) + end + + # + # cache + # + + attr_reader :heads + attr_reader :locate + + def self_null? + @snull + end + + once_writer :snull + + def nullable? + @null + end + + def null=(n) + @null = n + end + + attr_reader :expand + once_writer :expand + + def useless? + @useless + end + + def useless=(f) + @useless = f + end + + end # class Sym + +end # module Racc diff --git a/lib/racc/grammarfileparser.rb b/lib/racc/grammarfileparser.rb new file mode 100644 index 0000000000..7548a9ea37 --- /dev/null +++ b/lib/racc/grammarfileparser.rb @@ -0,0 +1,559 @@ +# +# $Id: 5e1871defa15d288d2252e6a76bb2c4cf2119ed3 $ +# +# Copyright (c) 1999-2006 Minero Aoki +# +# This program is free software. +# You can distribute/modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. +# For details of the GNU LGPL, see the file "COPYING". +# + +require 'racc' +require 'racc/compat' +require 'racc/grammar' +require 'racc/parserfilegenerator' +require 'racc/sourcetext' +require 'stringio' + +module Racc + + grammar = Grammar.define { + g = self + + g.class = seq(:CLASS, :cname, many(:param), :RULE, :rules, option(:END)) + + g.cname = seq(:rubyconst) {|name| + @result.params.classname = name + }\ + | seq(:rubyconst, "<", :rubyconst) {|c, _, s| + @result.params.classname = c + @result.params.superclass = s + } + + g.rubyconst = separated_by1(:colon2, :SYMBOL) {|syms| + syms.map {|s| s.to_s }.join('::') + } + + g.colon2 = seq(':', ':') + + g.param = seq(:CONV, many1(:convdef), :END) {|*| + #@grammar.end_convert_block # FIXME + }\ + | seq(:PRECHIGH, many1(:precdef), :PRECLOW) {|*| + @grammar.end_precedence_declaration true + }\ + | seq(:PRECLOW, many1(:precdef), :PRECHIGH) {|*| + @grammar.end_precedence_declaration false + }\ + | seq(:START, :symbol) {|_, sym| + @grammar.start_symbol = sym + }\ + | seq(:TOKEN, :symbols) {|_, syms| + syms.each do |s| + s.should_terminal + end + }\ + | seq(:OPTION, :options) {|_, syms| + syms.each do |opt| + case opt + when 'result_var' + @result.params.result_var = true + when 'no_result_var' + @result.params.result_var = false + when 'omit_action_call' + @result.params.omit_action_call = true + when 'no_omit_action_call' + @result.params.omit_action_call = false + else + raise CompileError, "unknown option: #{opt}" + end + end + }\ + | seq(:EXPECT, :DIGIT) {|_, num| + if @grammar.n_expected_srconflicts + raise CompileError, "`expect' seen twice" + end + @grammar.n_expected_srconflicts = num + } + + g.convdef = seq(:symbol, :STRING) {|sym, code| + sym.serialized = code + } + + g.precdef = seq(:LEFT, :symbols) {|_, syms| + @grammar.declare_precedence :Left, syms + }\ + | seq(:RIGHT, :symbols) {|_, syms| + @grammar.declare_precedence :Right, syms + }\ + | seq(:NONASSOC, :symbols) {|_, syms| + @grammar.declare_precedence :Nonassoc, syms + } + + g.symbols = seq(:symbol) {|sym| + [sym] + }\ + | seq(:symbols, :symbol) {|list, sym| + list.push sym + list + }\ + | seq(:symbols, "|") + + g.symbol = seq(:SYMBOL) {|sym| @grammar.intern(sym) }\ + | seq(:STRING) {|str| @grammar.intern(str) } + + g.options = many(:SYMBOL) {|syms| syms.map {|s| s.to_s } } + + g.rules = option(:rules_core) {|list| + add_rule_block list unless list.empty? + nil + } + + g.rules_core = seq(:symbol) {|sym| + [sym] + }\ + | seq(:rules_core, :rule_item) {|list, i| + list.push i + list + }\ + | seq(:rules_core, ';') {|list, *| + add_rule_block list unless list.empty? + list.clear + list + }\ + | seq(:rules_core, ':') {|list, *| + next_target = list.pop + add_rule_block list unless list.empty? + [next_target] + } + + g.rule_item = seq(:symbol)\ + | seq("|") {|*| + OrMark.new(@scanner.lineno) + }\ + | seq("=", :symbol) {|_, sym| + Prec.new(sym, @scanner.lineno) + }\ + | seq(:ACTION) {|src| + UserAction.source_text(src) + } + } + + GrammarFileParser = grammar.parser_class + + if grammar.states.srconflict_exist? + raise 'Racc boot script fatal: S/R conflict in build' + end + if grammar.states.rrconflict_exist? + raise 'Racc boot script fatal: R/R conflict in build' + end + + class GrammarFileParser # reopen + + class Result + def initialize(grammar) + @grammar = grammar + @params = ParserFileGenerator::Params.new + end + + attr_reader :grammar + attr_reader :params + end + + def GrammarFileParser.parse_file(filename) + parse(File.read(filename), filename, 1) + end + + def GrammarFileParser.parse(src, filename = '-', lineno = 1) + new().parse(src, filename, lineno) + end + + def initialize(debug_flags = DebugFlags.new) + @yydebug = debug_flags.parse + end + + def parse(src, filename = '-', lineno = 1) + @filename = filename + @lineno = lineno + @scanner = GrammarFileScanner.new(src, @filename) + @scanner.debug = @yydebug + @grammar = Grammar.new + @result = Result.new(@grammar) + @embedded_action_seq = 0 + yyparse @scanner, :yylex + parse_user_code + @result.grammar.init + @result + end + + private + + def next_token + @scanner.scan + end + + def on_error(tok, val, _values) + if val.respond_to?(:id2name) + v = val.id2name + elsif val.kind_of?(String) + v = val + else + v = val.inspect + end + raise CompileError, "#{location()}: unexpected token '#{v}'" + end + + def location + "#{@filename}:#{@lineno - 1 + @scanner.lineno}" + end + + def add_rule_block(list) + sprec = nil + target = list.shift + case target + when OrMark, UserAction, Prec + raise CompileError, "#{target.lineno}: unexpected symbol #{target.name}" + end + curr = [] + list.each do |i| + case i + when OrMark + add_rule target, curr, sprec + curr = [] + sprec = nil + when Prec + raise CompileError, "'=<prec>' used twice in one rule" if sprec + sprec = i.symbol + else + curr.push i + end + end + add_rule target, curr, sprec + end + + def add_rule(target, list, sprec) + if list.last.kind_of?(UserAction) + act = list.pop + else + act = UserAction.empty + end + list.map! {|s| s.kind_of?(UserAction) ? embedded_action(s) : s } + rule = Rule.new(target, list, act) + rule.specified_prec = sprec + @grammar.add rule + end + + def embedded_action(act) + sym = @grammar.intern("@#{@embedded_action_seq += 1}".intern, true) + @grammar.add Rule.new(sym, [], act) + sym + end + + # + # User Code Block + # + + def parse_user_code + line = @scanner.lineno + _, *blocks = *@scanner.epilogue.split(/^----/) + blocks.each do |block| + header, *body = block.lines.to_a + label0, pathes = *header.sub(/\A-+/, '').split('=', 2) + label = canonical_label(label0) + (pathes ? pathes.strip.split(' ') : []).each do |path| + add_user_code label, SourceText.new(File.read(path), path, 1) + end + add_user_code label, SourceText.new(body.join(''), @filename, line + 1) + line += (1 + body.size) + end + end + + USER_CODE_LABELS = { + 'header' => :header, + 'prepare' => :header, # obsolete + 'inner' => :inner, + 'footer' => :footer, + 'driver' => :footer # obsolete + } + + def canonical_label(src) + label = src.to_s.strip.downcase.slice(/\w+/) + unless USER_CODE_LABELS.key?(label) + raise CompileError, "unknown user code type: #{label.inspect}" + end + label + end + + def add_user_code(label, src) + @result.params.send(USER_CODE_LABELS[label]).push src + end + + end + + + class GrammarFileScanner + + def initialize(str, filename = '-') + @lines = str.split(/\n|\r\n|\r/) + @filename = filename + @lineno = -1 + @line_head = true + @in_rule_blk = false + @in_conv_blk = false + @in_block = nil + @epilogue = '' + @debug = false + next_line + end + + attr_reader :epilogue + + def lineno + @lineno + 1 + end + + attr_accessor :debug + + def yylex(&block) + unless @debug + yylex0(&block) + else + yylex0 do |sym, tok| + $stderr.printf "%7d %-10s %s\n", lineno(), sym.inspect, tok.inspect + yield [sym, tok] + end + end + end + + private + + def yylex0 + begin + until @line.empty? + @line.sub!(/\A\s+/, '') + if /\A\#/ =~ @line + break + elsif /\A\/\*/ =~ @line + skip_comment + elsif s = reads(/\A[a-zA-Z_]\w*/) + yield [atom_symbol(s), s.intern] + elsif s = reads(/\A\d+/) + yield [:DIGIT, s.to_i] + elsif ch = reads(/\A./) + case ch + when '"', "'" + yield [:STRING, eval(scan_quoted(ch))] + when '{' + lineno = lineno() + yield [:ACTION, SourceText.new(scan_action(), @filename, lineno)] + else + if ch == '|' + @line_head = false + end + yield [ch, ch] + end + else + end + end + end while next_line() + yield nil + end + + def next_line + @lineno += 1 + @line = @lines[@lineno] + if not @line or /\A----/ =~ @line + @epilogue = @lines.join("\n") + @lines.clear + @line = nil + if @in_block + @lineno -= 1 + scan_error! sprintf('unterminated %s', @in_block) + end + false + else + @line.sub!(/(?:\n|\r\n|\r)\z/, '') + @line_head = true + true + end + end + + ReservedWord = { + 'right' => :RIGHT, + 'left' => :LEFT, + 'nonassoc' => :NONASSOC, + 'preclow' => :PRECLOW, + 'prechigh' => :PRECHIGH, + 'token' => :TOKEN, + 'convert' => :CONV, + 'options' => :OPTION, + 'start' => :START, + 'expect' => :EXPECT, + 'class' => :CLASS, + 'rule' => :RULE, + 'end' => :END + } + + def atom_symbol(token) + if token == 'end' + symbol = :END + @in_conv_blk = false + @in_rule_blk = false + else + if @line_head and not @in_conv_blk and not @in_rule_blk + symbol = ReservedWord[token] || :SYMBOL + else + symbol = :SYMBOL + end + case symbol + when :RULE then @in_rule_blk = true + when :CONV then @in_conv_blk = true + end + end + @line_head = false + symbol + end + + def skip_comment + @in_block = 'comment' + until m = /\*\//.match(@line) + next_line + end + @line = m.post_match + @in_block = nil + end + + $raccs_print_type = false + + def scan_action + buf = '' + nest = 1 + pre = nil + @in_block = 'action' + begin + pre = nil + if s = reads(/\A\s+/) + # does not set 'pre' + buf << s + end + until @line.empty? + if s = reads(/\A[^'"`{}%#\/\$]+/) + buf << (pre = s) + next + end + case ch = read(1) + when '{' + nest += 1 + buf << (pre = ch) + when '}' + nest -= 1 + if nest == 0 + @in_block = nil + return buf + end + buf << (pre = ch) + when '#' # comment + buf << ch << @line + break + when "'", '"', '`' + buf << (pre = scan_quoted(ch)) + when '%' + if literal_head? pre, @line + # % string, regexp, array + buf << ch + case ch = read(1) + when /[qQx]/n + buf << ch << (pre = scan_quoted(read(1), '%string')) + when /wW/n + buf << ch << (pre = scan_quoted(read(1), '%array')) + when /s/n + buf << ch << (pre = scan_quoted(read(1), '%symbol')) + when /r/n + buf << ch << (pre = scan_quoted(read(1), '%regexp')) + when /[a-zA-Z0-9= ]/n # does not include "_" + scan_error! "unknown type of % literal '%#{ch}'" + else + buf << (pre = scan_quoted(ch, '%string')) + end + else + # operator + buf << '||op->' if $raccs_print_type + buf << (pre = ch) + end + when '/' + if literal_head? pre, @line + # regexp + buf << (pre = scan_quoted(ch, 'regexp')) + else + # operator + buf << '||op->' if $raccs_print_type + buf << (pre = ch) + end + when '$' # gvar + buf << ch << (pre = read(1)) + else + raise 'racc: fatal: must not happen' + end + end + buf << "\n" + end while next_line() + raise 'racc: fatal: scan finished before parser finished' + end + + def literal_head?(pre, post) + (!pre || /[a-zA-Z_0-9]/n !~ pre[-1,1]) && + !post.empty? && /\A[\s\=]/n !~ post + end + + def read(len) + s = @line[0, len] + @line = @line[len .. -1] + s + end + + def reads(re) + m = re.match(@line) or return nil + @line = m.post_match + m[0] + end + + def scan_quoted(left, tag = 'string') + buf = left.dup + buf = "||#{tag}->" + buf if $raccs_print_type + re = get_quoted_re(left) + sv, @in_block = @in_block, tag + begin + if s = reads(re) + buf << s + break + else + buf << @line + end + end while next_line() + @in_block = sv + buf << "<-#{tag}||" if $raccs_print_type + buf + end + + LEFT_TO_RIGHT = { + '(' => ')', + '{' => '}', + '[' => ']', + '<' => '>' + } + + CACHE = {} + + def get_quoted_re(left) + term = Regexp.quote(LEFT_TO_RIGHT[left] || left) + CACHE[left] ||= /\A[^#{term}\\]*(?:\\.[^\\#{term}]*)*#{term}/ + end + + def scan_error!(msg) + raise CompileError, "#{lineno()}: #{msg}" + end + + end + +end # module Racc diff --git a/lib/racc/info.rb b/lib/racc/info.rb new file mode 100644 index 0000000000..0e61c3a393 --- /dev/null +++ b/lib/racc/info.rb @@ -0,0 +1,14 @@ +# +# $Id: 10d9595b388ab1ba061c08c038901ff632a0c3c3 $ +# +# Copyright (c) 1999-2006 Minero Aoki +# +# This program is free software. +# You can distribute/modify this program under the same terms of ruby. +# see the file "COPYING". + +module Racc + VERSION = '1.4.15' + Version = VERSION + Copyright = 'Copyright (c) 1999-2006 Minero Aoki' +end diff --git a/lib/racc/iset.rb b/lib/racc/iset.rb new file mode 100644 index 0000000000..a79e709f9c --- /dev/null +++ b/lib/racc/iset.rb @@ -0,0 +1,91 @@ +# +# $Id: de638608cfd72d3ed9819d87b65a89ee6a57b589 $ +# +# Copyright (c) 1999-2006 Minero Aoki +# +# This program is free software. +# You can distribute/modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. +# For details of the GNU LGPL, see the file "COPYING". +# + +module Racc + + # An "indexed" set. All items must respond to :ident. + class ISet + + def initialize(a = []) + @set = a + end + + attr_reader :set + + def add(i) + @set[i.ident] = i + end + + def [](key) + @set[key.ident] + end + + def []=(key, val) + @set[key.ident] = val + end + + alias include? [] + alias key? [] + + def update(other) + s = @set + o = other.set + o.each_index do |idx| + if t = o[idx] + s[idx] = t + end + end + end + + def update_a(a) + s = @set + a.each {|i| s[i.ident] = i } + end + + def delete(key) + i = @set[key.ident] + @set[key.ident] = nil + i + end + + def each(&block) + @set.compact.each(&block) + end + + def to_a + @set.compact + end + + def to_s + "[#{@set.compact.join(' ')}]" + end + + alias inspect to_s + + def size + @set.nitems + end + + def empty? + @set.nitems == 0 + end + + def clear + @set.clear + end + + def dup + ISet.new(@set.dup) + end + + end # class ISet + +end # module Racc diff --git a/lib/racc/logfilegenerator.rb b/lib/racc/logfilegenerator.rb new file mode 100644 index 0000000000..b95b1afaa2 --- /dev/null +++ b/lib/racc/logfilegenerator.rb @@ -0,0 +1,211 @@ +# +# $Id: a7e9663605afdda065d305b250a9805e3bd3fa70 $ +# +# Copyright (c) 1999-2006 Minero Aoki +# +# This program is free software. +# You can distribute/modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. +# For details of the GNU LGPL, see the file "COPYING". +# + +module Racc + + class LogFileGenerator + + def initialize(states, debug_flags = DebugFlags.new) + @states = states + @grammar = states.grammar + @debug_flags = debug_flags + end + + def output(out) + output_conflict out; out.puts + output_useless out; out.puts + output_rule out; out.puts + output_token out; out.puts + output_state out + end + + # + # Warnings + # + + def output_conflict(out) + @states.each do |state| + if state.srconf + out.printf "state %d contains %d shift/reduce conflicts\n", + state.stateid, state.srconf.size + end + if state.rrconf + out.printf "state %d contains %d reduce/reduce conflicts\n", + state.stateid, state.rrconf.size + end + end + end + + def output_useless(out) + @grammar.each do |rl| + if rl.useless? + out.printf "rule %d (%s) never reduced\n", + rl.ident, rl.target.to_s + end + end + @grammar.each_nonterminal do |t| + if t.useless? + out.printf "useless nonterminal %s\n", t.to_s + end + end + end + + # + # States + # + + def output_state(out) + out << "--------- State ---------\n" + + showall = @debug_flags.la || @debug_flags.state + @states.each do |state| + out << "\nstate #{state.ident}\n\n" + + (showall ? state.closure : state.core).each do |ptr| + pointer_out(out, ptr) if ptr.rule.ident != 0 or showall + end + out << "\n" + + action_out out, state + end + end + + def pointer_out(out, ptr) + buf = sprintf("%4d) %s :", ptr.rule.ident, ptr.rule.target.to_s) + ptr.rule.symbols.each_with_index do |tok, idx| + buf << ' _' if idx == ptr.index + buf << ' ' << tok.to_s + end + buf << ' _' if ptr.reduce? + out.puts buf + end + + def action_out(f, state) + sr = state.srconf && state.srconf.dup + rr = state.rrconf && state.rrconf.dup + acts = state.action + keys = acts.keys + keys.sort! {|a,b| a.ident <=> b.ident } + + [ Shift, Reduce, Error, Accept ].each do |klass| + keys.delete_if do |tok| + act = acts[tok] + if act.kind_of?(klass) + outact f, tok, act + if sr and c = sr.delete(tok) + outsrconf f, c + end + if rr and c = rr.delete(tok) + outrrconf f, c + end + + true + else + false + end + end + end + sr.each {|tok, c| outsrconf f, c } if sr + rr.each {|tok, c| outrrconf f, c } if rr + + act = state.defact + if not act.kind_of?(Error) or @debug_flags.any? + outact f, '$default', act + end + + f.puts + state.goto_table.each do |t, st| + if t.nonterminal? + f.printf " %-12s go to state %d\n", t.to_s, st.ident + end + end + end + + def outact(f, t, act) + case act + when Shift + f.printf " %-12s shift, and go to state %d\n", + t.to_s, act.goto_id + when Reduce + f.printf " %-12s reduce using rule %d (%s)\n", + t.to_s, act.ruleid, act.rule.target.to_s + when Accept + f.printf " %-12s accept\n", t.to_s + when Error + f.printf " %-12s error\n", t.to_s + else + raise "racc: fatal: wrong act for outact: act=#{act}(#{act.class})" + end + end + + def outsrconf(f, confs) + confs.each do |c| + r = c.reduce + f.printf " %-12s [reduce using rule %d (%s)]\n", + c.shift.to_s, r.ident, r.target.to_s + end + end + + def outrrconf(f, confs) + confs.each do |c| + r = c.low_prec + f.printf " %-12s [reduce using rule %d (%s)]\n", + c.token.to_s, r.ident, r.target.to_s + end + end + + # + # Rules + # + + def output_rule(out) + out.print "-------- Grammar --------\n\n" + @grammar.each do |rl| + if @debug_flags.any? or rl.ident != 0 + out.printf "rule %d %s: %s\n", + rl.ident, rl.target.to_s, rl.symbols.join(' ') + end + end + end + + # + # Tokens + # + + def output_token(out) + out.print "------- Symbols -------\n\n" + + out.print "**Nonterminals, with rules where they appear\n\n" + @grammar.each_nonterminal do |t| + tmp = <<SRC + %s (%d) + on right: %s + on left : %s +SRC + out.printf tmp, t.to_s, t.ident, + symbol_locations(t.locate).join(' '), + symbol_locations(t.heads).join(' ') + end + + out.print "\n**Terminals, with rules where they appear\n\n" + @grammar.each_terminal do |t| + out.printf " %s (%d) %s\n", + t.to_s, t.ident, symbol_locations(t.locate).join(' ') + end + end + + def symbol_locations(locs) + locs.map {|loc| loc.rule.ident }.reject {|n| n == 0 }.uniq + end + + end + +end # module Racc diff --git a/lib/racc/parser-text.rb b/lib/racc/parser-text.rb new file mode 100644 index 0000000000..07ff41afaa --- /dev/null +++ b/lib/racc/parser-text.rb @@ -0,0 +1,640 @@ +module Racc + PARSER_TEXT = <<'__end_of_file__' +# +# $Id: 1c0ef52c0f41acc465725e9e44b5b9d74d392ba5 $ +# +# Copyright (c) 1999-2006 Minero Aoki +# +# This program is free software. +# You can distribute/modify this program under the same terms of ruby. +# +# As a special exception, when this code is copied by Racc +# into a Racc output file, you may use that output file +# without restriction. +# + +require 'racc/info' + +unless defined?(NotImplementedError) + NotImplementedError = NotImplementError # :nodoc: +end + +module Racc + class ParseError < StandardError; end +end +unless defined?(::ParseError) + ParseError = Racc::ParseError +end + +# Racc is a LALR(1) parser generator. +# It is written in Ruby itself, and generates Ruby programs. +# +# == Command-line Reference +# +# racc [-o<var>filename</var>] [--output-file=<var>filename</var>] +# [-e<var>rubypath</var>] [--embedded=<var>rubypath</var>] +# [-v] [--verbose] +# [-O<var>filename</var>] [--log-file=<var>filename</var>] +# [-g] [--debug] +# [-E] [--embedded] +# [-l] [--no-line-convert] +# [-c] [--line-convert-all] +# [-a] [--no-omit-actions] +# [-C] [--check-only] +# [-S] [--output-status] +# [--version] [--copyright] [--help] <var>grammarfile</var> +# +# [+filename+] +# Racc grammar file. Any extention is permitted. +# [-o+outfile+, --output-file=+outfile+] +# A filename for output. default is <+filename+>.tab.rb +# [-O+filename+, --log-file=+filename+] +# Place logging output in file +filename+. +# Default log file name is <+filename+>.output. +# [-e+rubypath+, --executable=+rubypath+] +# output executable file(mode 755). where +path+ is the Ruby interpreter. +# [-v, --verbose] +# verbose mode. create +filename+.output file, like yacc's y.output file. +# [-g, --debug] +# add debug code to parser class. To display debuggin information, +# use this '-g' option and set @yydebug true in parser class. +# [-E, --embedded] +# Output parser which doesn't need runtime files (racc/parser.rb). +# [-C, --check-only] +# Check syntax of racc grammer file and quit. +# [-S, --output-status] +# Print messages time to time while compiling. +# [-l, --no-line-convert] +# turns off line number converting. +# [-c, --line-convert-all] +# Convert line number of actions, inner, header and footer. +# [-a, --no-omit-actions] +# Call all actions, even if an action is empty. +# [--version] +# print Racc version and quit. +# [--copyright] +# Print copyright and quit. +# [--help] +# Print usage and quit. +# +# == Generating Parser Using Racc +# +# To compile Racc grammar file, simply type: +# +# $ racc parse.y +# +# This creates Ruby script file "parse.tab.y". The -o option can change the output filename. +# +# == Writing A Racc Grammar File +# +# If you want your own parser, you have to write a grammar file. +# A grammar file contains the name of your parser class, grammar for the parser, +# user code, and anything else. +# When writing a grammar file, yacc's knowledge is helpful. +# If you have not used yacc before, Racc is not too difficult. +# +# Here's an example Racc grammar file. +# +# class Calcparser +# rule +# target: exp { print val[0] } +# +# exp: exp '+' exp +# | exp '*' exp +# | '(' exp ')' +# | NUMBER +# end +# +# Racc grammar files resemble yacc files. +# But (of course), this is Ruby code. +# yacc's $$ is the 'result', $0, $1... is +# an array called 'val', and $-1, $-2... is an array called '_values'. +# +# See the {Grammar File Reference}[rdoc-ref:lib/racc/rdoc/grammar.en.rdoc] for +# more information on grammar files. +# +# == Parser +# +# Then you must prepare the parse entry method. There are two types of +# parse methods in Racc, Racc::Parser#do_parse and Racc::Parser#yyparse +# +# Racc::Parser#do_parse is simple. +# +# It's yyparse() of yacc, and Racc::Parser#next_token is yylex(). +# This method must returns an array like [TOKENSYMBOL, ITS_VALUE]. +# EOF is [false, false]. +# (TOKENSYMBOL is a Ruby symbol (taken from String#intern) by default. +# If you want to change this, see the grammar reference. +# +# Racc::Parser#yyparse is little complicated, but useful. +# It does not use Racc::Parser#next_token, instead it gets tokens from any iterator. +# +# For example, <code>yyparse(obj, :scan)</code> causes +# calling +obj#scan+, and you can return tokens by yielding them from +obj#scan+. +# +# == Debugging +# +# When debugging, "-v" or/and the "-g" option is helpful. +# +# "-v" creates verbose log file (.output). +# "-g" creates a "Verbose Parser". +# Verbose Parser prints the internal status when parsing. +# But it's _not_ automatic. +# You must use -g option and set +@yydebug+ to +true+ in order to get output. +# -g option only creates the verbose parser. +# +# === Racc reported syntax error. +# +# Isn't there too many "end"? +# grammar of racc file is changed in v0.10. +# +# Racc does not use '%' mark, while yacc uses huge number of '%' marks.. +# +# === Racc reported "XXXX conflicts". +# +# Try "racc -v xxxx.y". +# It causes producing racc's internal log file, xxxx.output. +# +# === Generated parsers does not work correctly +# +# Try "racc -g xxxx.y". +# This command let racc generate "debugging parser". +# Then set @yydebug=true in your parser. +# It produces a working log of your parser. +# +# == Re-distributing Racc runtime +# +# A parser, which is created by Racc, requires the Racc runtime module; +# racc/parser.rb. +# +# Ruby 1.8.x comes with Racc runtime module, +# you need NOT distribute Racc runtime files. +# +# If you want to include the Racc runtime module with your parser. +# This can be done by using '-E' option: +# +# $ racc -E -omyparser.rb myparser.y +# +# This command creates myparser.rb which `includes' Racc runtime. +# Only you must do is to distribute your parser file (myparser.rb). +# +# Note: parser.rb is ruby license, but your parser is not. +# Your own parser is completely yours. +module Racc + + unless defined?(Racc_No_Extentions) + Racc_No_Extentions = false # :nodoc: + end + + class Parser + + Racc_Runtime_Version = ::Racc::VERSION + Racc_Runtime_Revision = '$Id: 1c0ef52c0f41acc465725e9e44b5b9d74d392ba5 $' + + Racc_Runtime_Core_Version_R = ::Racc::VERSION + Racc_Runtime_Core_Revision_R = '$Id: 1c0ef52c0f41acc465725e9e44b5b9d74d392ba5 $'.split[1] + begin + if Object.const_defined?(:RUBY_ENGINE) and RUBY_ENGINE == 'jruby' + require 'racc/cparse-jruby.jar' + com.headius.racc.Cparse.new.load(JRuby.runtime, false) + else + require 'racc/cparse' + end + # Racc_Runtime_Core_Version_C = (defined in extention) + Racc_Runtime_Core_Revision_C = Racc_Runtime_Core_Id_C.split[2] + unless new.respond_to?(:_racc_do_parse_c, true) + raise LoadError, 'old cparse.so' + end + if Racc_No_Extentions + raise LoadError, 'selecting ruby version of racc runtime core' + end + + Racc_Main_Parsing_Routine = :_racc_do_parse_c # :nodoc: + Racc_YY_Parse_Method = :_racc_yyparse_c # :nodoc: + Racc_Runtime_Core_Version = Racc_Runtime_Core_Version_C # :nodoc: + Racc_Runtime_Core_Revision = Racc_Runtime_Core_Revision_C # :nodoc: + Racc_Runtime_Type = 'c' # :nodoc: + rescue LoadError +puts $! +puts $!.backtrace + Racc_Main_Parsing_Routine = :_racc_do_parse_rb + Racc_YY_Parse_Method = :_racc_yyparse_rb + Racc_Runtime_Core_Version = Racc_Runtime_Core_Version_R + Racc_Runtime_Core_Revision = Racc_Runtime_Core_Revision_R + Racc_Runtime_Type = 'ruby' + end + + def Parser.racc_runtime_type # :nodoc: + Racc_Runtime_Type + end + + def _racc_setup + @yydebug = false unless self.class::Racc_debug_parser + @yydebug = false unless defined?(@yydebug) + if @yydebug + @racc_debug_out = $stderr unless defined?(@racc_debug_out) + @racc_debug_out ||= $stderr + end + arg = self.class::Racc_arg + arg[13] = true if arg.size < 14 + arg + end + + def _racc_init_sysvars + @racc_state = [0] + @racc_tstack = [] + @racc_vstack = [] + + @racc_t = nil + @racc_val = nil + + @racc_read_next = true + + @racc_user_yyerror = false + @racc_error_status = 0 + end + + # The entry point of the parser. This method is used with #next_token. + # If Racc wants to get token (and its value), calls next_token. + # + # Example: + # def parse + # @q = [[1,1], + # [2,2], + # [3,3], + # [false, '$']] + # do_parse + # end + # + # def next_token + # @q.shift + # end + def do_parse + __send__(Racc_Main_Parsing_Routine, _racc_setup(), false) + end + + # The method to fetch next token. + # If you use #do_parse method, you must implement #next_token. + # + # The format of return value is [TOKEN_SYMBOL, VALUE]. + # +token-symbol+ is represented by Ruby's symbol by default, e.g. :IDENT + # for 'IDENT'. ";" (String) for ';'. + # + # The final symbol (End of file) must be false. + def next_token + raise NotImplementedError, "#{self.class}\#next_token is not defined" + end + + def _racc_do_parse_rb(arg, in_debug) + action_table, action_check, action_default, action_pointer, + _, _, _, _, + _, _, token_table, * = arg + + _racc_init_sysvars + tok = act = i = nil + + catch(:racc_end_parse) { + while true + if i = action_pointer[@racc_state[-1]] + if @racc_read_next + if @racc_t != 0 # not EOF + tok, @racc_val = next_token() + unless tok # EOF + @racc_t = 0 + else + @racc_t = (token_table[tok] or 1) # error token + end + racc_read_token(@racc_t, tok, @racc_val) if @yydebug + @racc_read_next = false + end + end + i += @racc_t + unless i >= 0 and + act = action_table[i] and + action_check[i] == @racc_state[-1] + act = action_default[@racc_state[-1]] + end + else + act = action_default[@racc_state[-1]] + end + while act = _racc_evalact(act, arg) + ; + end + end + } + end + + # Another entry point for the parser. + # If you use this method, you must implement RECEIVER#METHOD_ID method. + # + # RECEIVER#METHOD_ID is a method to get next token. + # It must 'yield' the token, which format is [TOKEN-SYMBOL, VALUE]. + def yyparse(recv, mid) + __send__(Racc_YY_Parse_Method, recv, mid, _racc_setup(), false) + end + + def _racc_yyparse_rb(recv, mid, arg, c_debug) + action_table, action_check, action_default, action_pointer, + _, _, _, _, + _, _, token_table, * = arg + + _racc_init_sysvars + + catch(:racc_end_parse) { + until i = action_pointer[@racc_state[-1]] + while act = _racc_evalact(action_default[@racc_state[-1]], arg) + ; + end + end + recv.__send__(mid) do |tok, val| + unless tok + @racc_t = 0 + else + @racc_t = (token_table[tok] or 1) # error token + end + @racc_val = val + @racc_read_next = false + + i += @racc_t + unless i >= 0 and + act = action_table[i] and + action_check[i] == @racc_state[-1] + act = action_default[@racc_state[-1]] + end + while act = _racc_evalact(act, arg) + ; + end + + while !(i = action_pointer[@racc_state[-1]]) || + ! @racc_read_next || + @racc_t == 0 # $ + unless i and i += @racc_t and + i >= 0 and + act = action_table[i] and + action_check[i] == @racc_state[-1] + act = action_default[@racc_state[-1]] + end + while act = _racc_evalact(act, arg) + ; + end + end + end + } + end + + ### + ### common + ### + + def _racc_evalact(act, arg) + action_table, action_check, _, action_pointer, + _, _, _, _, + _, _, _, shift_n, + reduce_n, * = arg + nerr = 0 # tmp + + if act > 0 and act < shift_n + # + # shift + # + if @racc_error_status > 0 + @racc_error_status -= 1 unless @racc_t <= 1 # error token or EOF + end + @racc_vstack.push @racc_val + @racc_state.push act + @racc_read_next = true + if @yydebug + @racc_tstack.push @racc_t + racc_shift @racc_t, @racc_tstack, @racc_vstack + end + + elsif act < 0 and act > -reduce_n + # + # reduce + # + code = catch(:racc_jump) { + @racc_state.push _racc_do_reduce(arg, act) + false + } + if code + case code + when 1 # yyerror + @racc_user_yyerror = true # user_yyerror + return -reduce_n + when 2 # yyaccept + return shift_n + else + raise '[Racc Bug] unknown jump code' + end + end + + elsif act == shift_n + # + # accept + # + racc_accept if @yydebug + throw :racc_end_parse, @racc_vstack[0] + + elsif act == -reduce_n + # + # error + # + case @racc_error_status + when 0 + unless arg[21] # user_yyerror + nerr += 1 + on_error @racc_t, @racc_val, @racc_vstack + end + when 3 + if @racc_t == 0 # is $ + # We're at EOF, and another error occurred immediately after + # attempting auto-recovery + throw :racc_end_parse, nil + end + @racc_read_next = true + end + @racc_user_yyerror = false + @racc_error_status = 3 + while true + if i = action_pointer[@racc_state[-1]] + i += 1 # error token + if i >= 0 and + (act = action_table[i]) and + action_check[i] == @racc_state[-1] + break + end + end + throw :racc_end_parse, nil if @racc_state.size <= 1 + @racc_state.pop + @racc_vstack.pop + if @yydebug + @racc_tstack.pop + racc_e_pop @racc_state, @racc_tstack, @racc_vstack + end + end + return act + + else + raise "[Racc Bug] unknown action #{act.inspect}" + end + + racc_next_state(@racc_state[-1], @racc_state) if @yydebug + + nil + end + + def _racc_do_reduce(arg, act) + _, _, _, _, + goto_table, goto_check, goto_default, goto_pointer, + nt_base, reduce_table, _, _, + _, use_result, * = arg + + state = @racc_state + vstack = @racc_vstack + tstack = @racc_tstack + + i = act * -3 + len = reduce_table[i] + reduce_to = reduce_table[i+1] + method_id = reduce_table[i+2] + void_array = [] + + tmp_t = tstack[-len, len] if @yydebug + tmp_v = vstack[-len, len] + tstack[-len, len] = void_array if @yydebug + vstack[-len, len] = void_array + state[-len, len] = void_array + + # tstack must be updated AFTER method call + if use_result + vstack.push __send__(method_id, tmp_v, vstack, tmp_v[0]) + else + vstack.push __send__(method_id, tmp_v, vstack) + end + tstack.push reduce_to + + racc_reduce(tmp_t, reduce_to, tstack, vstack) if @yydebug + + k1 = reduce_to - nt_base + if i = goto_pointer[k1] + i += state[-1] + if i >= 0 and (curstate = goto_table[i]) and goto_check[i] == k1 + return curstate + end + end + goto_default[k1] + end + + # This method is called when a parse error is found. + # + # ERROR_TOKEN_ID is an internal ID of token which caused error. + # You can get string representation of this ID by calling + # #token_to_str. + # + # ERROR_VALUE is a value of error token. + # + # value_stack is a stack of symbol values. + # DO NOT MODIFY this object. + # + # This method raises ParseError by default. + # + # If this method returns, parsers enter "error recovering mode". + def on_error(t, val, vstack) + raise ParseError, sprintf("\nparse error on value %s (%s)", + val.inspect, token_to_str(t) || '?') + end + + # Enter error recovering mode. + # This method does not call #on_error. + def yyerror + throw :racc_jump, 1 + end + + # Exit parser. + # Return value is Symbol_Value_Stack[0]. + def yyaccept + throw :racc_jump, 2 + end + + # Leave error recovering mode. + def yyerrok + @racc_error_status = 0 + end + + # For debugging output + def racc_read_token(t, tok, val) + @racc_debug_out.print 'read ' + @racc_debug_out.print tok.inspect, '(', racc_token2str(t), ') ' + @racc_debug_out.puts val.inspect + @racc_debug_out.puts + end + + def racc_shift(tok, tstack, vstack) + @racc_debug_out.puts "shift #{racc_token2str tok}" + racc_print_stacks tstack, vstack + @racc_debug_out.puts + end + + def racc_reduce(toks, sim, tstack, vstack) + out = @racc_debug_out + out.print 'reduce ' + if toks.empty? + out.print ' <none>' + else + toks.each {|t| out.print ' ', racc_token2str(t) } + end + out.puts " --> #{racc_token2str(sim)}" + racc_print_stacks tstack, vstack + @racc_debug_out.puts + end + + def racc_accept + @racc_debug_out.puts 'accept' + @racc_debug_out.puts + end + + def racc_e_pop(state, tstack, vstack) + @racc_debug_out.puts 'error recovering mode: pop token' + racc_print_states state + racc_print_stacks tstack, vstack + @racc_debug_out.puts + end + + def racc_next_state(curstate, state) + @racc_debug_out.puts "goto #{curstate}" + racc_print_states state + @racc_debug_out.puts + end + + def racc_print_stacks(t, v) + out = @racc_debug_out + out.print ' [' + t.each_index do |i| + out.print ' (', racc_token2str(t[i]), ' ', v[i].inspect, ')' + end + out.puts ' ]' + end + + def racc_print_states(s) + out = @racc_debug_out + out.print ' [' + s.each {|st| out.print ' ', st } + out.puts ' ]' + end + + def racc_token2str(tok) + self.class::Racc_token_to_s_table[tok] or + raise "[Racc Bug] can't convert token #{tok} to string" + end + + # Convert internal ID of token symbol to the string. + def token_to_str(t) + self.class::Racc_token_to_s_table[t] + end + + end + +end + +__end_of_file__ +end diff --git a/lib/racc/parser.rb b/lib/racc/parser.rb index 0cdb42e49d..41740ade39 100644 --- a/lib/racc/parser.rb +++ b/lib/racc/parser.rb @@ -1,7 +1,5 @@ # frozen_string_literal: false #-- -# $originalId: parser.rb,v 1.8 2006/07/06 11:42:07 aamine Exp $ -# # Copyright (c) 1999-2006 Minero Aoki # # This program is free software. @@ -12,6 +10,12 @@ # without restriction. #++ +require 'racc/info' + +unless defined?(NotImplementedError) + NotImplementedError = NotImplementError # :nodoc: +end + module Racc class ParseError < StandardError; end end @@ -49,12 +53,12 @@ end # [-v, --verbose] # verbose mode. create +filename+.output file, like yacc's y.output file. # [-g, --debug] -# add debug code to parser class. To display debugging information, +# add debug code to parser class. To display debuggin information, # use this '-g' option and set @yydebug true in parser class. # [-E, --embedded] # Output parser which doesn't need runtime files (racc/parser.rb). # [-C, --check-only] -# Check syntax of racc grammar file and quit. +# Check syntax of racc grammer file and quit. # [-S, --output-status] # Print messages time to time while compiling. # [-l, --no-line-convert] @@ -171,29 +175,34 @@ end # This command creates myparser.rb which `includes' Racc runtime. # Only you must do is to distribute your parser file (myparser.rb). # -# Note: parser.rb is LGPL, but your parser is not. +# Note: parser.rb is ruby license, but your parser is not. # Your own parser is completely yours. module Racc - unless defined?(Racc_No_Extensions) - Racc_No_Extensions = false # :nodoc: + unless defined?(Racc_No_Extentions) + Racc_No_Extentions = false # :nodoc: end class Parser - Racc_Runtime_Version = '1.4.6' - Racc_Runtime_Revision = %w$originalRevision: 1.8 $[1] + Racc_Runtime_Version = ::Racc::VERSION + Racc_Runtime_Revision = '$Id: 87af5c09d4467cae567837b4162ec2145417a90e $' - Racc_Runtime_Core_Version_R = '1.4.6' - Racc_Runtime_Core_Revision_R = %w$originalRevision: 1.8 $[1] + Racc_Runtime_Core_Version_R = ::Racc::VERSION + Racc_Runtime_Core_Revision_R = '$Id: 87af5c09d4467cae567837b4162ec2145417a90e $'.split[1] begin - require 'racc/cparse' - # Racc_Runtime_Core_Version_C = (defined in extension) + if Object.const_defined?(:RUBY_ENGINE) and RUBY_ENGINE == 'jruby' + require 'racc/cparse-jruby.jar' + com.headius.racc.Cparse.new.load(JRuby.runtime, false) + else + require 'racc/cparse' + end + # Racc_Runtime_Core_Version_C = (defined in extention) Racc_Runtime_Core_Revision_C = Racc_Runtime_Core_Id_C.split[2] unless new.respond_to?(:_racc_do_parse_c, true) raise LoadError, 'old cparse.so' end - if Racc_No_Extensions + if Racc_No_Extentions raise LoadError, 'selecting ruby version of racc runtime core' end @@ -203,6 +212,8 @@ module Racc Racc_Runtime_Core_Revision = Racc_Runtime_Core_Revision_C # :nodoc: Racc_Runtime_Type = 'c' # :nodoc: rescue LoadError +puts $! +puts $!.backtrace Racc_Main_Parsing_Routine = :_racc_do_parse_rb Racc_YY_Parse_Method = :_racc_yyparse_rb Racc_Runtime_Core_Version = Racc_Runtime_Core_Version_R @@ -255,9 +266,11 @@ module Racc # def next_token # @q.shift # end + class_eval %{ def do_parse - __send__(Racc_Main_Parsing_Routine, _racc_setup(), false) + #{Racc_Main_Parsing_Routine}(_racc_setup(), false) end + } # The method to fetch next token. # If you use #do_parse method, you must implement #next_token. @@ -274,8 +287,7 @@ module Racc def _racc_do_parse_rb(arg, in_debug) action_table, action_check, action_default, action_pointer, _, _, _, _, - _, _, token_table, _, - _, _, * = arg + _, _, token_table, * = arg _racc_init_sysvars tok = act = i = nil @@ -316,19 +328,18 @@ module Racc # # RECEIVER#METHOD_ID is a method to get next token. # It must 'yield' the token, which format is [TOKEN-SYMBOL, VALUE]. + class_eval %{ def yyparse(recv, mid) - __send__(Racc_YY_Parse_Method, recv, mid, _racc_setup(), true) + #{Racc_YY_Parse_Method}(recv, mid, _racc_setup(), true) end + } def _racc_yyparse_rb(recv, mid, arg, c_debug) action_table, action_check, action_default, action_pointer, - _, _, _, _, - _, _, token_table, _, - _, _, * = arg + _, _, _, _, + _, _, token_table, * = arg _racc_init_sysvars - act = nil - i = nil catch(:racc_end_parse) { until i = action_pointer[@racc_state[-1]] @@ -355,9 +366,9 @@ module Racc ; end - while not(i = action_pointer[@racc_state[-1]]) or - not @racc_read_next or - @racc_t == 0 # $ + while !(i = action_pointer[@racc_state[-1]]) || + ! @racc_read_next || + @racc_t == 0 # $ unless i and i += @racc_t and i >= 0 and act = action_table[i] and @@ -378,16 +389,17 @@ module Racc def _racc_evalact(act, arg) action_table, action_check, _, action_pointer, - _, _, _, _, - _, _, _, shift_n, reduce_n, - _, _, * = arg + _, _, _, _, + _, _, _, shift_n, + reduce_n, * = arg + nerr = 0 # tmp if act > 0 and act < shift_n # # shift # if @racc_error_status > 0 - @racc_error_status -= 1 unless @racc_t == 1 # error token + @racc_error_status -= 1 unless @racc_t <= 1 # error token or EOF end @racc_vstack.push @racc_val @racc_state.push act @@ -431,10 +443,13 @@ module Racc case @racc_error_status when 0 unless arg[21] # user_yyerror + nerr += 1 on_error @racc_t, @racc_val, @racc_vstack end when 3 if @racc_t == 0 # is $ + # We're at EOF, and another error occurred immediately after + # attempting auto-recovery throw :racc_end_parse, nil end @racc_read_next = true @@ -470,10 +485,11 @@ module Racc end def _racc_do_reduce(arg, act) - _, _, _, _, - goto_table, goto_check, goto_default, goto_pointer, - nt_base, reduce_table, _, _, - _, use_result, * = arg + _, _, _, _, + goto_table, goto_check, goto_default, goto_pointer, + nt_base, reduce_table, _, _, + _, use_result, * = arg + state = @racc_state vstack = @racc_vstack tstack = @racc_tstack @@ -569,7 +585,6 @@ module Racc toks.each {|t| out.print ' ', racc_token2str(t) } end out.puts " --> #{racc_token2str(sim)}" - racc_print_stacks tstack, vstack @racc_debug_out.puts end diff --git a/lib/racc/parserfilegenerator.rb b/lib/racc/parserfilegenerator.rb new file mode 100644 index 0000000000..f082144854 --- /dev/null +++ b/lib/racc/parserfilegenerator.rb @@ -0,0 +1,510 @@ +# +# $Id: 19fb5debfd07d70f6bc2ddc79ef43fbb3d27f15e $ +# +# Copyright (c) 1999-2006 Minero Aoki +# +# This program is free software. +# You can distribute/modify this program under the same terms of ruby. +# see the file "COPYING". + +require 'enumerator' +require 'racc/compat' +require 'racc/sourcetext' +require 'racc/parser-text' +require 'rbconfig' + +module Racc + + class ParserFileGenerator + + class Params + def self.bool_attr(name) + module_eval(<<-End) + def #{name}? + @#{name} + end + + def #{name}=(b) + @#{name} = b + end + End + end + + attr_accessor :filename + attr_accessor :classname + attr_accessor :superclass + bool_attr :omit_action_call + bool_attr :result_var + attr_accessor :header + attr_accessor :inner + attr_accessor :footer + + bool_attr :debug_parser + bool_attr :convert_line + bool_attr :convert_line_all + bool_attr :embed_runtime + bool_attr :make_executable + attr_accessor :interpreter + + def initialize + # Parameters derived from parser + self.filename = nil + self.classname = nil + self.superclass = 'Racc::Parser' + self.omit_action_call = true + self.result_var = true + self.header = [] + self.inner = [] + self.footer = [] + + # Parameters derived from command line options + self.debug_parser = false + self.convert_line = true + self.convert_line_all = false + self.embed_runtime = false + self.make_executable = false + self.interpreter = nil + end + end + + def initialize(states, params) + @states = states + @grammar = states.grammar + @params = params + end + + def generate_parser + string_io = StringIO.new + + init_line_conversion_system + @f = string_io + parser_file + + string_io.rewind + string_io.read + end + + def generate_parser_file(destpath) + init_line_conversion_system + File.open(destpath, 'w') {|f| + @f = f + parser_file + } + File.chmod 0755, destpath if @params.make_executable? + end + + private + + def parser_file + shebang @params.interpreter if @params.make_executable? + notice + line + if @params.embed_runtime? + embed_library runtime_source() + else + require 'racc/parser.rb' + end + header + parser_class(@params.classname, @params.superclass) { + inner + state_transition_table + } + footer + end + + c = ::RbConfig::CONFIG + RUBY_PATH = "#{c['bindir']}/#{c['ruby_install_name']}#{c['EXEEXT']}" + + def shebang(path) + line '#!' + (path == 'ruby' ? RUBY_PATH : path) + end + + def notice + line %q[#] + line %q[# DO NOT MODIFY!!!!] + line %Q[# This file is automatically generated by Racc #{Racc::Version}] + line %Q[# from Racc grammer file "#{@params.filename}".] + line %q[#] + end + + def runtime_source + SourceText.new(::Racc::PARSER_TEXT, 'racc/parser.rb', 1) + end + + def embed_library(src) + line %[###### #{src.filename} begin] + line %[unless $".index '#{src.filename}'] + line %[$".push '#{src.filename}'] + put src, @params.convert_line? + line %[end] + line %[###### #{src.filename} end] + end + + def require(feature) + line "require '#{feature}'" + end + + def parser_class(classname, superclass) + mods = classname.split('::') + classid = mods.pop + mods.each do |mod| + indent; line "module #{mod}" + cref_push mod + end + indent; line "class #{classid} < #{superclass}" + cref_push classid + yield + cref_pop + indent; line "end \# class #{classid}" + mods.reverse_each do |mod| + indent; line "end \# module #{mod}" + cref_pop + end + end + + def header + @params.header.each do |src| + line + put src, @params.convert_line_all? + end + end + + def inner + @params.inner.each do |src| + line + put src, @params.convert_line? + end + end + + def footer + @params.footer.each do |src| + line + put src, @params.convert_line_all? + end + end + + # Low Level Routines + + def put(src, convert_line = false) + if convert_line + replace_location(src) { + @f.puts src.text + } + else + @f.puts src.text + end + end + + def line(str = '') + @f.puts str + end + + def init_line_conversion_system + @cref = [] + @used_separator = {} + end + + def cref_push(name) + @cref.push name + end + + def cref_pop + @cref.pop + end + + def indent + @f.print ' ' * @cref.size + end + + def toplevel? + @cref.empty? + end + + def replace_location(src) + sep = make_separator(src) + @f.print 'self.class.' if toplevel? + @f.puts "module_eval(<<'#{sep}', '#{src.filename}', #{src.lineno})" + yield + @f.puts sep + end + + def make_separator(src) + sep = unique_separator(src.filename) + sep *= 2 while src.text.index(sep) + sep + end + + def unique_separator(id) + sep = "...end #{id}/module_eval..." + while @used_separator.key?(sep) + sep.concat sprintf('%02x', rand(255)) + end + @used_separator[sep] = true + sep + end + + # + # State Transition Table Serialization + # + + public + + def put_state_transition_table(f) + @f = f + state_transition_table + end + + private + + def state_transition_table + table = @states.state_transition_table + table.use_result_var = @params.result_var? + table.debug_parser = @params.debug_parser? + + line "##### State transition tables begin ###" + line + integer_list 'racc_action_table', table.action_table + line + integer_list 'racc_action_check', table.action_check + line + integer_list 'racc_action_pointer', table.action_pointer + line + integer_list 'racc_action_default', table.action_default + line + integer_list 'racc_goto_table', table.goto_table + line + integer_list 'racc_goto_check', table.goto_check + line + integer_list 'racc_goto_pointer', table.goto_pointer + line + integer_list 'racc_goto_default', table.goto_default + line + i_i_sym_list 'racc_reduce_table', table.reduce_table + line + line "racc_reduce_n = #{table.reduce_n}" + line + line "racc_shift_n = #{table.shift_n}" + line + sym_int_hash 'racc_token_table', table.token_table + line + line "racc_nt_base = #{table.nt_base}" + line + line "racc_use_result_var = #{table.use_result_var}" + line + @f.print(unindent_auto(<<-End)) + Racc_arg = [ + racc_action_table, + racc_action_check, + racc_action_default, + racc_action_pointer, + racc_goto_table, + racc_goto_check, + racc_goto_default, + racc_goto_pointer, + racc_nt_base, + racc_reduce_table, + racc_token_table, + racc_shift_n, + racc_reduce_n, + racc_use_result_var ] + End + line + string_list 'Racc_token_to_s_table', table.token_to_s_table + line + line "Racc_debug_parser = #{table.debug_parser}" + line + line '##### State transition tables end #####' + actions + end + + def integer_list(name, table) + if table.size > 2000 + serialize_integer_list_compressed name, table + else + serialize_integer_list_std name, table + end + end + + def serialize_integer_list_compressed(name, table) + # TODO: this can be made a LOT more clean with a simple split/map + sep = "\n" + nsep = ",\n" + buf = '' + com = '' + ncom = ',' + co = com + @f.print 'clist = [' + table.each do |i| + buf << co << i.to_s; co = ncom + if buf.size > 66 + @f.print sep; sep = nsep + @f.print "'", buf, "'" + buf = '' + co = com + end + end + unless buf.empty? + @f.print sep + @f.print "'", buf, "'" + end + line ' ]' + + @f.print(<<-End) + #{name} = arr = ::Array.new(#{table.size}, nil) + idx = 0 + clist.each do |str| + str.split(',', -1).each do |i| + arr[idx] = i.to_i unless i.empty? + idx += 1 + end + end + End + end + + def serialize_integer_list_std(name, table) + sep = '' + line "#{name} = [" + table.each_slice(10) do |ns| + @f.print sep; sep = ",\n" + @f.print ns.map {|n| sprintf('%6s', n ? n.to_s : 'nil') }.join(',') + end + line ' ]' + end + + def i_i_sym_list(name, table) + sep = '' + line "#{name} = [" + table.each_slice(3) do |len, target, mid| + @f.print sep; sep = ",\n" + @f.printf ' %d, %d, %s', len, target, mid.inspect + end + line " ]" + end + + def sym_int_hash(name, h) + sep = "\n" + @f.print "#{name} = {" + h.to_a.sort_by {|sym, i| i }.each do |sym, i| + @f.print sep; sep = ",\n" + @f.printf " %s => %d", sym.serialize, i + end + line " }" + end + + def string_list(name, list) + sep = " " + line "#{name} = [" + list.each do |s| + @f.print sep; sep = ",\n " + @f.print s.dump + end + line ' ]' + end + + def actions + @grammar.each do |rule| + unless rule.action.source? + raise "racc: fatal: cannot generate parser file when any action is a Proc" + end + end + + if @params.result_var? + decl = ', result' + retval = "\n result" + default_body = '' + else + decl = '' + retval = '' + default_body = 'val[0]' + end + @grammar.each do |rule| + line + if rule.action.empty? and @params.omit_action_call? + line "# reduce #{rule.ident} omitted" + else + src0 = rule.action.source || SourceText.new(default_body, __FILE__, 0) + if @params.convert_line? + src = remove_blank_lines(src0) + delim = make_delimiter(src.text) + @f.printf unindent_auto(<<-End), + module_eval(<<'%s', '%s', %d) + def _reduce_%d(val, _values%s) + %s%s + end + %s + End + delim, src.filename, src.lineno - 1, + rule.ident, decl, + src.text, retval, + delim + else + src = remove_blank_lines(src0) + @f.printf unindent_auto(<<-End), + def _reduce_%d(val, _values%s) + %s%s + end + End + rule.ident, decl, + src.text, retval + end + end + end + line + @f.printf unindent_auto(<<-'End'), decl + def _reduce_none(val, _values%s) + val[0] + end + End + line + end + + def remove_blank_lines(src) + body = src.text.dup + line = src.lineno + while body.slice!(/\A[ \t\f]*(?:\n|\r\n|\r)/) + line += 1 + end + SourceText.new(body, src.filename, line) + end + + def make_delimiter(body) + delim = '.,.,' + while body.index(delim) + delim *= 2 + end + delim + end + + def unindent_auto(str) + lines = str.lines.to_a + n = minimum_indent(lines) + lines.map {|line| detab(line).sub(indent_re(n), '').rstrip + "\n" }.join('') + end + + def minimum_indent(lines) + lines.map {|line| n_indent(line) }.min + end + + def n_indent(line) + line.slice(/\A\s+/).size + end + + RE_CACHE = {} + + def indent_re(n) + RE_CACHE[n] ||= /\A {#{n}}/ + end + + def detab(str, ts = 8) + add = 0 + len = nil + str.gsub(/\t/) { + len = ts - ($`.size + add) % ts + add += len - 1 + ' ' * len + } + end + + end + +end diff --git a/lib/racc/pre-setup b/lib/racc/pre-setup new file mode 100644 index 0000000000..5027d865b7 --- /dev/null +++ b/lib/racc/pre-setup @@ -0,0 +1,13 @@ +def generate_parser_text_rb(target) + return if File.exist?(srcfile(target)) + $stderr.puts "generating #{target}..." + File.open(target, 'w') {|f| + f.puts "module Racc" + f.puts " PARSER_TEXT = <<'__end_of_file__'" + f.puts File.read(srcfile('parser.rb')) + f.puts "__end_of_file__" + f.puts "end" + } +end + +generate_parser_text_rb 'parser-text.rb' diff --git a/lib/racc/sourcetext.rb b/lib/racc/sourcetext.rb new file mode 100644 index 0000000000..b33ba29291 --- /dev/null +++ b/lib/racc/sourcetext.rb @@ -0,0 +1,34 @@ +# +# $Id: 3b2d89d9ada2f5fcb043837dcc5c9631856d5b70 $ +# +# Copyright (c) 1999-2006 Minero Aoki +# +# This program is free software. +# You can distribute/modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. +# For details of LGPL, see the file "COPYING". +# + +module Racc + + class SourceText + def initialize(text, filename, lineno) + @text = text + @filename = filename + @lineno = lineno + end + + attr_reader :text + attr_reader :filename + attr_reader :lineno + + def to_s + "#<SourceText #{location()}>" + end + + def location + "#{@filename}:#{@lineno}" + end + end + +end diff --git a/lib/racc/state.rb b/lib/racc/state.rb new file mode 100644 index 0000000000..347a74329a --- /dev/null +++ b/lib/racc/state.rb @@ -0,0 +1,969 @@ +# +# $Id: a101d6acb72abc392f7757cda89bf6f0a683a43d $ +# +# Copyright (c) 1999-2006 Minero Aoki +# +# This program is free software. +# You can distribute/modify this program under the same terms of ruby. +# see the file "COPYING". + +require 'racc/iset' +require 'racc/statetransitiontable' +require 'racc/exception' +require 'forwardable' + +module Racc + + # A table of LALR states. + class States + + include Enumerable + + def initialize(grammar, debug_flags = DebugFlags.new) + @grammar = grammar + @symboltable = grammar.symboltable + @d_state = debug_flags.state + @d_la = debug_flags.la + @d_prec = debug_flags.prec + @states = [] + @statecache = {} + @actions = ActionTable.new(@grammar, self) + @nfa_computed = false + @dfa_computed = false + end + + attr_reader :grammar + attr_reader :actions + + def size + @states.size + end + + def inspect + '#<state table>' + end + + alias to_s inspect + + def [](i) + @states[i] + end + + def each_state(&block) + @states.each(&block) + end + + alias each each_state + + def each_index(&block) + @states.each_index(&block) + end + + extend Forwardable + + def_delegator "@actions", :shift_n + def_delegator "@actions", :reduce_n + def_delegator "@actions", :nt_base + + def should_report_srconflict? + srconflict_exist? and + (n_srconflicts() != @grammar.n_expected_srconflicts) + end + + def srconflict_exist? + n_srconflicts() != 0 + end + + def n_srconflicts + @n_srconflicts ||= inject(0) {|sum, st| sum + st.n_srconflicts } + end + + def rrconflict_exist? + n_rrconflicts() != 0 + end + + def n_rrconflicts + @n_rrconflicts ||= inject(0) {|sum, st| sum + st.n_rrconflicts } + end + + def state_transition_table + @state_transition_table ||= StateTransitionTable.generate(self.dfa) + end + + # + # NFA (Non-deterministic Finite Automaton) Computation + # + + public + + def nfa + return self if @nfa_computed + compute_nfa + @nfa_computed = true + self + end + + private + + def compute_nfa + @grammar.init + # add state 0 + core_to_state [ @grammar[0].ptrs[0] ] + # generate LALR states + cur = 0 + @gotos = [] + while cur < @states.size + generate_states @states[cur] # state is added here + cur += 1 + end + @actions.init + end + + def generate_states(state) + puts "dstate: #{state}" if @d_state + + table = {} + state.closure.each do |ptr| + if sym = ptr.dereference + addsym table, sym, ptr.next + end + end + table.each do |sym, core| + puts "dstate: sym=#{sym} ncore=#{core}" if @d_state + + dest = core_to_state(core.to_a) + state.goto_table[sym] = dest + id = sym.nonterminal?() ? @gotos.size : nil + g = Goto.new(id, sym, state, dest) + @gotos.push g if sym.nonterminal? + state.gotos[sym] = g + puts "dstate: #{state.ident} --#{sym}--> #{dest.ident}" if @d_state + + # check infinite recursion + if state.ident == dest.ident and state.closure.size == 1 + raise CompileError, + sprintf("Infinite recursion: state %d, with rule %d", + state.ident, state.ptrs[0].rule.ident) + end + end + end + + def addsym(table, sym, ptr) + unless s = table[sym] + table[sym] = s = ISet.new + end + s.add ptr + end + + def core_to_state(core) + # + # convert CORE to a State object. + # If matching state does not exist, create it and add to the table. + # + + k = fingerprint(core) + unless dest = @statecache[k] + # not registered yet + dest = State.new(@states.size, core) + @states.push dest + + @statecache[k] = dest + + puts "core_to_state: create state ID #{dest.ident}" if @d_state + else + if @d_state + puts "core_to_state: dest is cached ID #{dest.ident}" + puts "core_to_state: dest core #{dest.core.join(' ')}" + end + end + + dest + end + + def fingerprint(arr) + arr.map {|i| i.ident }.pack('L*') + end + + # + # DFA (Deterministic Finite Automaton) Generation + # + + public + + def dfa + return self if @dfa_computed + nfa + compute_dfa + @dfa_computed = true + self + end + + private + + def compute_dfa + la = lookahead() + @states.each do |state| + state.la = la + resolve state + end + set_accept + @states.each do |state| + pack state + end + check_useless + end + + def lookahead + # + # lookahead algorithm ver.3 -- from bison 1.26 + # + + gotos = @gotos + if @d_la + puts "\n--- goto ---" + gotos.each_with_index {|g, i| print i, ' '; p g } + end + + ### initialize_LA() + ### set_goto_map() + la_rules = [] + @states.each do |state| + state.check_la la_rules + end + + ### initialize_F() + f = create_tmap(gotos.size) + reads = [] + edge = [] + gotos.each do |goto| + goto.to_state.goto_table.each do |t, st| + if t.terminal? + f[goto.ident] |= (1 << t.ident) + elsif t.nullable? + edge.push goto.to_state.gotos[t].ident + end + end + if edge.empty? + reads.push nil + else + reads.push edge + edge = [] + end + end + digraph f, reads + if @d_la + puts "\n--- F1 (reads) ---" + print_tab gotos, reads, f + end + + ### build_relations() + ### compute_FOLLOWS + path = nil + edge = [] + lookback = Array.new(la_rules.size, nil) + includes = [] + gotos.each do |goto| + goto.symbol.heads.each do |ptr| + path = record_path(goto.from_state, ptr.rule) + lastgoto = path.last + st = lastgoto ? lastgoto.to_state : goto.from_state + if st.conflict? + addrel lookback, st.rruleid(ptr.rule), goto + end + path.reverse_each do |g| + break if g.symbol.terminal? + edge.push g.ident + break unless g.symbol.nullable? + end + end + if edge.empty? + includes.push nil + else + includes.push edge + edge = [] + end + end + includes = transpose(includes) + digraph f, includes + if @d_la + puts "\n--- F2 (includes) ---" + print_tab gotos, includes, f + end + + ### compute_lookaheads + la = create_tmap(la_rules.size) + lookback.each_with_index do |arr, i| + if arr + arr.each do |g| + la[i] |= f[g.ident] + end + end + end + if @d_la + puts "\n--- LA (lookback) ---" + print_tab la_rules, lookback, la + end + + la + end + + def create_tmap(size) + Array.new(size, 0) # use Integer as bitmap + end + + def addrel(tbl, i, item) + if a = tbl[i] + a.push item + else + tbl[i] = [item] + end + end + + def record_path(begst, rule) + st = begst + path = [] + rule.symbols.each do |t| + goto = st.gotos[t] + path.push goto + st = goto.to_state + end + path + end + + def transpose(rel) + new = Array.new(rel.size, nil) + rel.each_with_index do |arr, idx| + if arr + arr.each do |i| + addrel new, i, idx + end + end + end + new + end + + def digraph(map, relation) + n = relation.size + index = Array.new(n, nil) + vertices = [] + @infinity = n + 2 + + index.each_index do |i| + if not index[i] and relation[i] + traverse i, index, vertices, map, relation + end + end + end + + def traverse(i, index, vertices, map, relation) + vertices.push i + index[i] = height = vertices.size + + if rp = relation[i] + rp.each do |proci| + unless index[proci] + traverse proci, index, vertices, map, relation + end + if index[i] > index[proci] + # circulative recursion !!! + index[i] = index[proci] + end + map[i] |= map[proci] + end + end + + if index[i] == height + while true + proci = vertices.pop + index[proci] = @infinity + break if i == proci + + map[proci] |= map[i] + end + end + end + + # for debug + def print_atab(idx, tab) + tab.each_with_index do |i,ii| + printf '%-20s', idx[ii].inspect + p i + end + end + + def print_tab(idx, rel, tab) + tab.each_with_index do |bin,i| + print i, ' ', idx[i].inspect, ' << '; p rel[i] + print ' ' + each_t(@symboltable, bin) {|t| print ' ', t } + puts + end + end + + # for debug + def print_tab_i(idx, rel, tab, i) + bin = tab[i] + print i, ' ', idx[i].inspect, ' << '; p rel[i] + print ' ' + each_t(@symboltable, bin) {|t| print ' ', t } + end + + # for debug + def printb(i) + each_t(@symboltable, i) do |t| + print t, ' ' + end + puts + end + + def each_t(tbl, set) + 0.upto( set.size ) do |i| + (0..7).each do |ii| + if set[idx = i * 8 + ii] == 1 + yield tbl[idx] + end + end + end + end + + # + # resolve + # + + def resolve(state) + if state.conflict? + resolve_rr state, state.ritems + resolve_sr state, state.stokens + else + if state.rrules.empty? + # shift + state.stokens.each do |t| + state.action[t] = @actions.shift(state.goto_table[t]) + end + else + # reduce + state.defact = @actions.reduce(state.rrules[0]) + end + end + end + + def resolve_rr(state, r) + r.each do |item| + item.each_la(@symboltable) do |t| + act = state.action[t] + if act + unless act.kind_of?(Reduce) + raise "racc: fatal: #{act.class} in action table" + end + # Cannot resolve R/R conflict (on t). + # Reduce with upper rule as default. + state.rr_conflict act.rule, item.rule, t + else + # No conflict. + state.action[t] = @actions.reduce(item.rule) + end + end + end + end + + def resolve_sr(state, s) + s.each do |stok| + goto = state.goto_table[stok] + act = state.action[stok] + + unless act + # no conflict + state.action[stok] = @actions.shift(goto) + else + unless act.kind_of?(Reduce) + puts 'DEBUG -------------------------------' + p stok + p act + state.action.each do |k,v| + print k.inspect, ' ', v.inspect, "\n" + end + raise "racc: fatal: #{act.class} in action table" + end + + # conflict on stok + + rtok = act.rule.precedence + case do_resolve_sr(stok, rtok) + when :Reduce + # action is already set + + when :Shift + # overwrite + act.decref + state.action[stok] = @actions.shift(goto) + + when :Error + act.decref + state.action[stok] = @actions.error + + when :CantResolve + # shift as default + act.decref + state.action[stok] = @actions.shift(goto) + state.sr_conflict stok, act.rule + end + end + end + end + + ASSOC = { + :Left => :Reduce, + :Right => :Shift, + :Nonassoc => :Error + } + + def do_resolve_sr(stok, rtok) + puts "resolve_sr: s/r conflict: rtok=#{rtok}, stok=#{stok}" if @d_prec + + unless rtok and rtok.precedence + puts "resolve_sr: no prec for #{rtok}(R)" if @d_prec + return :CantResolve + end + rprec = rtok.precedence + + unless stok and stok.precedence + puts "resolve_sr: no prec for #{stok}(S)" if @d_prec + return :CantResolve + end + sprec = stok.precedence + + ret = if rprec == sprec + ASSOC[rtok.assoc] or + raise "racc: fatal: #{rtok}.assoc is not Left/Right/Nonassoc" + else + (rprec > sprec) ? (:Reduce) : (:Shift) + end + + puts "resolve_sr: resolved as #{ret.id2name}" if @d_prec + ret + end + + # + # complete + # + + def set_accept + anch = @symboltable.anchor + init_state = @states[0].goto_table[@grammar.start] + targ_state = init_state.action[anch].goto_state + acc_state = targ_state.action[anch].goto_state + + acc_state.action.clear + acc_state.goto_table.clear + acc_state.defact = @actions.accept + end + + def pack(state) + ### find most frequently used reduce rule + act = state.action + arr = Array.new(@grammar.size, 0) + act.each do |t, a| + arr[a.ruleid] += 1 if a.kind_of?(Reduce) + end + i = arr.max + s = (i > 0) ? arr.index(i) : nil + + ### set & delete default action + if s + r = @actions.reduce(s) + if not state.defact or state.defact == r + act.delete_if {|t, a| a == r } + state.defact = r + end + else + state.defact ||= @actions.error + end + end + + def check_useless + used = [] + @actions.each_reduce do |act| + if not act or act.refn == 0 + act.rule.useless = true + else + t = act.rule.target + used[t.ident] = t + end + end + @symboltable.nt_base.upto(@symboltable.nt_max - 1) do |n| + unless used[n] + @symboltable[n].useless = true + end + end + end + + end # class StateTable + + + # A LALR state. + class State + + def initialize(ident, core) + @ident = ident + @core = core + @goto_table = {} + @gotos = {} + @stokens = nil + @ritems = nil + @action = {} + @defact = nil + @rrconf = nil + @srconf = nil + + @closure = make_closure(@core) + end + + attr_reader :ident + alias stateid ident + alias hash ident + + attr_reader :core + attr_reader :closure + + attr_reader :goto_table + attr_reader :gotos + + attr_reader :stokens + attr_reader :ritems + attr_reader :rrules + + attr_reader :action + attr_accessor :defact # default action + + attr_reader :rrconf + attr_reader :srconf + + def inspect + "<state #{@ident}>" + end + + alias to_s inspect + + def ==(oth) + @ident == oth.ident + end + + alias eql? == + + def make_closure(core) + set = ISet.new + core.each do |ptr| + set.add ptr + if t = ptr.dereference and t.nonterminal? + set.update_a t.expand + end + end + set.to_a + end + + def check_la(la_rules) + @conflict = false + s = [] + r = [] + @closure.each do |ptr| + if t = ptr.dereference + if t.terminal? + s[t.ident] = t + if t.ident == 1 # $error + @conflict = true + end + end + else + r.push ptr.rule + end + end + unless r.empty? + if not s.empty? or r.size > 1 + @conflict = true + end + end + s.compact! + @stokens = s + @rrules = r + + if @conflict + @la_rules_i = la_rules.size + @la_rules = r.map {|i| i.ident } + la_rules.concat r + else + @la_rules_i = @la_rules = nil + end + end + + def conflict? + @conflict + end + + def rruleid(rule) + if i = @la_rules.index(rule.ident) + @la_rules_i + i + else + puts '/// rruleid' + p self + p rule + p @rrules + p @la_rules_i + raise 'racc: fatal: cannot get reduce rule id' + end + end + + def la=(la) + return unless @conflict + i = @la_rules_i + @ritems = r = [] + @rrules.each do |rule| + r.push Item.new(rule, la[i]) + i += 1 + end + end + + def rr_conflict(high, low, ctok) + c = RRconflict.new(@ident, high, low, ctok) + + @rrconf ||= {} + if a = @rrconf[ctok] + a.push c + else + @rrconf[ctok] = [c] + end + end + + def sr_conflict(shift, reduce) + c = SRconflict.new(@ident, shift, reduce) + + @srconf ||= {} + if a = @srconf[shift] + a.push c + else + @srconf[shift] = [c] + end + end + + def n_srconflicts + @srconf ? @srconf.size : 0 + end + + def n_rrconflicts + @rrconf ? @rrconf.size : 0 + end + + end # class State + + + # + # Represents a transition on the grammar. + # "Real goto" means a transition by nonterminal, + # but this class treats also terminal's. + # If one is a terminal transition, .ident returns nil. + # + class Goto + def initialize(ident, sym, from, to) + @ident = ident + @symbol = sym + @from_state = from + @to_state = to + end + + attr_reader :ident + attr_reader :symbol + attr_reader :from_state + attr_reader :to_state + + def inspect + "(#{@from_state.ident}-#{@symbol}->#{@to_state.ident})" + end + end + + + # LALR item. A set of rule and its lookahead tokens. + class Item + def initialize(rule, la) + @rule = rule + @la = la + end + + attr_reader :rule + attr_reader :la + + def each_la(tbl) + la = @la + 0.upto(la.size - 1) do |i| + (0..7).each do |ii| + if la[idx = i * 8 + ii] == 1 + yield tbl[idx] + end + end + end + end + end + + + # The table of LALR actions. Actions are either of + # Shift, Reduce, Accept and Error. + class ActionTable + + def initialize(rt, st) + @grammar = rt + @statetable = st + + @reduce = [] + @shift = [] + @accept = nil + @error = nil + end + + def init + @grammar.each do |rule| + @reduce.push Reduce.new(rule) + end + @statetable.each do |state| + @shift.push Shift.new(state) + end + @accept = Accept.new + @error = Error.new + end + + def reduce_n + @reduce.size + end + + def reduce(i) + case i + when Rule then i = i.ident + when Integer then ; + else + raise "racc: fatal: wrong class #{i.class} for reduce" + end + + r = @reduce[i] or raise "racc: fatal: reduce action #{i.inspect} not exist" + r.incref + r + end + + def each_reduce(&block) + @reduce.each(&block) + end + + def shift_n + @shift.size + end + + def shift(i) + case i + when State then i = i.ident + when Integer then ; + else + raise "racc: fatal: wrong class #{i.class} for shift" + end + + @shift[i] or raise "racc: fatal: shift action #{i} does not exist" + end + + def each_shift(&block) + @shift.each(&block) + end + + attr_reader :accept + attr_reader :error + + end + + + class Shift + def initialize(goto) + @goto_state = goto + end + + attr_reader :goto_state + + def goto_id + @goto_state.ident + end + + def inspect + "<shift #{@goto_state.ident}>" + end + end + + + class Reduce + def initialize(rule) + @rule = rule + @refn = 0 + end + + attr_reader :rule + attr_reader :refn + + def ruleid + @rule.ident + end + + def inspect + "<reduce #{@rule.ident}>" + end + + def incref + @refn += 1 + end + + def decref + @refn -= 1 + raise 'racc: fatal: act.refn < 0' if @refn < 0 + end + end + + class Accept + def inspect + "<accept>" + end + end + + class Error + def inspect + "<error>" + end + end + + class SRconflict + def initialize(sid, shift, reduce) + @stateid = sid + @shift = shift + @reduce = reduce + end + + attr_reader :stateid + attr_reader :shift + attr_reader :reduce + + def to_s + sprintf('state %d: S/R conflict rule %d reduce and shift %s', + @stateid, @reduce.ruleid, @shift.to_s) + end + end + + class RRconflict + def initialize(sid, high, low, tok) + @stateid = sid + @high_prec = high + @low_prec = low + @token = tok + end + + attr_reader :stateid + attr_reader :high_prec + attr_reader :low_prec + attr_reader :token + + def to_s + sprintf('state %d: R/R conflict with rule %d and %d on %s', + @stateid, @high_prec.ident, @low_prec.ident, @token.to_s) + end + end + +end diff --git a/lib/racc/statetransitiontable.rb b/lib/racc/statetransitiontable.rb new file mode 100644 index 0000000000..23df4102ec --- /dev/null +++ b/lib/racc/statetransitiontable.rb @@ -0,0 +1,316 @@ +# +# $Id: 4c5f4311663b6d03050953d64d6a0e7905ff2216 $ +# +# Copyright (c) 1999-2006 Minero Aoki +# +# This program is free software. +# You can distribute/modify this program under the terms of +# the GNU LGPL, Lesser General Public License version 2.1. +# For details of LGPL, see the file "COPYING". +# + +require 'racc/parser' + +unless Object.method_defined?(:funcall) + class Object + alias funcall __send__ + end +end + +module Racc + + StateTransitionTable = Struct.new(:action_table, + :action_check, + :action_default, + :action_pointer, + :goto_table, + :goto_check, + :goto_default, + :goto_pointer, + :token_table, + :reduce_table, + :reduce_n, + :shift_n, + :nt_base, + :token_to_s_table, + :use_result_var, + :debug_parser) + class StateTransitionTable # reopen + def StateTransitionTable.generate(states) + StateTransitionTableGenerator.new(states).generate + end + + def initialize(states) + super() + @states = states + @grammar = states.grammar + self.use_result_var = true + self.debug_parser = true + end + + attr_reader :states + attr_reader :grammar + + def parser_class + ParserClassGenerator.new(@states).generate + end + + def token_value_table + h = {} + token_table().each do |sym, i| + h[sym.value] = i + end + h + end + end + + + class StateTransitionTableGenerator + + def initialize(states) + @states = states + @grammar = states.grammar + end + + def generate + t = StateTransitionTable.new(@states) + gen_action_tables t, @states + gen_goto_tables t, @grammar + t.token_table = token_table(@grammar) + t.reduce_table = reduce_table(@grammar) + t.reduce_n = @states.reduce_n + t.shift_n = @states.shift_n + t.nt_base = @grammar.nonterminal_base + t.token_to_s_table = @grammar.symbols.map {|sym| sym.to_s } + t + end + + def reduce_table(grammar) + t = [0, 0, :racc_error] + grammar.each_with_index do |rule, idx| + next if idx == 0 + t.push rule.size + t.push rule.target.ident + t.push(if rule.action.empty? # and @params.omit_action_call? + then :_reduce_none + else "_reduce_#{idx}".intern + end) + end + t + end + + def token_table(grammar) + h = {} + grammar.symboltable.terminals.each do |t| + h[t] = t.ident + end + h + end + + def gen_action_tables(t, states) + t.action_table = yytable = [] + t.action_check = yycheck = [] + t.action_default = yydefact = [] + t.action_pointer = yypact = [] + e1 = [] + e2 = [] + states.each do |state| + yydefact.push act2actid(state.defact) + if state.action.empty? + yypact.push nil + next + end + vector = [] + state.action.each do |tok, act| + vector[tok.ident] = act2actid(act) + end + addent e1, vector, state.ident, yypact + end + set_table e1, e2, yytable, yycheck, yypact + end + + def gen_goto_tables(t, grammar) + t.goto_table = yytable2 = [] + t.goto_check = yycheck2 = [] + t.goto_pointer = yypgoto = [] + t.goto_default = yydefgoto = [] + e1 = [] + e2 = [] + grammar.each_nonterminal do |tok| + tmp = [] + + # decide default + freq = Array.new(@states.size, 0) + @states.each do |state| + st = state.goto_table[tok] + if st + st = st.ident + freq[st] += 1 + end + tmp[state.ident] = st + end + max = freq.max + if max > 1 + default = freq.index(max) + tmp.map! {|i| default == i ? nil : i } + else + default = nil + end + yydefgoto.push default + + # delete default value + tmp.pop until tmp.last or tmp.empty? + if tmp.compact.empty? + # only default + yypgoto.push nil + next + end + + addent e1, tmp, (tok.ident - grammar.nonterminal_base), yypgoto + end + set_table e1, e2, yytable2, yycheck2, yypgoto + end + + def addent(all, arr, chkval, ptr) + max = arr.size + min = nil + arr.each_with_index do |item, idx| + if item + min ||= idx + end + end + ptr.push(-7777) # mark + arr = arr[min...max] + all.push [arr, chkval, mkmapexp(arr), min, ptr.size - 1] + end + + n = 2 ** 16 + begin + Regexp.compile("a{#{n}}") + RE_DUP_MAX = n + rescue RegexpError + n /= 2 + retry + end + + def mkmapexp(arr) + i = ii = 0 + as = arr.size + map = '' + maxdup = RE_DUP_MAX + curr = nil + while i < as + ii = i + 1 + if arr[i] + ii += 1 while ii < as and arr[ii] + curr = '-' + else + ii += 1 while ii < as and not arr[ii] + curr = '.' + end + + offset = ii - i + if offset == 1 + map << curr + else + while offset > maxdup + map << "#{curr}{#{maxdup}}" + offset -= maxdup + end + map << "#{curr}{#{offset}}" if offset > 1 + end + i = ii + end + Regexp.compile(map, 'n') + end + + def set_table(entries, dummy, tbl, chk, ptr) + upper = 0 + map = '-' * 10240 + + # sort long to short + entries.sort! {|a,b| b[0].size <=> a[0].size } + + entries.each do |arr, chkval, expr, min, ptri| + if upper + arr.size > map.size + map << '-' * (arr.size + 1024) + end + idx = map.index(expr) + ptr[ptri] = idx - min + arr.each_with_index do |item, i| + if item + i += idx + tbl[i] = item + chk[i] = chkval + map[i] = ?o + end + end + upper = idx + arr.size + end + end + + def act2actid(act) + case act + when Shift then act.goto_id + when Reduce then -act.ruleid + when Accept then @states.shift_n + when Error then @states.reduce_n * -1 + else + raise "racc: fatal: wrong act type #{act.class} in action table" + end + end + + end + + + class ParserClassGenerator + + def initialize(states) + @states = states + @grammar = states.grammar + end + + def generate + table = @states.state_transition_table + c = Class.new(::Racc::Parser) + c.const_set :Racc_arg, [table.action_table, + table.action_check, + table.action_default, + table.action_pointer, + table.goto_table, + table.goto_check, + table.goto_default, + table.goto_pointer, + table.nt_base, + table.reduce_table, + table.token_value_table, + table.shift_n, + table.reduce_n, + false] + c.const_set :Racc_token_to_s_table, table.token_to_s_table + c.const_set :Racc_debug_parser, true + define_actions c + c + end + + private + + def define_actions(c) + c.module_eval "def _reduce_none(vals, vstack) vals[0] end" + @grammar.each do |rule| + if rule.action.empty? + c.funcall(:alias_method, "_reduce_#{rule.ident}", :_reduce_none) + else + c.funcall(:define_method, "_racc_action_#{rule.ident}", &rule.action.proc) + c.module_eval(<<-End, __FILE__, __LINE__ + 1) + def _reduce_#{rule.ident}(vals, vstack) + _racc_action_#{rule.ident}(*vals) + end + End + end + end + end + + end + +end # module Racc diff --git a/lib/racc/static.rb b/lib/racc/static.rb new file mode 100644 index 0000000000..bebbeb5aa6 --- /dev/null +++ b/lib/racc/static.rb @@ -0,0 +1,5 @@ +require 'racc' +require 'racc/parser' +require 'racc/grammarfileparser' +require 'racc/parserfilegenerator' +require 'racc/logfilegenerator' |