diff options
author | yui-knk <[email protected]> | 2023-08-04 19:16:38 +0900 |
---|---|---|
committer | Yuichiro Kaneko <[email protected]> | 2023-08-05 10:32:41 +0900 |
commit | f07ef1d54c3bbae62e2ddd7266932c294db91daf (patch) | |
tree | 5e38085e23ee02133e463c250b796943fd556702 | |
parent | e80752f9bbc5228dba3066cd95a81e2e496bd9d7 (diff) |
Lrama v0.5.3
Notes
Notes:
Merged: https://github.com/ruby/ruby/pull/8175
29 files changed, 774 insertions, 561 deletions
diff --git a/tool/lrama/LEGAL.md b/tool/lrama/LEGAL.md index b1a15b96b5..f708ae7a1c 100644 --- a/tool/lrama/LEGAL.md +++ b/tool/lrama/LEGAL.md @@ -5,22 +5,7 @@ mentioned below. ## GNU General Public License version 3 -These files are licensed under the GNU General Public License version 3. See these files for more information. +These files are licensed under the GNU General Public License version 3 or later. See these files for more information. * template/bison/yacc.c * template/bison/yacc.h - -## Same with Ruby - -These files are licensed same with Ruby. See https://github.com/ruby/ruby/blob/master/COPYING for more information. - -* spec/fixtures/integration/ruby_3_0_5/parse.tmp.y -* spec/fixtures/integration/ruby_3_0_5/y.tab.c -* spec/fixtures/integration/ruby_3_0_5/y.tab.h -* spec/fixtures/integration/ruby_3_1_0/parse.tmp.y -* spec/fixtures/integration/ruby_3_1_0/y.tab.c -* spec/fixtures/integration/ruby_3_1_0/y.tab.h -* spec/fixtures/integration/ruby_3_2_0/parse.tmp.y -* spec/fixtures/integration/ruby_3_2_0/y.tab.c -* spec/fixtures/integration/ruby_3_2_0/y.tab.h - diff --git a/tool/lrama/exe/lex_state b/tool/lrama/exe/lex_state new file mode 100755 index 0000000000..0acdeaadd6 --- /dev/null +++ b/tool/lrama/exe/lex_state @@ -0,0 +1,14 @@ +#!/usr/bin/env ruby + + +$LOAD_PATH << File.join(__dir__, "../lib") +require "lrama" + +grammar_file = ARGV.shift +y = File.read(grammar_file) +warning = Lrama::Warning.new +grammar = Lrama::Parser.new(y).parse +states = Lrama::States.new(grammar, warning) +states.compute + +Lrama::LexState.new(states).compute diff --git a/tool/lrama/exe/lrama b/tool/lrama/exe/lrama index 5c61e1a684..7988afe507 100755 --- a/tool/lrama/exe/lrama +++ b/tool/lrama/exe/lrama @@ -4,4 +4,4 @@ $LOAD_PATH << File.join(__dir__, "../lib") require "lrama" -Lrama::Command.new.run(ARGV.dup) +Lrama::Command.new(ARGV.dup).run diff --git a/tool/lrama/lib/lrama/command.rb b/tool/lrama/lib/lrama/command.rb index 
b1485b73c5..2d37639d69 100644 --- a/tool/lrama/lib/lrama/command.rb +++ b/tool/lrama/lib/lrama/command.rb @@ -2,94 +2,57 @@ require 'optparse' module Lrama class Command - def run(argv) - opt = OptionParser.new - - # opt.on('-h') {|v| p v } - opt.on('-V', '--version') {|v| puts Lrama::VERSION ; exit 0 } - - # Tuning the Parser - skeleton = "bison/yacc.c" - - opt.on('-S', '--skeleton=FILE') {|v| skeleton = v } - opt.on('-t') { } # Do nothing - - # Output Files: - header = false - header_file = nil - report = [] - report_file = nil - outfile = "y.tab.c" - - opt.on('-h', '--header=[FILE]') {|v| header = true; header_file = v } - opt.on('-d') { header = true } - opt.on('-r', '--report=THINGS') {|v| report = v.split(',') } - opt.on('--report-file=FILE') {|v| report_file = v } - opt.on('-v') { } # Do nothing - opt.on('-o', '--output=FILE') {|v| outfile = v } - - # Hidden - trace = [] - opt.on('--trace=THINGS') {|v| trace = v.split(',') } - - # Error Recovery - error_recovery = false - opt.on('-e') {|v| error_recovery = true } - - opt.parse!(argv) - - trace_opts = validate_trace(trace) - report_opts = validate_report(report) - - grammar_file = argv.shift - - if !report.empty? && report_file.nil? 
&& grammar_file - report_file = File.dirname(grammar_file) + "/" + File.basename(grammar_file, ".*") + ".output" - end + def initialize(argv) + @argv = argv + + @version = nil + @skeleton = "bison/yacc.c" + @header = false + @header_file = nil + @report = [] + @report_file = nil + @outfile = "y.tab.c" + @trace = [] + @error_recovery = false + @grammar_file = nil + @report_file = nil + @trace_opts = nil + @report_opts = nil + end - if !header_file && header - case - when outfile - header_file = File.dirname(outfile) + "/" + File.basename(outfile, ".*") + ".h" - when grammar_file - header_file = File.dirname(grammar_file) + "/" + File.basename(grammar_file, ".*") + ".h" - end - end + def run + parse_option - if !grammar_file - abort "File should be specified\n" + if @version + puts Lrama::VERSION + exit 0 end - Report::Duration.enable if trace_opts[:time] + Report::Duration.enable if @trace_opts[:time] warning = Lrama::Warning.new - if grammar_file == '-' - grammar_file = argv.shift or abort "File name for STDIN should be specified\n" - y = STDIN.read - else - y = File.read(grammar_file) - end - grammar = Lrama::Parser.new(y).parse - states = Lrama::States.new(grammar, warning, trace_state: (trace_opts[:automaton] || trace_opts[:closure])) + grammar = Lrama::Parser.new(@y.read).parse + states = Lrama::States.new(grammar, warning, trace_state: (@trace_opts[:automaton] || @trace_opts[:closure])) states.compute context = Lrama::Context.new(states) - if report_file + if @report_file reporter = Lrama::StatesReporter.new(states) - File.open(report_file, "w+") do |f| - reporter.report(f, **report_opts) + File.open(@report_file, "w+") do |f| + reporter.report(f, **@report_opts) end end - File.open(outfile, "w+") do |f| + File.open(@outfile, "w+") do |f| Lrama::Output.new( out: f, - output_file_path: outfile, - template_name: skeleton, - grammar_file_path: grammar_file, - header_file_path: header_file, + output_file_path: @outfile, + template_name: @skeleton, + 
grammar_file_path: @grammar_file, + header_file_path: @header_file, context: context, grammar: grammar, + error_recovery: @error_recovery, ).render end @@ -144,5 +107,61 @@ module Lrama return h end + + def parse_option + opt = OptionParser.new + + # opt.on('-h') {|v| p v } + opt.on('-V', '--version') {|v| @version = true } + + # Tuning the Parser + opt.on('-S', '--skeleton=FILE') {|v| @skeleton = v } + opt.on('-t') { } # Do nothing + + # Output Files: + opt.on('-h', '--header=[FILE]') {|v| @header = true; @header_file = v } + opt.on('-d') { @header = true } + opt.on('-r', '--report=THINGS') {|v| @report = v.split(',') } + opt.on('--report-file=FILE') {|v| @report_file = v } + opt.on('-v') { } # Do nothing + opt.on('-o', '--output=FILE') {|v| @outfile = v } + + # Hidden + opt.on('--trace=THINGS') {|v| @trace = v.split(',') } + + # Error Recovery + opt.on('-e') {|v| @error_recovery = true } + + opt.parse!(@argv) + + @trace_opts = validate_trace(@trace) + @report_opts = validate_report(@report) + + @grammar_file = @argv.shift + + if !@grammar_file + abort "File should be specified\n" + end + + if @grammar_file == '-' + @grammar_file = @argv.shift or abort "File name for STDIN should be specified\n" + @y = STDIN + else + @y = File.open(@grammar_file, 'r') + end + + if !@report.empty? && @report_file.nil?
&& @grammar_file + @report_file = File.dirname(@grammar_file) + "/" + File.basename(@grammar_file, ".*") + ".output" + end + + if !@header_file && @header + case + when @outfile + @header_file = File.dirname(@outfile) + "/" + File.basename(@outfile, ".*") + ".h" + when @grammar_file + @header_file = File.dirname(@grammar_file) + "/" + File.basename(@grammar_file, ".*") + ".h" + end + end + end end end diff --git a/tool/lrama/lib/lrama/context.rb b/tool/lrama/lib/lrama/context.rb index 9086470011..861c4e1137 100644 --- a/tool/lrama/lib/lrama/context.rb +++ b/tool/lrama/lib/lrama/context.rb @@ -1,4 +1,4 @@ -require "lrama/report" +require "lrama/report/duration" module Lrama # This is passed to a template @@ -89,6 +89,16 @@ module Lrama return a end + def yytranslate_inverted + a = Array.new(@states.symbols.count, @states.undef_symbol.token_id) + + @states.terms.each do |term| + a[term.number] = term.token_id + end + + return a + end + # Mapping from rule number to line number of the rule is defined. # Dummy rule is appended as the first element whose value is 0 # because 0 means error in yydefact. diff --git a/tool/lrama/lib/lrama/grammar.rb b/tool/lrama/lib/lrama/grammar.rb index a13c5807b2..a4f98ecc83 100644 --- a/tool/lrama/lib/lrama/grammar.rb +++ b/tool/lrama/lib/lrama/grammar.rb @@ -1,283 +1,15 @@ -require "forwardable" +require "lrama/grammar/code" +require "lrama/grammar/error_token" +require "lrama/grammar/precedence" +require "lrama/grammar/printer" +require "lrama/grammar/reference" +require "lrama/grammar/rule" +require "lrama/grammar/symbol" +require "lrama/grammar/union" require "lrama/lexer" module Lrama - Rule = Struct.new(:id, :lhs, :rhs, :code, :nullable, :precedence_sym, :lineno, keyword_init: true) do - # TODO: Change this to display_name - def to_s - l = lhs.id.s_value - r = rhs.empty? ? "ε" : rhs.map {|r| r.id.s_value }.join(", ") - - "#{l} -> #{r}" - end - - # Used by #user_actions - def as_comment - l = lhs.id.s_value - r = rhs.empty? ? 
"%empty" : rhs.map {|r| r.display_name }.join(" ") - - "#{l}: #{r}" - end - - def precedence - precedence_sym && precedence_sym.precedence - end - - def initial_rule? - id == 0 - end - - def translated_code - if code - code.translated_code - else - nil - end - end - end - - # Symbol is both of nterm and term - # `number` is both for nterm and term - # `token_id` is tokentype for term, internal sequence number for nterm - # - # TODO: Add validation for ASCII code range for Token::Char - Symbol = Struct.new(:id, :alias_name, :number, :tag, :term, :token_id, :nullable, :precedence, :printer, keyword_init: true) do - attr_writer :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol - - def term? - term - end - - def nterm? - !term - end - - def eof_symbol? - !!@eof_symbol - end - - def error_symbol? - !!@error_symbol - end - - def undef_symbol? - !!@undef_symbol - end - - def accept_symbol? - !!@accept_symbol - end - - def display_name - if alias_name - alias_name - else - id.s_value - end - end - - # name for yysymbol_kind_t - # - # See: b4_symbol_kind_base - def enum_name - case - when accept_symbol? - name = "YYACCEPT" - when eof_symbol? - name = "YYEOF" - when term? && id.type == Token::Char - if alias_name - name = number.to_s + alias_name - else - name = number.to_s + id.s_value - end - when term? && id.type == Token::Ident - name = id.s_value - when nterm? && (id.s_value.include?("$") || id.s_value.include?("@")) - name = number.to_s + id.s_value - when nterm? - name = id.s_value - else - raise "Unexpected #{self}" - end - - "YYSYMBOL_" + name.gsub(/[^a-zA-Z_0-9]+/, "_") - end - - # comment for yysymbol_kind_t - def comment - case - when accept_symbol? - # YYSYMBOL_YYACCEPT - id.s_value - when eof_symbol? - # YYEOF - alias_name - when (term? 
&& 0 < token_id && token_id < 128) - # YYSYMBOL_3_backslash_, YYSYMBOL_14_ - alias_name || id.s_value - when id.s_value.include?("$") || id.s_value.include?("@") - # YYSYMBOL_21_1 - id.s_value - else - # YYSYMBOL_keyword_class, YYSYMBOL_strings_1 - alias_name || id.s_value - end - end - end - Type = Struct.new(:id, :tag, keyword_init: true) - - Code = Struct.new(:type, :token_code, keyword_init: true) do - extend Forwardable - - def_delegators "token_code", :s_value, :line, :column, :references - - # $$, $n, @$, @n is translated to C code - def translated_code - case type - when :user_code - translated_user_code - when :initial_action - translated_initial_action_code - end - end - - # * ($1) error - # * ($$) *yyvaluep - # * (@1) error - # * (@$) *yylocationp - def translated_printer_code(tag) - t_code = s_value.dup - - references.reverse.each do |ref| - first_column = ref.first_column - last_column = ref.last_column - - case - when ref.value == "$" && ref.type == :dollar # $$ - # Omit "<>" - member = tag.s_value[1..-2] - str = "((*yyvaluep).#{member})" - when ref.value == "$" && ref.type == :at # @$ - str = "(*yylocationp)" - when ref.type == :dollar # $n - raise "$#{ref.value} can not be used in %printer." - when ref.type == :at # @n - raise "@#{ref.value} can not be used in %printer." - else - raise "Unexpected. 
#{self}, #{ref}" - end - - t_code[first_column..last_column] = str - end - - return t_code - end - - - private - - # * ($1) yyvsp[i] - # * ($$) yyval - # * (@1) yylsp[i] - # * (@$) yyloc - def translated_user_code - t_code = s_value.dup - - references.reverse.each do |ref| - first_column = ref.first_column - last_column = ref.last_column - - case - when ref.value == "$" && ref.type == :dollar # $$ - # Omit "<>" - member = ref.tag.s_value[1..-2] - str = "(yyval.#{member})" - when ref.value == "$" && ref.type == :at # @$ - str = "(yyloc)" - when ref.type == :dollar # $n - i = -ref.position_in_rhs + ref.value - # Omit "<>" - member = ref.tag.s_value[1..-2] - str = "(yyvsp[#{i}].#{member})" - when ref.type == :at # @n - i = -ref.position_in_rhs + ref.value - str = "(yylsp[#{i}])" - else - raise "Unexpected. #{self}, #{ref}" - end - - t_code[first_column..last_column] = str - end - - return t_code - end - - # * ($1) error - # * ($$) yylval - # * (@1) error - # * (@$) yylloc - def translated_initial_action_code - t_code = s_value.dup - - references.reverse.each do |ref| - first_column = ref.first_column - last_column = ref.last_column - - case - when ref.value == "$" && ref.type == :dollar # $$ - str = "yylval" - when ref.value == "$" && ref.type == :at # @$ - str = "yylloc" - when ref.type == :dollar # $n - raise "$#{ref.value} can not be used in initial_action." - when ref.type == :at # @n - raise "@#{ref.value} can not be used in initial_action." - else - raise "Unexpected. 
#{self}, #{ref}" - end - - t_code[first_column..last_column] = str - end - - return t_code - end - end - - # type: :dollar or :at - # ex_tag: "$<tag>1" (Optional) - Reference = Struct.new(:type, :value, :ex_tag, :first_column, :last_column, :referring_symbol, :position_in_rhs, keyword_init: true) do - def tag - if ex_tag - ex_tag - else - referring_symbol.tag - end - end - end - - Precedence = Struct.new(:type, :precedence, keyword_init: true) do - include Comparable - - def <=>(other) - self.precedence <=> other.precedence - end - end - - Printer = Struct.new(:ident_or_tags, :code, :lineno, keyword_init: true) do - def translated_code(member) - code.translated_printer_code(member) - end - end - - Union = Struct.new(:code, :lineno, keyword_init: true) do - def braces_less_code - # Remove braces - code.s_value[1..-2] - end - end - Token = Lrama::Lexer::Token # Grammar is the result of parsing an input grammar file @@ -287,7 +19,7 @@ module Lrama attr_reader :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol, :aux attr_accessor :union, :expect, - :printers, + :printers, :error_tokens, :lex_param, :parse_param, :initial_action, :symbols, :types, :rules, :_rules, @@ -295,6 +27,7 @@ module Lrama def initialize @printers = [] + @error_tokens = [] @symbols = [] @types = [] @_rules = [] @@ -314,6 +47,10 @@ module Lrama @printers << Printer.new(ident_or_tags: ident_or_tags, code: code, lineno: lineno) end + def add_error_token(ident_or_tags:, code:, lineno:) + @error_tokens << ErrorToken.new(ident_or_tags, code, lineno) + end + def add_term(id:, alias_name: nil, tag: nil, token_id: nil, replace: false) if token_id && (sym = @symbols.find {|s| s.token_id == token_id }) if replace @@ -419,12 +156,14 @@ module Lrama fill_sym_to_rules fill_nterm_type fill_symbol_printer + fill_symbol_error_token @symbols.sort_by!(&:number) end # TODO: More validation methods def validate! validate_symbol_number_uniqueness! + validate_no_declared_type_reference! 
end def compute_nullable @@ -845,6 +584,23 @@ module Lrama end end + def fill_symbol_error_token + @symbols.each do |sym| + @error_tokens.each do |error_token| + error_token.ident_or_tags.each do |ident_or_tag| + case ident_or_tag.type + when Token::Ident + sym.error_token = error_token if sym.id == ident_or_tag + when Token::Tag + sym.error_token = error_token if sym.tag == ident_or_tag + else + raise "Unknown token type. #{error_token}" + end + end + end + end + end + def validate_symbol_number_uniqueness! invalid = @symbols.group_by(&:number).select do |number, syms| syms.count > 1 @@ -854,5 +610,23 @@ module Lrama raise "Symbol number is duplicated. #{invalid}" end + + def validate_no_declared_type_reference! + errors = [] + + rules.each do |rule| + next unless rule.code + + rule.code.references.select do |ref| + ref.type == :dollar && !ref.tag + end.each do |ref| + errors << "$#{ref.value} of '#{rule.lhs.id.s_value}' has no declared type" + end + end + + return if errors.empty? + + raise errors.join("\n") + end end end diff --git a/tool/lrama/lib/lrama/grammar/code.rb b/tool/lrama/lib/lrama/grammar/code.rb new file mode 100644 index 0000000000..0f61ebb58e --- /dev/null +++ b/tool/lrama/lib/lrama/grammar/code.rb @@ -0,0 +1,123 @@ +require "forwardable" + +module Lrama + class Grammar + class Code < Struct.new(:type, :token_code, keyword_init: true) + extend Forwardable + + def_delegators "token_code", :s_value, :line, :column, :references + + # $$, $n, @$, @n is translated to C code + def translated_code + case type + when :user_code + translated_user_code + when :initial_action + translated_initial_action_code + end + end + + # * ($1) error + # * ($$) *yyvaluep + # * (@1) error + # * (@$) *yylocationp + def translated_printer_code(tag) + t_code = s_value.dup + + references.reverse.each do |ref| + first_column = ref.first_column + last_column = ref.last_column + + case + when ref.value == "$" && ref.type == :dollar # $$ + # Omit "<>" + member = 
tag.s_value[1..-2] + str = "((*yyvaluep).#{member})" + when ref.value == "$" && ref.type == :at # @$ + str = "(*yylocationp)" + when ref.type == :dollar # $n + raise "$#{ref.value} can not be used in %printer." + when ref.type == :at # @n + raise "@#{ref.value} can not be used in %printer." + else + raise "Unexpected. #{self}, #{ref}" + end + + t_code[first_column..last_column] = str + end + + return t_code + end + alias :translated_error_token_code :translated_printer_code + + + private + + # * ($1) yyvsp[i] + # * ($$) yyval + # * (@1) yylsp[i] + # * (@$) yyloc + def translated_user_code + t_code = s_value.dup + + references.reverse.each do |ref| + first_column = ref.first_column + last_column = ref.last_column + + case + when ref.value == "$" && ref.type == :dollar # $$ + # Omit "<>" + member = ref.tag.s_value[1..-2] + str = "(yyval.#{member})" + when ref.value == "$" && ref.type == :at # @$ + str = "(yyloc)" + when ref.type == :dollar # $n + i = -ref.position_in_rhs + ref.value + # Omit "<>" + member = ref.tag.s_value[1..-2] + str = "(yyvsp[#{i}].#{member})" + when ref.type == :at # @n + i = -ref.position_in_rhs + ref.value + str = "(yylsp[#{i}])" + else + raise "Unexpected. #{self}, #{ref}" + end + + t_code[first_column..last_column] = str + end + + return t_code + end + + # * ($1) error + # * ($$) yylval + # * (@1) error + # * (@$) yylloc + def translated_initial_action_code + t_code = s_value.dup + + references.reverse.each do |ref| + first_column = ref.first_column + last_column = ref.last_column + + case + when ref.value == "$" && ref.type == :dollar # $$ + str = "yylval" + when ref.value == "$" && ref.type == :at # @$ + str = "yylloc" + when ref.type == :dollar # $n + raise "$#{ref.value} can not be used in initial_action." + when ref.type == :at # @n + raise "@#{ref.value} can not be used in initial_action." + else + raise "Unexpected. 
#{self}, #{ref}" + end + + t_code[first_column..last_column] = str + end + + return t_code + end + end + end +end diff --git a/tool/lrama/lib/lrama/grammar/error_token.rb b/tool/lrama/lib/lrama/grammar/error_token.rb new file mode 100644 index 0000000000..de82523577 --- /dev/null +++ b/tool/lrama/lib/lrama/grammar/error_token.rb @@ -0,0 +1,9 @@ +module Lrama + class Grammar + class ErrorToken < Struct.new(:ident_or_tags, :code, :lineno, keyword_init: true) + def translated_code(member) + code.translated_error_token_code(member) + end + end + end +end diff --git a/tool/lrama/lib/lrama/grammar/precedence.rb b/tool/lrama/lib/lrama/grammar/precedence.rb new file mode 100644 index 0000000000..fed739b3c0 --- /dev/null +++ b/tool/lrama/lib/lrama/grammar/precedence.rb @@ -0,0 +1,11 @@ +module Lrama + class Grammar + class Precedence < Struct.new(:type, :precedence, keyword_init: true) + include Comparable + + def <=>(other) + self.precedence <=> other.precedence + end + end + end +end diff --git a/tool/lrama/lib/lrama/grammar/printer.rb b/tool/lrama/lib/lrama/grammar/printer.rb new file mode 100644 index 0000000000..da49463485 --- /dev/null +++ b/tool/lrama/lib/lrama/grammar/printer.rb @@ -0,0 +1,9 @@ +module Lrama + class Grammar + class Printer < Struct.new(:ident_or_tags, :code, :lineno, keyword_init: true) + def translated_code(member) + code.translated_printer_code(member) + end + end + end +end diff --git a/tool/lrama/lib/lrama/grammar/reference.rb b/tool/lrama/lib/lrama/grammar/reference.rb new file mode 100644 index 0000000000..bc178e104d --- /dev/null +++ b/tool/lrama/lib/lrama/grammar/reference.rb @@ -0,0 +1,22 @@ +# type: :dollar or :at +# ex_tag: "$<tag>1" (Optional) + +module Lrama + class Grammar + class Reference < Struct.new(:type, :value, :ex_tag, :first_column, :last_column, :referring_symbol, :position_in_rhs, keyword_init: true) + def tag + if ex_tag + ex_tag + else + # FIXME: Remove this class check + if referring_symbol.is_a?(Symbol) + 
referring_symbol.tag + else + # Lrama::Lexer::Token (User_code) case + nil + end + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/grammar/rule.rb b/tool/lrama/lib/lrama/grammar/rule.rb new file mode 100644 index 0000000000..7ed5b3312c --- /dev/null +++ b/tool/lrama/lib/lrama/grammar/rule.rb @@ -0,0 +1,33 @@ +module Lrama + class Grammar + class Rule < Struct.new(:id, :lhs, :rhs, :code, :nullable, :precedence_sym, :lineno, keyword_init: true) + # TODO: Change this to display_name + def to_s + l = lhs.id.s_value + r = rhs.empty? ? "ε" : rhs.map {|r| r.id.s_value }.join(", ") + + "#{l} -> #{r}" + end + + # Used by #user_actions + def as_comment + l = lhs.id.s_value + r = rhs.empty? ? "%empty" : rhs.map(&:display_name).join(" ") + + "#{l}: #{r}" + end + + def precedence + precedence_sym&.precedence + end + + def initial_rule? + id == 0 + end + + def translated_code + code&.translated_code + end + end + end +end diff --git a/tool/lrama/lib/lrama/grammar/symbol.rb b/tool/lrama/lib/lrama/grammar/symbol.rb new file mode 100644 index 0000000000..28916eb54f --- /dev/null +++ b/tool/lrama/lib/lrama/grammar/symbol.rb @@ -0,0 +1,94 @@ +# Symbol is both of nterm and term +# `number` is both for nterm and term +# `token_id` is tokentype for term, internal sequence number for nterm +# +# TODO: Add validation for ASCII code range for Token::Char + +module Lrama + class Grammar + class Symbol < Struct.new(:id, :alias_name, :number, :tag, :term, :token_id, :nullable, :precedence, :printer, :error_token, keyword_init: true) + attr_writer :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol + + def term? + term + end + + def nterm? + !term + end + + def eof_symbol? + !!@eof_symbol + end + + def error_symbol? + !!@error_symbol + end + + def undef_symbol? + !!@undef_symbol + end + + def accept_symbol? 
+ !!@accept_symbol + end + + def display_name + if alias_name + alias_name + else + id.s_value + end + end + + # name for yysymbol_kind_t + # + # See: b4_symbol_kind_base + def enum_name + case + when accept_symbol? + name = "YYACCEPT" + when eof_symbol? + name = "YYEOF" + when term? && id.type == Token::Char + if alias_name + name = number.to_s + alias_name + else + name = number.to_s + id.s_value + end + when term? && id.type == Token::Ident + name = id.s_value + when nterm? && (id.s_value.include?("$") || id.s_value.include?("@")) + name = number.to_s + id.s_value + when nterm? + name = id.s_value + else + raise "Unexpected #{self}" + end + + "YYSYMBOL_" + name.gsub(/[^a-zA-Z_0-9]+/, "_") + end + + # comment for yysymbol_kind_t + def comment + case + when accept_symbol? + # YYSYMBOL_YYACCEPT + id.s_value + when eof_symbol? + # YYEOF + alias_name + when (term? && 0 < token_id && token_id < 128) + # YYSYMBOL_3_backslash_, YYSYMBOL_14_ + alias_name || id.s_value + when id.s_value.include?("$") || id.s_value.include?("@") + # YYSYMBOL_21_1 + id.s_value + else + # YYSYMBOL_keyword_class, YYSYMBOL_strings_1 + alias_name || id.s_value + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/grammar/union.rb b/tool/lrama/lib/lrama/grammar/union.rb new file mode 100644 index 0000000000..978cebf22d --- /dev/null +++ b/tool/lrama/lib/lrama/grammar/union.rb @@ -0,0 +1,10 @@ +module Lrama + class Grammar + class Union < Struct.new(:code, :lineno, keyword_init: true) + def braces_less_code + # Remove braces + code.s_value[1..-2] + end + end + end +end diff --git a/tool/lrama/lib/lrama/irb.rb b/tool/lrama/lib/lrama/irb.rb new file mode 100644 index 0000000000..6f093cdedd --- /dev/null +++ b/tool/lrama/lib/lrama/irb.rb @@ -0,0 +1,8 @@ +require "lrama/irb/load_grammar" + +# You need to explicitly require this file to add irb commands +# See: "lib/irb/extend-command.rb" in ruby/irb +module Lrama + module IRB + end +end diff --git a/tool/lrama/lib/lrama/irb/load_grammar.rb 
b/tool/lrama/lib/lrama/irb/load_grammar.rb new file mode 100644 index 0000000000..a4285eaa00 --- /dev/null +++ b/tool/lrama/lib/lrama/irb/load_grammar.rb @@ -0,0 +1,9 @@ +require 'irb/cmd/nop' + +module Lrama + module IRB + class LoadGrammar < IRB::ExtendCommand::Nop + + end + end +end diff --git a/tool/lrama/lib/lrama/json_reporter.rb b/tool/lrama/lib/lrama/json_reporter.rb new file mode 100644 index 0000000000..f6cd2532bd --- /dev/null +++ b/tool/lrama/lib/lrama/json_reporter.rb @@ -0,0 +1,28 @@ +require 'json' + +module Lrama + class JsonReporter + include Lrama::Report::Duration + + def initialize(states) + @states = states + end + + def report(io, **options) + report_duration(:report) do + _report(io, **options) + end + end + + private + + def _report(io, grammar: false, states: false, itemsets: false, lookaheads: false, solved: false, verbose: false) + # TODO: Unused terms + # TODO: Unused rules + + report_conflicts(io) + report_grammar(io) if grammar + report_states(io, itemsets, lookaheads, solved, verbose) + end + end +end diff --git a/tool/lrama/lib/lrama/lexer.rb b/tool/lrama/lib/lrama/lexer.rb index fd79a46bfc..c1b5c8fe4e 100644 --- a/tool/lrama/lib/lrama/lexer.rb +++ b/tool/lrama/lib/lrama/lexer.rb @@ -1,84 +1,12 @@ require "strscan" -require "lrama/report" +require "lrama/report/duration" +require "lrama/lexer/token" module Lrama # Lexer for parse.y class Lexer include Lrama::Report::Duration - # s_value is semantic value - Token = Struct.new(:type, :s_value, :alias, keyword_init: true) do - Type = Struct.new(:id, :name, keyword_init: true) - - attr_accessor :line, :column, :referred - # For User_code - attr_accessor :references - - def to_s - "#{super} line: #{line}, column: #{column}" - end - - def referred_by?(string) - [self.s_value, self.alias].include?(string) - end - - def ==(other) - self.class == other.class && self.type == other.type && self.s_value == other.s_value - end - - def numberize_references(lhs, rhs) - self.references.map! 
{|ref| - ref_name = ref[1] - if ref_name.is_a?(String) && ref_name != '$' - value = - if lhs.referred_by?(ref_name) - '$' - else - rhs.find_index {|token| token.referred_by?(ref_name) } + 1 - end - [ref[0], value, ref[2], ref[3], ref[4]] - else - ref - end - } - end - - @i = 0 - @types = [] - - def self.define_type(name) - type = Type.new(id: @i, name: name.to_s) - const_set(name, type) - @types << type - @i += 1 - end - - # Token types - define_type(:P_expect) # %expect - define_type(:P_define) # %define - define_type(:P_printer) # %printer - define_type(:P_lex_param) # %lex-param - define_type(:P_parse_param) # %parse-param - define_type(:P_initial_action) # %initial-action - define_type(:P_union) # %union - define_type(:P_token) # %token - define_type(:P_type) # %type - define_type(:P_nonassoc) # %nonassoc - define_type(:P_left) # %left - define_type(:P_right) # %right - define_type(:P_prec) # %prec - define_type(:User_code) # { ... } - define_type(:Tag) # <int> - define_type(:Number) # 0 - define_type(:Ident_Colon) # k_if:, k_if : (spaces can be there) - define_type(:Ident) # api.pure, tNUMBER - define_type(:Named_Ref) # [foo] - define_type(:Semicolon) # ; - define_type(:Bar) # | - define_type(:String) # "str" - define_type(:Char) # '+' - end - # States # # See: https://www.gnu.org/software/bison/manual/html_node/Grammar-Outline.html @@ -207,6 +135,8 @@ module Lrama tokens << create_token(Token::P_define, ss[0], line, ss.pos - column) when ss.scan(/%printer/) tokens << create_token(Token::P_printer, ss[0], line, ss.pos - column) + when ss.scan(/%error-token/) + tokens << create_token(Token::P_error_token, ss[0], line, ss.pos - column) when ss.scan(/%lex-param/) tokens << create_token(Token::P_lex_param, ss[0], line, ss.pos - column) when ss.scan(/%parse-param/) diff --git a/tool/lrama/lib/lrama/lexer/token.rb b/tool/lrama/lib/lrama/lexer/token.rb new file mode 100644 index 0000000000..29ce48b2fa --- /dev/null +++ 
b/tool/lrama/lib/lrama/lexer/token.rb @@ -0,0 +1,76 @@ +module Lrama + class Lexer + class Token < Struct.new(:type, :s_value, :alias, keyword_init: true) + Type = Struct.new(:id, :name, keyword_init: true) + + attr_accessor :line, :column, :referred + # For User_code + attr_accessor :references + + def to_s + "#{super} line: #{line}, column: #{column}" + end + + def referred_by?(string) + [self.s_value, self.alias].include?(string) + end + + def ==(other) + self.class == other.class && self.type == other.type && self.s_value == other.s_value + end + + def numberize_references(lhs, rhs) + self.references.map! {|ref| + ref_name = ref[1] + if ref_name.is_a?(::String) && ref_name != '$' + value = + if lhs.referred_by?(ref_name) + '$' + else + rhs.find_index {|token| token.referred_by?(ref_name) } + 1 + end + [ref[0], value, ref[2], ref[3], ref[4]] + else + ref + end + } + end + + @i = 0 + @types = [] + + def self.define_type(name) + type = Type.new(id: @i, name: name.to_s) + const_set(name, type) + @types << type + @i += 1 + end + + # Token types + define_type(:P_expect) # %expect + define_type(:P_define) # %define + define_type(:P_printer) # %printer + define_type(:P_error_token) # %error-token + define_type(:P_lex_param) # %lex-param + define_type(:P_parse_param) # %parse-param + define_type(:P_initial_action) # %initial-action + define_type(:P_union) # %union + define_type(:P_token) # %token + define_type(:P_type) # %type + define_type(:P_nonassoc) # %nonassoc + define_type(:P_left) # %left + define_type(:P_right) # %right + define_type(:P_prec) # %prec + define_type(:User_code) # { ... 
} + define_type(:Tag) # <int> + define_type(:Number) # 0 + define_type(:Ident_Colon) # k_if:, k_if : (spaces can be there) + define_type(:Ident) # api.pure, tNUMBER + define_type(:Named_Ref) # [foo] + define_type(:Semicolon) # ; + define_type(:Bar) # | + define_type(:String) # "str" + define_type(:Char) # '+' + end + end +end diff --git a/tool/lrama/lib/lrama/output.rb b/tool/lrama/lib/lrama/output.rb index 696aa79feb..dd71814a11 100644 --- a/tool/lrama/lib/lrama/output.rb +++ b/tool/lrama/lib/lrama/output.rb @@ -1,20 +1,24 @@ require "erb" require "forwardable" -require "lrama/report" +require "lrama/report/duration" module Lrama class Output extend Forwardable include Report::Duration - attr_reader :grammar_file_path, :context, :grammar + attr_reader :grammar_file_path, :context, :grammar, :error_recovery def_delegators "@context", :yyfinal, :yylast, :yyntokens, :yynnts, :yynrules, :yynstates, :yymaxutok, :yypact_ninf, :yytable_ninf def_delegators "@grammar", :eof_symbol, :error_symbol, :undef_symbol, :accept_symbol - def initialize(out:, output_file_path:, template_name:, grammar_file_path:, header_out: nil, header_file_path: nil, context:, grammar:) + def initialize( + out:, output_file_path:, template_name:, grammar_file_path:, + header_out: nil, header_file_path: nil, + context:, grammar:, error_recovery: false + ) @out = out @output_file_path = output_file_path @template_name = template_name @@ -23,6 +27,7 @@ module Lrama @header_file_path = header_file_path @context = context @grammar = grammar + @error_recovery = error_recovery end if ERB.instance_method(:initialize).parameters.last.first == :key @@ -98,6 +103,10 @@ module Lrama int_array_to_string(@context.yytranslate) end + def yytranslate_inverted + int_array_to_string(@context.yytranslate_inverted) + end + def yyrline int_array_to_string(@context.yyrline) end @@ -155,6 +164,25 @@ module Lrama STR end + def symbol_actions_for_error_token + str = "" + + @grammar.symbols.each do |sym| + next unless 
sym.error_token + + str << <<-STR + case #{sym.enum_name}: /* #{sym.comment} */ +#line #{sym.error_token.lineno} "#{@grammar_file_path}" + #{sym.error_token.translated_code(sym.tag)} +#line [@oline@] [@ofile@] + break; + + STR + end + + str + end + # b4_user_actions def user_actions str = "" diff --git a/tool/lrama/lib/lrama/parser.rb b/tool/lrama/lib/lrama/parser.rb index 7d83e45dde..4faa402d1b 100644 --- a/tool/lrama/lib/lrama/parser.rb +++ b/tool/lrama/lib/lrama/parser.rb @@ -1,4 +1,4 @@ -require "lrama/report" +require "lrama/report/duration" require "lrama/parser/token_scanner" module Lrama @@ -59,6 +59,13 @@ module Lrama code = grammar.build_code(:printer, code) ident_or_tags = ts.consume_multi(T::Ident, T::Tag) grammar.add_printer(ident_or_tags: ident_or_tags, code: code, lineno: lineno) + when T::P_error_token + lineno = ts.current_token.line + ts.next + code = ts.consume!(T::User_code) + code = grammar.build_code(:printer, code) + ident_or_tags = ts.consume_multi(T::Ident, T::Tag) + grammar.add_error_token(ident_or_tags: ident_or_tags, code: code, lineno: lineno) when T::P_lex_param ts.next code = ts.consume!(T::User_code) diff --git a/tool/lrama/lib/lrama/report.rb b/tool/lrama/lib/lrama/report.rb index 7016a45171..650ac09d52 100644 --- a/tool/lrama/lib/lrama/report.rb +++ b/tool/lrama/lib/lrama/report.rb @@ -1,47 +1,2 @@ -module Lrama - class Report - module Profile - # 1. Wrap target method with Profile.report_profile like below: - # - # Lrama::Report::Profile.report_profile { method } - # - # 2. Run lrama command, for example - # - # $ ./exe/lrama --trace=time spec/fixtures/integration/ruby_3_2_0/parse.tmp.y - # - # 3. 
Generate html file - # - # $ stackprof --d3-flamegraph tmp/stackprof-cpu-myapp.dump > tmp/flamegraph.html - # - def self.report_profile - require "stackprof" - - StackProf.run(mode: :cpu, raw: true, out: 'tmp/stackprof-cpu-myapp.dump') do - yield - end - end - end - - module Duration - def self.enable - @_report_duration_enabled = true - end - - def self.enabled? - !!@_report_duration_enabled - end - - def report_duration(method_name) - time1 = Time.now.to_f - result = yield - time2 = Time.now.to_f - - if Duration.enabled? - puts sprintf("%s %10.5f s", method_name, time2 - time1) - end - - return result - end - end - end -end +require 'lrama/report/duration' +require 'lrama/report/profile' diff --git a/tool/lrama/lib/lrama/report/duration.rb b/tool/lrama/lib/lrama/report/duration.rb new file mode 100644 index 0000000000..7afe284f1a --- /dev/null +++ b/tool/lrama/lib/lrama/report/duration.rb @@ -0,0 +1,25 @@ +module Lrama + class Report + module Duration + def self.enable + @_report_duration_enabled = true + end + + def self.enabled? + !!@_report_duration_enabled + end + + def report_duration(method_name) + time1 = Time.now.to_f + result = yield + time2 = Time.now.to_f + + if Duration.enabled? + puts sprintf("%s %10.5f s", method_name, time2 - time1) + end + + return result + end + end + end +end diff --git a/tool/lrama/lib/lrama/report/profile.rb b/tool/lrama/lib/lrama/report/profile.rb new file mode 100644 index 0000000000..8265d94c2f --- /dev/null +++ b/tool/lrama/lib/lrama/report/profile.rb @@ -0,0 +1,25 @@ +module Lrama + class Report + module Profile + # 1. Wrap target method with Profile.report_profile like below: + # + # Lrama::Report::Profile.report_profile { method } + # + # 2. Run lrama command, for example + # + # $ ./exe/lrama --trace=time spec/fixtures/integration/ruby_3_2_0/parse.tmp.y + # + # 3. 
Generate html file + # + # $ stackprof --d3-flamegraph tmp/stackprof-cpu-myapp.dump > tmp/flamegraph.html + # + def self.report_profile + require "stackprof" + + StackProf.run(mode: :cpu, raw: true, out: 'tmp/stackprof-cpu-myapp.dump') do + yield + end + end + end + end +end diff --git a/tool/lrama/lib/lrama/state.rb b/tool/lrama/lib/lrama/state.rb index 65ca3bcb46..b868035e1a 100644 --- a/tool/lrama/lib/lrama/state.rb +++ b/tool/lrama/lib/lrama/state.rb @@ -1,34 +1,9 @@ require "lrama/state/reduce" require "lrama/state/shift" +require "lrama/state/resolved_conflict" module Lrama class State - # * symbol: A symbol under discussion - # * reduce: A reduce under discussion - # * which: For which a conflict is resolved. :shift, :reduce or :error (for nonassociative) - ResolvedConflict = Struct.new(:symbol, :reduce, :which, :same_prec, keyword_init: true) do - def report_message - s = symbol.display_name - r = reduce.rule.precedence_sym.display_name - case - when which == :shift && same_prec - msg = "resolved as #{which} (%right #{s})" - when which == :shift - msg = "resolved as #{which} (#{r} < #{s})" - when which == :reduce && same_prec - msg = "resolved as #{which} (%left #{s})" - when which == :reduce - msg = "resolved as #{which} (#{s} < #{r})" - when which == :error - msg = "resolved as an #{which} (%nonassoc #{s})" - else - raise "Unknown direction. #{self}" - end - - "Conflict between rule #{reduce.rule.id} and token #{s} #{msg}." 
- end - end - Conflict = Struct.new(:symbols, :reduce, :type, keyword_init: true) attr_reader :id, :accessing_symbol, :kernels, :conflicts, :resolved_conflicts, @@ -96,7 +71,7 @@ module Lrama reduce.look_ahead = look_ahead end - # Returns array of [nterm, next_state] + # Returns array of [Shift, next_state] def nterm_transitions return @nterm_transitions if @nterm_transitions @@ -111,7 +86,7 @@ module Lrama @nterm_transitions end - # Returns array of [term, next_state] + # Returns array of [Shift, next_state] def term_transitions return @term_transitions if @term_transitions diff --git a/tool/lrama/lib/lrama/state/resolved_conflict.rb b/tool/lrama/lib/lrama/state/resolved_conflict.rb new file mode 100644 index 0000000000..02ea892147 --- /dev/null +++ b/tool/lrama/lib/lrama/state/resolved_conflict.rb @@ -0,0 +1,29 @@ +module Lrama + class State + # * symbol: A symbol under discussion + # * reduce: A reduce under discussion + # * which: For which a conflict is resolved. :shift, :reduce or :error (for nonassociative) + class ResolvedConflict < Struct.new(:symbol, :reduce, :which, :same_prec, keyword_init: true) + def report_message + s = symbol.display_name + r = reduce.rule.precedence_sym.display_name + case + when which == :shift && same_prec + msg = "resolved as #{which} (%right #{s})" + when which == :shift + msg = "resolved as #{which} (#{r} < #{s})" + when which == :reduce && same_prec + msg = "resolved as #{which} (%left #{s})" + when which == :reduce + msg = "resolved as #{which} (#{s} < #{r})" + when which == :error + msg = "resolved as an #{which} (%nonassoc #{s})" + else + raise "Unknown direction. #{self}" + end + + "Conflict between rule #{reduce.rule.id} and token #{s} #{msg}." 
+ end + end + end +end diff --git a/tool/lrama/lib/lrama/states.rb b/tool/lrama/lib/lrama/states.rb index 64be781df6..cf26416a37 100644 --- a/tool/lrama/lib/lrama/states.rb +++ b/tool/lrama/lib/lrama/states.rb @@ -1,5 +1,6 @@ require "forwardable" -require "lrama/report" +require "lrama/report/duration" +require "lrama/states/item" module Lrama # States is passed to a template file @@ -11,46 +12,7 @@ module Lrama include Lrama::Report::Duration def_delegators "@grammar", :symbols, :terms, :nterms, :rules, - :accept_symbol, :eof_symbol, :find_symbol_by_s_value! - - # TODO: Validate position is not over rule rhs - Item = Struct.new(:rule, :position, keyword_init: true) do - # Optimization for States#setup_state - def hash - [rule.id, position].hash - end - - def rule_id - rule.id - end - - def next_sym - rule.rhs[position] - end - - def end_of_rule? - rule.rhs.count == position - end - - def new_by_next_position - Item.new(rule: rule, position: position + 1) - end - - def previous_sym - rule.rhs[position - 1] - end - - def display_name - r = rule.rhs.map(&:display_name).insert(position, "•").join(" ") - "#{r} (rule #{rule.id})" - end - - # Right after position - def display_rest - r = rule.rhs[position..-1].map(&:display_name).join(" ") - ". #{r} (rule #{rule.id})" - end - end + :accept_symbol, :eof_symbol, :undef_symbol, :find_symbol_by_s_value! attr_reader :states, :reads_relation, :includes_relation, :lookback_relation diff --git a/tool/lrama/lib/lrama/states/item.rb b/tool/lrama/lib/lrama/states/item.rb new file mode 100644 index 0000000000..5c3696cc7b --- /dev/null +++ b/tool/lrama/lib/lrama/states/item.rb @@ -0,0 +1,43 @@ +# TODO: Validate position is not over rule rhs + +module Lrama + class States + class Item < Struct.new(:rule, :position, keyword_init: true) + # Optimization for States#setup_state + def hash + [rule.id, position].hash + end + + def rule_id + rule.id + end + + def next_sym + rule.rhs[position] + end + + def end_of_rule? 
+ rule.rhs.count == position + end + + def new_by_next_position + Item.new(rule: rule, position: position + 1) + end + + def previous_sym + rule.rhs[position - 1] + end + + def display_name + r = rule.rhs.map(&:display_name).insert(position, "•").join(" ") + "#{r} (rule #{rule.id})" + end + + # Right after position + def display_rest + r = rule.rhs[position..-1].map(&:display_name).join(" ") + ". #{r} (rule #{rule.id})" + end + end + end +end diff --git a/tool/lrama/lib/lrama/version.rb b/tool/lrama/lib/lrama/version.rb index 2adbfd70fb..54eea75bfd 100644 --- a/tool/lrama/lib/lrama/version.rb +++ b/tool/lrama/lib/lrama/version.rb @@ -1,3 +1,3 @@ module Lrama - VERSION = "0.5.2".freeze + VERSION = "0.5.3".freeze end |