diff options
author | yui-knk <[email protected]> | 2023-08-09 11:55:19 +0900 |
---|---|---|
committer | Yuichiro Kaneko <[email protected]> | 2023-08-16 19:02:44 +0900 |
commit | 0982c5fa00f6163d04b17229e72d128263b98d50 (patch) | |
tree | 4193885a1bbc556a59252de756124e8c6495c9f9 | |
parent | fe240b672b709e800f2b4fb996f04f2005329dab (diff) |
Resync Lrama v0.5.3
Previous sync included unrelated files and missed template file update.
Notes:
Merged: https://github.com/ruby/ruby/pull/8229
-rwxr-xr-x | tool/lrama/exe/lex_state | 14 | ||||
-rw-r--r-- | tool/lrama/lib/lrama/irb.rb | 8 | ||||
-rw-r--r-- | tool/lrama/lib/lrama/irb/load_grammar.rb | 9 | ||||
-rw-r--r-- | tool/lrama/lib/lrama/json_reporter.rb | 28 | ||||
-rw-r--r-- | tool/lrama/template/bison/yacc.c | 363 |
5 files changed, 363 insertions, 59 deletions
diff --git a/tool/lrama/exe/lex_state b/tool/lrama/exe/lex_state deleted file mode 100755 index 0acdeaadd6..0000000000 --- a/tool/lrama/exe/lex_state +++ /dev/null @@ -1,14 +0,0 @@ -#!/usr/bin/env ruby - - -$LOAD_PATH << File.join(__dir__, "../lib") -require "lrama" - -grammar_file = ARGV.shift -y = File.read(grammar_file) -warning = Lrama::Warning.new -grammar = Lrama::Parser.new(y).parse -states = Lrama::States.new(grammar, warning) -states.compute - -Lrama::LexState.new(states).compute diff --git a/tool/lrama/lib/lrama/irb.rb b/tool/lrama/lib/lrama/irb.rb deleted file mode 100644 index 6f093cdedd..0000000000 --- a/tool/lrama/lib/lrama/irb.rb +++ /dev/null @@ -1,8 +0,0 @@ -require "lrama/irb/load_grammar" - -# You need to explicitly require this file to add irb commands -# See: "lib/irb/extend-command.rb" in ruby/irb -module Lrama - module IRB - end -end diff --git a/tool/lrama/lib/lrama/irb/load_grammar.rb b/tool/lrama/lib/lrama/irb/load_grammar.rb deleted file mode 100644 index a4285eaa00..0000000000 --- a/tool/lrama/lib/lrama/irb/load_grammar.rb +++ /dev/null @@ -1,9 +0,0 @@ -require 'irb/cmd/nop' - -module Lrama - module IRB - class LoadGrammar < IRB::ExtendCommand::Nop - - end - end -end diff --git a/tool/lrama/lib/lrama/json_reporter.rb b/tool/lrama/lib/lrama/json_reporter.rb deleted file mode 100644 index f6cd2532bd..0000000000 --- a/tool/lrama/lib/lrama/json_reporter.rb +++ /dev/null @@ -1,28 +0,0 @@ -require 'json' - -module Lrama - class JsonReporter - include Lrama::Report::Duration - - def initialize(states) - @states = states - end - - def report(io, **options) - report_duration(:report) do - _report(io, **options) - end - end - - private - - def _report(io, grammar: false, states: false, itemsets: false, lookaheads: false, solved: false, verbose: false) - # TODO: Unused terms - # TODO: Unused rules - - report_conflicts(io) - report_grammar(io) if grammar - report_states(io, itemsets, lookaheads, solved, verbose) - end - end -end diff --git 
a/tool/lrama/template/bison/yacc.c b/tool/lrama/template/bison/yacc.c index 857afb26c0..61d23d506b 100644 --- a/tool/lrama/template/bison/yacc.c +++ b/tool/lrama/template/bison/yacc.c @@ -542,6 +542,13 @@ static const <%= output.int_type_for(output.context.yytranslate) %> yytranslate[ <%= output.yytranslate %> }; +<%- if output.error_recovery -%> +/* YYTRANSLATE_INVERTED[SYMBOL-NUM] -- Token number corresponding to SYMBOL-NUM */ +static const <%= output.int_type_for(output.context.yytranslate_inverted) %> yytranslate_inverted[] = +{ +<%= output.yytranslate_inverted %> +}; +<%- end -%> #if YYDEBUG /* YYRLINE[YYN] -- Source line where rule number YYN was defined. */ static const <%= output.int_type_for(output.context.yyrline) %> yyrline[] = @@ -1211,6 +1218,303 @@ yydestruct (const char *yymsg, +<%- if output.error_recovery -%> +#ifndef YYMAXREPAIR +# define YYMAXREPAIR 3 +#endif + +enum repair_type { + insert, + delete, + shift, +}; + +struct repair { + enum repair_type type; + yysymbol_kind_t term; +}; +typedef struct repair repair; + +struct repairs { + /* For debug */ + int id; + /* For breadth-first traversing */ + struct repairs *next; + YYPTRDIFF_T stack_length; + /* Bottom of states */ + yy_state_t *states; + /* Top of states */ + yy_state_t *state; + /* repair length */ + int repair_length; + /* */ + struct repairs *prev_repair; + struct repair repair; +}; +typedef struct repairs repairs; + +struct yy_term { + yysymbol_kind_t kind; + YYSTYPE value; + YYLTYPE location; +}; +typedef struct yy_term yy_term; + +struct repair_terms { + int id; + int length; + yy_term terms[]; +}; +typedef struct repair_terms repair_terms; + +static void +yy_error_token_initialize (yysymbol_kind_t yykind, YYSTYPE * const yyvaluep, YYLTYPE * const yylocationp<%= output.user_formals %>) +{ + YY_IGNORE_MAYBE_UNINITIALIZED_BEGIN +switch (yykind) + { +<%= output.symbol_actions_for_error_token -%> + default: + break; + } + YY_IGNORE_MAYBE_UNINITIALIZED_END +} + +static repair_terms * 
+yy_create_repair_terms(repairs *reps) +{ + repairs *r = reps; + repair_terms *rep_terms; + int count = 0; + + while (r->prev_repair) + { + count++; + r = r->prev_repair; + } + + rep_terms = (repair_terms *) malloc (sizeof (repair_terms) + sizeof (yy_term) * count); + rep_terms->id = reps->id; + rep_terms->length = count; + + r = reps; + while (r->prev_repair) + { + rep_terms->terms[count-1].kind = r->repair.term; + count--; + r = r->prev_repair; + } + + return rep_terms; +} + +static void +yy_print_repairs(repairs *reps) +{ + repairs *r = reps; + + fprintf (stderr, + "id: %d, repair_length: %d, repair_state: %d, prev_repair_id: %d\n", + reps->id, reps->repair_length, *reps->state, reps->prev_repair->id); + + while (r->prev_repair) + { + fprintf (stderr, "%s ", yysymbol_name (r->repair.term)); + r = r->prev_repair; + } + + fprintf (stderr, "\n"); +} + +static void +yy_print_repair_terms(repair_terms *rep_terms) +{ + for (int i = 0; i < rep_terms->length; i++) + fprintf (stderr, "%s ", yysymbol_name (rep_terms->terms[i].kind)); + + fprintf (stderr, "\n"); +} + +static void +yy_free_repairs(repairs *reps) +{ + while (reps) + { + repairs *r = reps; + reps = reps->next; + free (r->states); + free (r); + } +} + +static int +yy_process_repairs(repairs *reps, yysymbol_kind_t token) +{ + int yyn; + int yystate = *reps->state; + int yylen = 0; + yysymbol_kind_t yytoken = token; + + goto yyrecover_backup; + +yyrecover_newstate: + // TODO: check reps->stack_length + reps->state += 1; + *reps->state = (yy_state_t) yystate; + + +yyrecover_backup: + yyn = yypact[yystate]; + if (yypact_value_is_default (yyn)) + goto yyrecover_default; + + /* "Reading a token" */ + if (yytoken == YYSYMBOL_YYEMPTY) + return 1; + + yyn += yytoken; + if (yyn < 0 || YYLAST < yyn || yycheck[yyn] != yytoken) + goto yyrecover_default; + yyn = yytable[yyn]; + if (yyn <= 0) + { + if (yytable_value_is_error (yyn)) + goto yyrecover_errlab; + yyn = -yyn; + goto yyrecover_reduce; + } + + /* shift */ + yystate 
= yyn; + yytoken = YYSYMBOL_YYEMPTY; + goto yyrecover_newstate; + + +yyrecover_default: + yyn = yydefact[yystate]; + if (yyn == 0) + goto yyrecover_errlab; + goto yyrecover_reduce; + + +yyrecover_reduce: + yylen = yyr2[yyn]; + /* YYPOPSTACK */ + reps->state -= yylen; + yylen = 0; + + { + const int yylhs = yyr1[yyn] - YYNTOKENS; + const int yyi = yypgoto[yylhs] + *reps->state; + yystate = (0 <= yyi && yyi <= YYLAST && yycheck[yyi] == *reps->state + ? yytable[yyi] + : yydefgoto[yylhs]); + } + + goto yyrecover_newstate; + +yyrecover_errlab: + return 0; +} + +static repair_terms * +yyrecover(yy_state_t *yyss, yy_state_t *yyssp, int yychar) +{ + yysymbol_kind_t yytoken = YYTRANSLATE (yychar); + repair_terms *rep_terms = YY_NULLPTR; + int count = 0; + + repairs *head = (repairs *) malloc (sizeof (repairs)); + repairs *current = head; + repairs *tail = head; + YYPTRDIFF_T stack_length = yyssp - yyss + 1; + + head->id = count; + head->next = 0; + head->stack_length = stack_length; + head->states = (yy_state_t *) malloc (sizeof (yy_state_t) * (stack_length)); + head->state = head->states + (yyssp - yyss); + YYCOPY (head->states, yyss, stack_length); + head->repair_length = 0; + head->prev_repair = 0; + + stack_length = (stack_length * 2 > 100) ? (stack_length * 2) : 100; + count++; + + while (current) + { + int yystate = *current->state; + int yyn = yypact[yystate]; + /* See also: yypcontext_expected_tokens */ + if (!yypact_value_is_default (yyn)) + { + int yyxbegin = yyn < 0 ? -yyn : 0; + int yychecklim = YYLAST - yyn + 1; + int yyxend = yychecklim < YYNTOKENS ? 
yychecklim : YYNTOKENS; + int yyx; + for (yyx = yyxbegin; yyx < yyxend; ++yyx) + { + if (yyx != YYSYMBOL_YYerror) + { + if (current->repair_length + 1 > YYMAXREPAIR) + continue; + + repairs *new = (repairs *) malloc (sizeof (repairs)); + new->id = count; + new->next = 0; + new->stack_length = stack_length; + new->states = (yy_state_t *) malloc (sizeof (yy_state_t) * (stack_length)); + new->state = new->states + (current->state - current->states); + YYCOPY (new->states, current->states, current->state - current->states + 1); + new->repair_length = current->repair_length + 1; + new->prev_repair = current; + new->repair.type = insert; + new->repair.term = (yysymbol_kind_t) yyx; + + /* Process PDA assuming next token is yyx */ + if (! yy_process_repairs (new, yyx)) + { + free (new); + continue; + } + + tail->next = new; + tail = new; + count++; + + if (yyx == yytoken) + { + rep_terms = yy_create_repair_terms (current); + fprintf (stderr, "repair_terms found. id: %d, length: %d\n", rep_terms->id, rep_terms->length); + yy_print_repairs (current); + yy_print_repair_terms (rep_terms); + + goto done; + } + + fprintf (stderr, + "New repairs is enqueued. count: %d, yystate: %d, yyx: %d\n", + count, yystate, yyx); + yy_print_repairs (new); + } + } + } + + current = current->next; + } + +done: + + yy_free_repairs(head); + + if (!rep_terms) + { + fprintf (stderr, "repair_terms not found\n"); + } + + return rep_terms; +} +<%- end -%> @@ -1281,6 +1585,12 @@ YYLTYPE yylloc = yyloc_default; /* The locations where the error started and ended. */ YYLTYPE yyerror_range[3]; +<%- if output.error_recovery -%> + repair_terms *rep_terms = 0; + yy_term term_backup; + int rep_terms_index; + int yychar_backup; +<%- end -%> /* Buffer for error messages, and its allocated size. */ char yymsgbuf[128]; @@ -1415,6 +1725,36 @@ yybackup: /* Not known => get a lookahead token if don't already have one. 
*/ +<%- if output.error_recovery -%> + if (yychar == YYEMPTY && rep_terms) + { + + if (rep_terms_index < rep_terms->length) + { + YYDPRINTF ((stderr, "An error recovery token is used\n")); + yy_term term = rep_terms->terms[rep_terms_index]; + yytoken = term.kind; + yylval = term.value; + yylloc = term.location; + yychar = yytranslate_inverted[yytoken]; + YY_SYMBOL_PRINT ("Next error recovery token is", yytoken, &yylval, &yylloc<%= output.user_args %>); + rep_terms_index++; + } + else + { + YYDPRINTF ((stderr, "Error recovery is completed\n")); + yytoken = term_backup.kind; + yylval = term_backup.value; + yylloc = term_backup.location; + yychar = yychar_backup; + YY_SYMBOL_PRINT ("Next token is", yytoken, &yylval, &yylloc<%= output.user_args %>); + + free (rep_terms); + rep_terms = 0; + yychar_backup = 0; + } + } +<%- end -%> /* YYCHAR is either empty, or end-of-input, or a valid lookahead. */ if (yychar == YYEMPTY) { @@ -1639,6 +1979,29 @@ yyerrorlab: | yyerrlab1 -- common code for both syntax error and YYERROR. | `-------------------------------------------------------------*/ yyerrlab1: +<%- if output.error_recovery -%> + { + rep_terms = yyrecover (yyss, yyssp, yychar); + if (rep_terms) + { + for (int i = 0; i < rep_terms->length; i++) + { + yy_term *term = &rep_terms->terms[i]; + yy_error_token_initialize (term->kind, &term->value, &term->location<%= output.user_args %>); + } + + yychar_backup = yychar; + /* Can be packed into (the tail of) rep_terms? */ + term_backup.kind = yytoken; + term_backup.value = yylval; + term_backup.location = yylloc; + rep_terms_index = 0; + yychar = YYEMPTY; + + goto yybackup; + } + } +<%- end -%> yyerrstatus = 3; /* Each real token shifted decrements this. */ /* Pop stack until we find a state that shifts the error token. */ |