summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authoryui-knk <[email protected]>2024-04-21 09:54:23 +0900
committerYuichiro Kaneko <[email protected]>2024-04-23 07:20:22 +0900
commit2992e1074adf86ed6c06ba1750648a35d877001a (patch)
tree8d21e90b00fed4ef58465c5008d92a15f276d3c6
parented9834efbdcac5bc54df673703095bdecc797c7d (diff)
Refactor parser compile functions
Refactor parser compile functions to reduce the dependence on ruby functions. This commit includes these changes: 1. Refactor `gets`, `input` and `gets_` of `parser_params`. The parser needs two different pieces of data to get the next line: a function (`gets`) and input data (`input`). However, `gets_` is used for both a function (`call`) and input data (`ptr`). `call` is used for managing the general callback function when `rb_ruby_parser_compile_generic` is used. `ptr` is used for managing the current position in the String when `parser_compile_string` is used. This commit changes the parser to use only `gets` and `input`, then removes `gets_`. 2. Move the parser_compile functions and `gets` functions from parse.y to ruby_parser.c. This change reduces the parser's dependence on ruby functions. 3. Change ruby_parser and ripper to take care of the `VALUE input` GC mark. Move the responsibility for calling `rb_gc_mark` on `VALUE input` from the parser to ruby_parser and ripper. `input` is an arbitrary data pointer from the viewpoint of the parser. 4. Introduce the `rb_parser_compile_array` function. A caller of `rb_parser_compile_generic` needs to take care of GC because ruby_parser doesn’t know the details of `lex_gets` and `input`. Introduce `rb_parser_compile_array` to reduce the complexity of ast.c.
-rw-r--r--ast.c15
-rw-r--r--common.mk4
-rw-r--r--ext/ripper/ripper_init.c.tmpl59
-rw-r--r--internal/parse.h8
-rw-r--r--internal/ruby_parser.h10
-rw-r--r--parse.y93
-rw-r--r--ruby_parser.c173
-rw-r--r--rubyparser.h3
8 files changed, 243 insertions, 122 deletions
diff --git a/ast.c b/ast.c
index a4c57b898b..297f4983e9 100644
--- a/ast.c
+++ b/ast.c
@@ -129,19 +129,6 @@ rb_ast_parse_file(VALUE path, VALUE keep_script_lines, VALUE error_tolerant, VAL
}
static VALUE
-lex_array(VALUE array, int index)
-{
- VALUE str = rb_ary_entry(array, index);
- if (!NIL_P(str)) {
- StringValue(str);
- if (!rb_enc_asciicompat(rb_enc_get(str))) {
- rb_raise(rb_eArgError, "invalid source encoding");
- }
- }
- return str;
-}
-
-static VALUE
rb_ast_parse_array(VALUE array, VALUE keep_script_lines, VALUE error_tolerant, VALUE keep_tokens)
{
rb_ast_t *ast = 0;
@@ -151,7 +138,7 @@ rb_ast_parse_array(VALUE array, VALUE keep_script_lines, VALUE error_tolerant, V
if (RTEST(keep_script_lines)) rb_parser_set_script_lines(vparser);
if (RTEST(error_tolerant)) rb_parser_error_tolerant(vparser);
if (RTEST(keep_tokens)) rb_parser_keep_tokens(vparser);
- ast = rb_parser_compile_generic(vparser, lex_array, Qnil, array, 1);
+ ast = rb_parser_compile_array(vparser, Qnil, array, 1);
return ast_parse_done(ast);
}
diff --git a/common.mk b/common.mk
index 0239b81b4d..ab40172d59 100644
--- a/common.mk
+++ b/common.mk
@@ -3316,6 +3316,7 @@ compile.$(OBJEXT): $(top_srcdir)/internal/imemo.h
compile.$(OBJEXT): $(top_srcdir)/internal/io.h
compile.$(OBJEXT): $(top_srcdir)/internal/numeric.h
compile.$(OBJEXT): $(top_srcdir)/internal/object.h
+compile.$(OBJEXT): $(top_srcdir)/internal/parse.h
compile.$(OBJEXT): $(top_srcdir)/internal/rational.h
compile.$(OBJEXT): $(top_srcdir)/internal/re.h
compile.$(OBJEXT): $(top_srcdir)/internal/ruby_parser.h
@@ -7494,6 +7495,7 @@ goruby.$(OBJEXT): $(top_srcdir)/internal/fixnum.h
goruby.$(OBJEXT): $(top_srcdir)/internal/gc.h
goruby.$(OBJEXT): $(top_srcdir)/internal/imemo.h
goruby.$(OBJEXT): $(top_srcdir)/internal/numeric.h
+goruby.$(OBJEXT): $(top_srcdir)/internal/parse.h
goruby.$(OBJEXT): $(top_srcdir)/internal/rational.h
goruby.$(OBJEXT): $(top_srcdir)/internal/ruby_parser.h
goruby.$(OBJEXT): $(top_srcdir)/internal/sanitizers.h
@@ -10361,6 +10363,7 @@ miniinit.$(OBJEXT): $(top_srcdir)/internal/fixnum.h
miniinit.$(OBJEXT): $(top_srcdir)/internal/gc.h
miniinit.$(OBJEXT): $(top_srcdir)/internal/imemo.h
miniinit.$(OBJEXT): $(top_srcdir)/internal/numeric.h
+miniinit.$(OBJEXT): $(top_srcdir)/internal/parse.h
miniinit.$(OBJEXT): $(top_srcdir)/internal/rational.h
miniinit.$(OBJEXT): $(top_srcdir)/internal/ruby_parser.h
miniinit.$(OBJEXT): $(top_srcdir)/internal/sanitizers.h
@@ -10820,6 +10823,7 @@ node_dump.$(OBJEXT): $(top_srcdir)/internal/gc.h
node_dump.$(OBJEXT): $(top_srcdir)/internal/hash.h
node_dump.$(OBJEXT): $(top_srcdir)/internal/imemo.h
node_dump.$(OBJEXT): $(top_srcdir)/internal/numeric.h
+node_dump.$(OBJEXT): $(top_srcdir)/internal/parse.h
node_dump.$(OBJEXT): $(top_srcdir)/internal/rational.h
node_dump.$(OBJEXT): $(top_srcdir)/internal/ruby_parser.h
node_dump.$(OBJEXT): $(top_srcdir)/internal/sanitizers.h
diff --git a/ext/ripper/ripper_init.c.tmpl b/ext/ripper/ripper_init.c.tmpl
index c9d381da5b..965c71d668 100644
--- a/ext/ripper/ripper_init.c.tmpl
+++ b/ext/ripper/ripper_init.c.tmpl
@@ -17,15 +17,40 @@
ID id_warn, id_warning, id_gets, id_assoc;
+enum lex_type {
+ lex_type_str,
+ lex_type_io,
+ lex_type_generic,
+};
+
struct ripper {
rb_parser_t *p;
+ enum lex_type type;
+ union {
+ struct lex_pointer_string ptr_str;
+ VALUE val;
+ } data;
};
static void
ripper_parser_mark2(void *ptr)
{
struct ripper *r = (struct ripper*)ptr;
- if (r->p) ripper_parser_mark(r->p);
+ if (r->p) {
+ ripper_parser_mark(r->p);
+
+ switch (r->type) {
+ case lex_type_str:
+ rb_gc_mark(r->data.ptr_str.str);
+ break;
+ case lex_type_io:
+ rb_gc_mark(r->data.val);
+ break;
+ case lex_type_generic:
+ rb_gc_mark(r->data.val);
+ break;
+ }
+ }
}
static void
@@ -54,8 +79,9 @@ static const rb_data_type_t parser_data_type = {
};
static VALUE
-ripper_lex_get_generic(struct parser_params *p, VALUE src)
+ripper_lex_get_generic(struct parser_params *p, rb_parser_input_data input, int line_count)
{
+ VALUE src = (VALUE)input;
VALUE line = rb_funcallv_public(src, id_gets, 0, 0);
if (!NIL_P(line) && !RB_TYPE_P(line, T_STRING)) {
rb_raise(rb_eTypeError,
@@ -79,12 +105,19 @@ ripper_compile_error(struct parser_params *p, const char *fmt, ...)
}
static VALUE
-ripper_lex_io_get(struct parser_params *p, VALUE src)
+ripper_lex_io_get(struct parser_params *p, rb_parser_input_data input, int line_count)
{
+ VALUE src = (VALUE)input;
return rb_io_gets(src);
}
static VALUE
+ripper_lex_get_str(struct parser_params *p, rb_parser_input_data input, int line_count)
+{
+ return rb_parser_lex_get_str((struct lex_pointer_string *)input);
+}
+
+static VALUE
ripper_s_allocate(VALUE klass)
{
struct ripper *r;
@@ -294,26 +327,38 @@ parser_dedent_string(VALUE self, VALUE input, VALUE width)
static VALUE
ripper_initialize(int argc, VALUE *argv, VALUE self)
{
+ struct ripper *r;
struct parser_params *p;
VALUE src, fname, lineno;
- VALUE (*gets)(struct parser_params*,VALUE);
- VALUE input, sourcefile_string;
+ rb_parser_lex_gets_func *gets;
+ VALUE sourcefile_string;
const char *sourcefile;
int sourceline;
+ rb_parser_input_data input;
p = ripper_parser_params(self, false);
+ TypedData_Get_Struct(self, struct ripper, &parser_data_type, r);
rb_scan_args(argc, argv, "12", &src, &fname, &lineno);
if (RB_TYPE_P(src, T_FILE)) {
gets = ripper_lex_io_get;
+ r->type = lex_type_io;
+ r->data.val = src;
+ input = (rb_parser_input_data)src;
}
else if (rb_respond_to(src, id_gets)) {
gets = ripper_lex_get_generic;
+ r->type = lex_type_generic;
+ r->data.val = src;
+ input = (rb_parser_input_data)src;
}
else {
StringValue(src);
- gets = rb_ruby_ripper_lex_get_str;
+ gets = ripper_lex_get_str;
+ r->type = lex_type_str;
+ r->data.ptr_str.str = src;
+ r->data.ptr_str.ptr = 0;
+ input = (rb_parser_input_data)&r->data.ptr_str;
}
- input = src;
if (NIL_P(fname)) {
fname = STR_NEW2("(ripper)");
OBJ_FREEZE(fname);
diff --git a/internal/parse.h b/internal/parse.h
index e882a16429..73a1c2d1b1 100644
--- a/internal/parse.h
+++ b/internal/parse.h
@@ -53,10 +53,9 @@ void rb_ruby_parser_set_options(rb_parser_t *p, int print, int loop, int chomp,
rb_parser_t *rb_ruby_parser_set_context(rb_parser_t *p, const struct rb_iseq_struct *base, int main);
void rb_ruby_parser_set_script_lines(rb_parser_t *p);
void rb_ruby_parser_error_tolerant(rb_parser_t *p);
-rb_ast_t* rb_ruby_parser_compile_file_path(rb_parser_t *p, VALUE fname, VALUE file, int start);
void rb_ruby_parser_keep_tokens(rb_parser_t *p);
-rb_ast_t* rb_ruby_parser_compile_generic(rb_parser_t *p, VALUE (*lex_gets)(VALUE, int), VALUE fname, VALUE input, int start);
-rb_ast_t* rb_ruby_parser_compile_string_path(rb_parser_t *p, VALUE f, VALUE s, int line);
+typedef VALUE (rb_parser_lex_gets_func)(struct parser_params*, rb_parser_input_data, int);
+rb_ast_t *rb_parser_compile(rb_parser_t *p, rb_parser_lex_gets_func *gets, VALUE fname, rb_parser_input_data input, int line);
RUBY_SYMBOL_EXPORT_BEGIN
@@ -91,7 +90,7 @@ VALUE rb_ruby_parser_debug_output(rb_parser_t *p);
void rb_ruby_parser_set_debug_output(rb_parser_t *p, VALUE output);
VALUE rb_ruby_parser_parsing_thread(rb_parser_t *p);
void rb_ruby_parser_set_parsing_thread(rb_parser_t *p, VALUE parsing_thread);
-void rb_ruby_parser_ripper_initialize(rb_parser_t *p, VALUE (*gets)(struct parser_params*,VALUE), VALUE input, VALUE sourcefile_string, const char *sourcefile, int sourceline);
+void rb_ruby_parser_ripper_initialize(rb_parser_t *p, rb_parser_lex_gets_func *gets, rb_parser_input_data input, VALUE sourcefile_string, const char *sourcefile, int sourceline);
VALUE rb_ruby_parser_result(rb_parser_t *p);
rb_encoding *rb_ruby_parser_enc(rb_parser_t *p);
VALUE rb_ruby_parser_ruby_sourcefile_string(rb_parser_t *p);
@@ -99,7 +98,6 @@ int rb_ruby_parser_ruby_sourceline(rb_parser_t *p);
int rb_ruby_parser_lex_state(rb_parser_t *p);
void rb_ruby_ripper_parse0(rb_parser_t *p);
int rb_ruby_ripper_dedent_string(rb_parser_t *p, VALUE string, int width);
-VALUE rb_ruby_ripper_lex_get_str(rb_parser_t *p, VALUE s);
int rb_ruby_ripper_initialized_p(rb_parser_t *p);
void rb_ruby_ripper_parser_initialize(rb_parser_t *p);
long rb_ruby_ripper_column(rb_parser_t *p);
diff --git a/internal/ruby_parser.h b/internal/ruby_parser.h
index 2559724480..afb8909429 100644
--- a/internal/ruby_parser.h
+++ b/internal/ruby_parser.h
@@ -5,10 +5,16 @@
#include "internal/bignum.h"
#include "internal/compilers.h"
#include "internal/complex.h"
+#include "internal/parse.h"
#include "internal/rational.h"
#include "rubyparser.h"
#include "vm.h"
+struct lex_pointer_string {
+ VALUE str;
+ long ptr;
+};
+
RUBY_SYMBOL_EXPORT_BEGIN
#ifdef UNIVERSAL_PARSER
const rb_parser_config_t *rb_ruby_parser_config(void);
@@ -19,6 +25,7 @@ VALUE rb_parser_new(void);
rb_ast_t *rb_parser_compile_string_path(VALUE vparser, VALUE fname, VALUE src, int line);
VALUE rb_str_new_parser_string(rb_parser_string_t *str);
VALUE rb_str_new_mutable_parser_string(rb_parser_string_t *str);
+VALUE rb_parser_lex_get_str(struct lex_pointer_string *ptr_str);
VALUE rb_node_str_string_val(const NODE *);
VALUE rb_node_sym_string_val(const NODE *);
@@ -48,7 +55,8 @@ void rb_parser_keep_tokens(VALUE vparser);
rb_ast_t *rb_parser_compile_string(VALUE, const char*, VALUE, int);
rb_ast_t *rb_parser_compile_file_path(VALUE vparser, VALUE fname, VALUE input, int line);
-rb_ast_t *rb_parser_compile_generic(VALUE vparser, VALUE (*lex_gets)(VALUE, int), VALUE fname, VALUE input, int line);
+rb_ast_t *rb_parser_compile_generic(VALUE vparser, rb_parser_lex_gets_func *lex_gets, VALUE fname, VALUE input, int line);
+rb_ast_t *rb_parser_compile_array(VALUE vparser, VALUE fname, VALUE array, int start);
enum lex_state_bits {
EXPR_BEG_bit, /* ignore newline, +/- is a sign. */
diff --git a/parse.y b/parse.y
index 3e6daad794..3e95bfd51d 100644
--- a/parse.y
+++ b/parse.y
@@ -81,7 +81,6 @@ syntax_error_new(void)
static NODE *reg_named_capture_assign(struct parser_params* p, VALUE regexp, const YYLTYPE *loc);
#define compile_callback rb_suppress_tracing
-VALUE rb_io_gets_internal(VALUE io);
#endif /* !UNIVERSAL_PARSER */
static int rb_parser_string_hash_cmp(rb_parser_string_t *str1, rb_parser_string_t *str2);
@@ -490,8 +489,8 @@ struct parser_params {
struct {
rb_strterm_t *strterm;
- VALUE (*gets)(struct parser_params*,VALUE);
- VALUE input;
+ VALUE (*gets)(struct parser_params*,rb_parser_input_data,int);
+ rb_parser_input_data input;
parser_string_buffer_t string_buffer;
rb_parser_string_t *lastline;
rb_parser_string_t *nextline;
@@ -499,10 +498,6 @@ struct parser_params {
const char *pcur;
const char *pend;
const char *ptok;
- union {
- long ptr;
- VALUE (*call)(VALUE, int);
- } gets_;
enum lex_state_e state;
/* track the nest level of any parens "()[]{}" */
int paren_nest;
@@ -7803,31 +7798,11 @@ must_be_ascii_compatible(struct parser_params *p, VALUE s)
return enc;
}
-static VALUE
-lex_get_str(struct parser_params *p, VALUE s)
-{
- char *beg, *end, *start;
- long len;
-
- beg = RSTRING_PTR(s);
- len = RSTRING_LEN(s);
- start = beg;
- if (p->lex.gets_.ptr) {
- if (len == p->lex.gets_.ptr) return Qnil;
- beg += p->lex.gets_.ptr;
- len -= p->lex.gets_.ptr;
- }
- end = memchr(beg, '\n', len);
- if (end) len = ++end - beg;
- p->lex.gets_.ptr += len;
- return rb_str_subseq(s, beg - start, len);
-}
-
static rb_parser_string_t *
lex_getline(struct parser_params *p)
{
rb_parser_string_t *str;
- VALUE line = (*p->lex.gets)(p, p->lex.input);
+ VALUE line = (*p->lex.gets)(p, p->lex.input, p->line_count);
if (NIL_P(line)) return 0;
must_be_ascii_compatible(p, line);
p->line_count++;
@@ -7837,61 +7812,14 @@ lex_getline(struct parser_params *p)
}
#ifndef RIPPER
-static rb_ast_t*
-parser_compile_string(rb_parser_t *p, VALUE fname, VALUE s, int line)
-{
- p->lex.gets = lex_get_str;
- p->lex.gets_.ptr = 0;
- p->lex.input = rb_str_new_frozen(s);
- p->lex.pbeg = p->lex.pcur = p->lex.pend = 0;
-
- return yycompile(p, fname, line);
-}
-
-rb_ast_t*
-rb_ruby_parser_compile_string_path(rb_parser_t *p, VALUE f, VALUE s, int line)
-{
- must_be_ascii_compatible(p, s);
- return parser_compile_string(p, f, s, line);
-}
-
rb_ast_t*
-rb_ruby_parser_compile_string(rb_parser_t *p, const char *f, VALUE s, int line)
-{
- return rb_ruby_parser_compile_string_path(p, rb_filesystem_str_new_cstr(f), s, line);
-}
-
-static VALUE
-lex_io_gets(struct parser_params *p, VALUE io)
+rb_parser_compile(rb_parser_t *p, rb_parser_lex_gets_func *gets, VALUE fname, rb_parser_input_data input, int line)
{
- return rb_io_gets_internal(io);
-}
-
-rb_ast_t*
-rb_ruby_parser_compile_file_path(rb_parser_t *p, VALUE fname, VALUE file, int start)
-{
- p->lex.gets = lex_io_gets;
- p->lex.input = file;
- p->lex.pbeg = p->lex.pcur = p->lex.pend = 0;
-
- return yycompile(p, fname, start);
-}
-
-static VALUE
-lex_generic_gets(struct parser_params *p, VALUE input)
-{
- return (*p->lex.gets_.call)(input, p->line_count);
-}
-
-rb_ast_t*
-rb_ruby_parser_compile_generic(rb_parser_t *p, VALUE (*lex_gets)(VALUE, int), VALUE fname, VALUE input, int start)
-{
- p->lex.gets = lex_generic_gets;
- p->lex.gets_.call = lex_gets;
+ p->lex.gets = gets;
p->lex.input = input;
p->lex.pbeg = p->lex.pcur = p->lex.pend = 0;
- return yycompile(p, fname, start);
+ return yycompile(p, fname, line);
}
#endif /* !RIPPER */
@@ -15880,7 +15808,6 @@ rb_ruby_parser_mark(void *ptr)
{
struct parser_params *p = (struct parser_params*)ptr;
- rb_gc_mark(p->lex.input);
rb_gc_mark(p->ruby_sourcefile_string);
rb_gc_mark((VALUE)p->ast);
#ifndef RIPPER
@@ -16090,7 +16017,7 @@ rb_ruby_parser_set_parsing_thread(rb_parser_t *p, VALUE parsing_thread)
}
void
-rb_ruby_parser_ripper_initialize(rb_parser_t *p, VALUE (*gets)(struct parser_params*,VALUE), VALUE input, VALUE sourcefile_string, const char *sourcefile, int sourceline)
+rb_ruby_parser_ripper_initialize(rb_parser_t *p, rb_parser_lex_gets_func *gets, rb_parser_input_data input, VALUE sourcefile_string, const char *sourcefile, int sourceline)
{
p->lex.gets = gets;
p->lex.input = input;
@@ -16162,12 +16089,6 @@ rb_ruby_ripper_dedent_string(rb_parser_t *p, VALUE string, int width)
return i;
}
-VALUE
-rb_ruby_ripper_lex_get_str(rb_parser_t *p, VALUE s)
-{
- return lex_get_str(p, s);
-}
-
int
rb_ruby_ripper_initialized_p(rb_parser_t *p)
{
diff --git a/ruby_parser.c b/ruby_parser.c
index ef0b53a1bc..ded17b0a42 100644
--- a/ruby_parser.c
+++ b/ruby_parser.c
@@ -504,8 +504,25 @@ static const rb_parser_config_t rb_global_parser_config = {
};
#endif
+enum lex_type {
+ lex_type_str,
+ lex_type_io,
+ lex_type_array,
+ lex_type_generic,
+};
+
struct ruby_parser {
rb_parser_t *parser_params;
+ enum lex_type type;
+ union {
+ struct lex_pointer_string lex_str;
+ struct {
+ VALUE file;
+ } lex_io;
+ struct {
+ VALUE ary;
+ } lex_array;
+ } data;
};
static void
@@ -513,6 +530,21 @@ parser_mark(void *ptr)
{
struct ruby_parser *parser = (struct ruby_parser*)ptr;
rb_ruby_parser_mark(parser->parser_params);
+
+ switch (parser->type) {
+ case lex_type_str:
+ rb_gc_mark(parser->data.lex_str.str);
+ break;
+ case lex_type_io:
+ rb_gc_mark(parser->data.lex_io.file);
+ break;
+ case lex_type_array:
+ rb_gc_mark(parser->data.lex_array.ary);
+ break;
+ case lex_type_generic:
+ /* noop. Caller of rb_parser_compile_generic should mark the objects. */
+ break;
+ }
}
static void
@@ -615,6 +647,127 @@ rb_parser_error_tolerant(VALUE vparser)
rb_ruby_parser_error_tolerant(parser->parser_params);
}
+void
+rb_parser_keep_tokens(VALUE vparser)
+{
+ struct ruby_parser *parser;
+
+ TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
+ rb_ruby_parser_keep_tokens(parser->parser_params);
+}
+
+VALUE
+rb_parser_lex_get_str(struct lex_pointer_string *ptr_str)
+{
+ char *beg, *end, *start;
+ long len;
+ VALUE s = ptr_str->str;
+
+ beg = RSTRING_PTR(s);
+ len = RSTRING_LEN(s);
+ start = beg;
+ if (ptr_str->ptr) {
+ if (len == ptr_str->ptr) return Qnil;
+ beg += ptr_str->ptr;
+ len -= ptr_str->ptr;
+ }
+ end = memchr(beg, '\n', len);
+ if (end) len = ++end - beg;
+ ptr_str->ptr += len;
+ return rb_str_subseq(s, beg - start, len);
+}
+
+static VALUE
+lex_get_str(struct parser_params *p, rb_parser_input_data input, int line_count)
+{
+ return rb_parser_lex_get_str((struct lex_pointer_string *)input);
+}
+
+static rb_ast_t*
+parser_compile_string0(struct ruby_parser *parser, VALUE fname, VALUE s, int line)
+{
+ VALUE str = rb_str_new_frozen(s);
+
+ parser->type = lex_type_str;
+ parser->data.lex_str.str = str;
+ parser->data.lex_str.ptr = 0;
+
+ return rb_parser_compile(parser->parser_params, lex_get_str, fname, (rb_parser_input_data)&parser->data, line);
+}
+
+static rb_encoding *
+must_be_ascii_compatible(VALUE s)
+{
+ rb_encoding *enc = rb_enc_get(s);
+ if (!rb_enc_asciicompat(enc)) {
+ rb_raise(rb_eArgError, "invalid source encoding");
+ }
+ return enc;
+}
+
+static rb_ast_t*
+parser_compile_string_path(struct ruby_parser *parser, VALUE f, VALUE s, int line)
+{
+ must_be_ascii_compatible(s);
+ return parser_compile_string0(parser, f, s, line);
+}
+
+static rb_ast_t*
+parser_compile_string(struct ruby_parser *parser, const char *f, VALUE s, int line)
+{
+ return parser_compile_string_path(parser, rb_filesystem_str_new_cstr(f), s, line);
+}
+
+VALUE rb_io_gets_internal(VALUE io);
+
+static VALUE
+lex_io_gets(struct parser_params *p, rb_parser_input_data input, int line_count)
+{
+ VALUE io = (VALUE)input;
+
+ return rb_io_gets_internal(io);
+}
+
+static VALUE
+lex_gets_array(struct parser_params *p, rb_parser_input_data data, int index)
+{
+ VALUE array = (VALUE)data;
+ VALUE str = rb_ary_entry(array, index);
+ if (!NIL_P(str)) {
+ StringValue(str);
+ if (!rb_enc_asciicompat(rb_enc_get(str))) {
+ rb_raise(rb_eArgError, "invalid source encoding");
+ }
+ }
+ return str;
+}
+
+static rb_ast_t*
+parser_compile_file_path(struct ruby_parser *parser, VALUE fname, VALUE file, int start)
+{
+ parser->type = lex_type_io;
+ parser->data.lex_io.file = file;
+
+ return rb_parser_compile(parser->parser_params, lex_io_gets, fname, (rb_parser_input_data)file, start);
+}
+
+static rb_ast_t*
+parser_compile_array(struct ruby_parser *parser, VALUE fname, VALUE array, int start)
+{
+ parser->type = lex_type_array;
+ parser->data.lex_array.ary = array;
+
+ return rb_parser_compile(parser->parser_params, lex_gets_array, fname, (rb_parser_input_data)array, start);
+}
+
+static rb_ast_t*
+parser_compile_generic(struct ruby_parser *parser, rb_parser_lex_gets_func *lex_gets, VALUE fname, VALUE input, int start)
+{
+ parser->type = lex_type_generic;
+
+ return rb_parser_compile(parser->parser_params, lex_gets, fname, (rb_parser_input_data)input, start);
+}
+
rb_ast_t*
rb_parser_compile_file_path(VALUE vparser, VALUE fname, VALUE file, int start)
{
@@ -622,29 +775,33 @@ rb_parser_compile_file_path(VALUE vparser, VALUE fname, VALUE file, int start)
rb_ast_t *ast;
TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
- ast = rb_ruby_parser_compile_file_path(parser->parser_params, fname, file, start);
+ ast = parser_compile_file_path(parser, fname, file, start);
RB_GC_GUARD(vparser);
return ast;
}
-void
-rb_parser_keep_tokens(VALUE vparser)
+rb_ast_t*
+rb_parser_compile_array(VALUE vparser, VALUE fname, VALUE array, int start)
{
struct ruby_parser *parser;
+ rb_ast_t *ast;
TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
- rb_ruby_parser_keep_tokens(parser->parser_params);
+ ast = parser_compile_array(parser, fname, array, start);
+ RB_GC_GUARD(vparser);
+
+ return ast;
}
rb_ast_t*
-rb_parser_compile_generic(VALUE vparser, VALUE (*lex_gets)(VALUE, int), VALUE fname, VALUE input, int start)
+rb_parser_compile_generic(VALUE vparser, rb_parser_lex_gets_func *lex_gets, VALUE fname, VALUE input, int start)
{
struct ruby_parser *parser;
rb_ast_t *ast;
TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
- ast = rb_ruby_parser_compile_generic(parser->parser_params, lex_gets, fname, input, start);
+ ast = parser_compile_generic(parser, lex_gets, fname, input, start);
RB_GC_GUARD(vparser);
return ast;
@@ -657,7 +814,7 @@ rb_parser_compile_string(VALUE vparser, const char *f, VALUE s, int line)
rb_ast_t *ast;
TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
- ast = rb_ruby_parser_compile_string(parser->parser_params, f, s, line);
+ ast = parser_compile_string(parser, f, s, line);
RB_GC_GUARD(vparser);
return ast;
@@ -670,7 +827,7 @@ rb_parser_compile_string_path(VALUE vparser, VALUE f, VALUE s, int line)
rb_ast_t *ast;
TypedData_Get_Struct(vparser, struct ruby_parser, &ruby_parser_data_type, parser);
- ast = rb_ruby_parser_compile_string_path(parser->parser_params, f, s, line);
+ ast = parser_compile_string_path(parser, f, s, line);
RB_GC_GUARD(vparser);
return ast;
diff --git a/rubyparser.h b/rubyparser.h
index 2d898275e3..5bf9c9aa1b 100644
--- a/rubyparser.h
+++ b/rubyparser.h
@@ -73,6 +73,8 @@ enum rb_parser_shareability {
rb_parser_shareable_everything,
};
+typedef void* rb_parser_input_data;
+
/*
* AST Node
*/
@@ -1419,7 +1421,6 @@ typedef struct rb_parser_config_struct {
RUBY_SYMBOL_EXPORT_BEGIN
void rb_ruby_parser_free(void *ptr);
-rb_ast_t* rb_ruby_parser_compile_string(rb_parser_t *p, const char *f, VALUE s, int line);
#ifdef UNIVERSAL_PARSER
rb_parser_t *rb_ruby_parser_allocate(const rb_parser_config_t *config);