summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--ast.c2
-rw-r--r--common.mk1
-rw-r--r--compile.c20
-rw-r--r--internal/ruby_parser.h1
-rw-r--r--misc/lldb_rb/utils.py2
-rw-r--r--node.c6
-rw-r--r--node_dump.c11
-rw-r--r--parse.y267
-rw-r--r--ruby_parser.c11
-rw-r--r--rubyparser.h14
10 files changed, 283 insertions, 52 deletions
diff --git a/ast.c b/ast.c
index b76547a8e8..3b0b53e64e 100644
--- a/ast.c
+++ b/ast.c
@@ -567,6 +567,8 @@ node_children(rb_ast_t *ast, const NODE *node)
return rb_ary_new_from_args(1, rb_node_rational_literal_val(node));
case NODE_IMAGINARY:
return rb_ary_new_from_args(1, rb_node_imaginary_literal_val(node));
+ case NODE_REGX:
+ return rb_ary_new_from_args(1, rb_node_regx_string_val(node));
case NODE_ONCE:
return rb_ary_new_from_node_args(ast, 1, RNODE_ONCE(node)->nd_body);
case NODE_DSTR:
diff --git a/common.mk b/common.mk
index 9440ad1d0d..51a58c286e 100644
--- a/common.mk
+++ b/common.mk
@@ -15931,6 +15931,7 @@ ruby_parser.$(OBJEXT): $(top_srcdir)/internal/fixnum.h
ruby_parser.$(OBJEXT): $(top_srcdir)/internal/imemo.h
ruby_parser.$(OBJEXT): $(top_srcdir)/internal/numeric.h
ruby_parser.$(OBJEXT): $(top_srcdir)/internal/rational.h
+ruby_parser.$(OBJEXT): $(top_srcdir)/internal/re.h
ruby_parser.$(OBJEXT): $(top_srcdir)/internal/ruby_parser.h
ruby_parser.$(OBJEXT): $(top_srcdir)/internal/serial.h
ruby_parser.$(OBJEXT): $(top_srcdir)/internal/static_assert.h
diff --git a/compile.c b/compile.c
index b2ff9c3513..5d619e0ca6 100644
--- a/compile.c
+++ b/compile.c
@@ -1931,6 +1931,9 @@ iseq_set_arguments_keywords(rb_iseq_t *iseq, LINK_ANCHOR *const optargs,
case NODE_SYM:
dv = rb_node_sym_string_val(val_node);
break;
+ case NODE_REGX:
+ dv = rb_node_regx_string_val(val_node);
+ break;
case NODE_LINE:
dv = rb_node_line_lineno_val(val_node);
break;
@@ -4499,6 +4502,7 @@ compile_branch_condition(rb_iseq_t *iseq, LINK_ANCHOR *ret, const NODE *cond,
case NODE_IMAGINARY: /* NODE_IMAGINARY is always true */
case NODE_TRUE:
case NODE_STR:
+ case NODE_REGX:
case NODE_ZLIST:
case NODE_LAMBDA:
/* printf("useless condition eliminate (%s)\n", ruby_node_name(nd_type(cond))); */
@@ -4702,6 +4706,7 @@ static_literal_node_p(const NODE *node, const rb_iseq_t *iseq, bool hash_key)
switch (nd_type(node)) {
case NODE_LIT:
case NODE_SYM:
+ case NODE_REGX:
case NODE_LINE:
case NODE_ENCODING:
case NODE_INTEGER:
@@ -4740,6 +4745,8 @@ static_literal_value(const NODE *node, rb_iseq_t *iseq)
return Qfalse;
case NODE_SYM:
return rb_node_sym_string_val(node);
+ case NODE_REGX:
+ return rb_node_regx_string_val(node);
case NODE_LINE:
return rb_node_line_lineno_val(node);
case NODE_ENCODING:
@@ -5785,6 +5792,7 @@ defined_expr0(rb_iseq_t *iseq, LINK_ANCHOR *const ret,
case NODE_STR:
case NODE_LIT:
case NODE_SYM:
+ case NODE_REGX:
case NODE_LINE:
case NODE_FILE:
case NODE_ENCODING:
@@ -7212,6 +7220,7 @@ iseq_compile_pattern_each(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *c
}
case NODE_LIT:
case NODE_SYM:
+ case NODE_REGX:
case NODE_LINE:
case NODE_INTEGER:
case NODE_FLOAT:
@@ -9637,7 +9646,7 @@ compile_match(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const node, i
INIT_ANCHOR(val);
switch ((int)type) {
case NODE_MATCH:
- ADD_INSN1(recv, node, putobject, RNODE_MATCH(node)->nd_lit);
+ ADD_INSN1(recv, node, putobject, rb_node_regx_string_val(node));
ADD_INSN2(val, node, getspecial, INT2FIX(0),
INT2FIX(0));
break;
@@ -9799,6 +9808,7 @@ compile_kw_arg(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const node,
}
else if (nd_type_p(default_value, NODE_LIT) ||
nd_type_p(default_value, NODE_SYM) ||
+ nd_type_p(default_value, NODE_REGX) ||
nd_type_p(default_value, NODE_LINE) ||
nd_type_p(default_value, NODE_INTEGER) ||
nd_type_p(default_value, NODE_FLOAT) ||
@@ -10385,6 +10395,14 @@ iseq_compile_each0(rb_iseq_t *iseq, LINK_ANCHOR *const ret, const NODE *const no
case NODE_EVSTR:
CHECK(compile_evstr(iseq, ret, RNODE_EVSTR(node)->nd_body, popped));
break;
+ case NODE_REGX:{
+ if (!popped) {
+ VALUE lit = rb_node_regx_string_val(node);
+ ADD_INSN1(ret, node, putobject, lit);
+ RB_OBJ_WRITTEN(iseq, Qundef, lit);
+ }
+ break;
+ }
case NODE_DREGX:
compile_dregx(iseq, ret, node, popped);
break;
diff --git a/internal/ruby_parser.h b/internal/ruby_parser.h
index 2435c207df..7b4c715268 100644
--- a/internal/ruby_parser.h
+++ b/internal/ruby_parser.h
@@ -23,6 +23,7 @@ VALUE rb_str_new_parser_string(rb_parser_string_t *str);
VALUE rb_node_str_string_val(const NODE *);
VALUE rb_node_sym_string_val(const NODE *);
VALUE rb_node_dstr_string_val(const NODE *);
+VALUE rb_node_regx_string_val(const NODE *);
VALUE rb_node_dregx_string_val(const NODE *);
VALUE rb_node_line_lineno_val(const NODE *);
VALUE rb_node_file_path_val(const NODE *);
diff --git a/misc/lldb_rb/utils.py b/misc/lldb_rb/utils.py
index b52c3906ba..a321426234 100644
--- a/misc/lldb_rb/utils.py
+++ b/misc/lldb_rb/utils.py
@@ -379,6 +379,8 @@ class RbInspector(LLDBInterface):
self._append_expression("*(struct RNode_DXSTR *) %0#x" % val.GetValueAsUnsigned())
elif nd_type == self.ruby_globals["NODE_EVSTR"]:
self._append_expression("*(struct RNode_EVSTR *) %0#x" % val.GetValueAsUnsigned())
+ elif nd_type == self.ruby_globals["NODE_REGX"]:
+ self._append_expression("*(struct RNode_REGX *) %0#x" % val.GetValueAsUnsigned())
elif nd_type == self.ruby_globals["NODE_DREGX"]:
self._append_expression("*(struct RNode_DREGX *) %0#x" % val.GetValueAsUnsigned())
elif nd_type == self.ruby_globals["NODE_ONCE"]:
diff --git a/node.c b/node.c
index 8c579cc272..2828af26fc 100644
--- a/node.c
+++ b/node.c
@@ -195,6 +195,10 @@ free_ast_value(rb_ast_t *ast, void *ctx, NODE *node)
case NODE_SYM:
parser_string_free(ast, RNODE_SYM(node)->string);
break;
+ case NODE_REGX:
+ case NODE_MATCH:
+ parser_string_free(ast, RNODE_REGX(node)->string);
+ break;
case NODE_DSYM:
parser_string_free(ast, RNODE_DSYM(node)->string);
break;
@@ -268,7 +272,6 @@ static bool
nodetype_markable_p(enum node_type type)
{
switch (type) {
- case NODE_MATCH:
case NODE_LIT:
return true;
default:
@@ -374,7 +377,6 @@ mark_and_move_ast_value(rb_ast_t *ast, void *ctx, NODE *node)
#endif
switch (nd_type(node)) {
- case NODE_MATCH:
case NODE_LIT:
rb_gc_mark_and_move(&RNODE_LIT(node)->nd_lit);
break;
diff --git a/node_dump.c b/node_dump.c
index fd6fa20aa4..bf13e6d5d1 100644
--- a/node_dump.c
+++ b/node_dump.c
@@ -678,7 +678,8 @@ dump_node(VALUE buf, VALUE indent, int comment, const NODE * node)
ANN("match expression (against $_ implicitly)");
ANN("format: [nd_lit] (in condition)");
ANN("example: if /foo/; foo; end");
- F_LIT(nd_lit, RNODE_MATCH, "regexp");
+ LAST_NODE;
+ F_VALUE(string, rb_node_regx_string_val(node), "string");
return;
case NODE_MATCH2:
@@ -750,6 +751,14 @@ dump_node(VALUE buf, VALUE indent, int comment, const NODE * node)
F_VALUE(val, rb_node_imaginary_literal_val(node), "val");
return;
+ case NODE_REGX:
+ ANN("regexp literal");
+ ANN("format: [string]");
+ ANN("example: /foo/");
+ LAST_NODE;
+ F_VALUE(string, rb_node_regx_string_val(node), "string");
+ return;
+
case NODE_ONCE:
ANN("once evaluation");
ANN("format: [nd_body]");
diff --git a/parse.y b/parse.y
index b8f03b5473..9ce631dd35 100644
--- a/parse.y
+++ b/parse.y
@@ -88,6 +88,7 @@ hash_literal_key_p(VALUE k)
case NODE_IMAGINARY:
case NODE_STR:
case NODE_SYM:
+ case NODE_REGX:
case NODE_LINE:
case NODE_FILE:
case NODE_ENCODING:
@@ -137,6 +138,13 @@ node_imaginary_cmp(rb_node_imaginary_t *n1, rb_node_imaginary_t *n2)
}
static int
+rb_parser_regx_hash_cmp(rb_node_regx_t *n1, rb_node_regx_t *n2)
+{
+ return (n1->options != n2->options ||
+ rb_parser_string_hash_cmp(n1->string, n2->string));
+}
+
+static int
node_integer_line_cmp(const NODE *node_i, const NODE *line)
{
VALUE num = rb_node_integer_literal_val(node_i);
@@ -190,6 +198,8 @@ node_cdhash_cmp(VALUE val, VALUE lit)
return rb_parser_string_hash_cmp(RNODE_STR(node_val)->string, RNODE_STR(node_lit)->string);
case NODE_SYM:
return rb_parser_string_hash_cmp(RNODE_SYM(node_val)->string, RNODE_SYM(node_lit)->string);
+ case NODE_REGX:
+ return rb_parser_regx_hash_cmp(RNODE_REGX(node_val), RNODE_REGX(node_lit));
case NODE_LINE:
return node_val->nd_loc.beg_pos.lineno != node_lit->nd_loc.beg_pos.lineno;
case NODE_FILE:
@@ -236,6 +246,8 @@ node_cdhash_hash(VALUE a)
return rb_parser_str_hash(RNODE_STR(node)->string);
case NODE_SYM:
return rb_parser_str_hash(RNODE_SYM(node)->string);
+ case NODE_REGX:
+ return rb_parser_str_hash(RNODE_REGX(node)->string);
case NODE_LINE:
/* Same with NODE_INTEGER FIXNUM case */
return (st_index_t)node->nd_loc.beg_pos.lineno;
@@ -1211,6 +1223,7 @@ static rb_node_dstr_t *rb_node_dstr_new(struct parser_params *p, rb_parser_strin
static rb_node_xstr_t *rb_node_xstr_new(struct parser_params *p, rb_parser_string_t *string, const YYLTYPE *loc);
static rb_node_dxstr_t *rb_node_dxstr_new(struct parser_params *p, rb_parser_string_t *string, long nd_alen, NODE *nd_next, const YYLTYPE *loc);
static rb_node_evstr_t *rb_node_evstr_new(struct parser_params *p, NODE *nd_body, const YYLTYPE *loc);
+static rb_node_regx_t *rb_node_regx_new(struct parser_params *p, rb_parser_string_t *string, int options, const YYLTYPE *loc);
static rb_node_once_t *rb_node_once_new(struct parser_params *p, NODE *nd_body, const YYLTYPE *loc);
static rb_node_args_t *rb_node_args_new(struct parser_params *p, const YYLTYPE *loc);
static rb_node_args_aux_t *rb_node_args_aux_new(struct parser_params *p, ID nd_pid, long nd_plen, const YYLTYPE *loc);
@@ -1319,6 +1332,7 @@ static rb_node_error_t *rb_node_error_new(struct parser_params *p, const YYLTYPE
#define NEW_XSTR(s,loc) (NODE *)rb_node_xstr_new(p,s,loc)
#define NEW_DXSTR(s,l,n,loc) (NODE *)rb_node_dxstr_new(p,s,l,n,loc)
#define NEW_EVSTR(n,loc) (NODE *)rb_node_evstr_new(p,n,loc)
+#define NEW_REGX(str,opts,loc) (NODE *)rb_node_regx_new(p,str,opts,loc)
#define NEW_ONCE(b,loc) (NODE *)rb_node_once_new(p,b,loc)
#define NEW_ARGS(loc) rb_node_args_new(p,loc)
#define NEW_ARGS_AUX(r,b,loc) rb_node_args_aux_new(p,r,b,loc)
@@ -1567,8 +1581,8 @@ static NODE *match_op(struct parser_params*,NODE*,NODE*,const YYLTYPE*,const YYL
static rb_ast_id_table_t *local_tbl(struct parser_params*);
-static VALUE reg_compile(struct parser_params*, VALUE, int);
-static void reg_fragment_setenc(struct parser_params*, VALUE, int);
+static VALUE reg_compile(struct parser_params*, rb_parser_string_t*, int);
+static void reg_fragment_setenc(struct parser_params*, rb_parser_string_t*, int);
#define reg_fragment_check rb_parser_reg_fragment_check
int reg_fragment_check(struct parser_params*, rb_parser_string_t*, int);
@@ -1592,7 +1606,7 @@ static int id_is_var(struct parser_params *p, ID id);
RUBY_SYMBOL_EXPORT_BEGIN
VALUE rb_parser_reg_compile(struct parser_params* p, VALUE str, int options);
-int rb_reg_fragment_setenc(struct parser_params*, VALUE, int);
+int rb_reg_fragment_setenc(struct parser_params*, rb_parser_string_t *, int);
enum lex_state_e rb_parser_trace_lex_state(struct parser_params *, enum lex_state_e, enum lex_state_e, int);
VALUE rb_parser_lex_state_name(struct parser_params *p, enum lex_state_e state);
void rb_parser_show_bitstack(struct parser_params *, stack_type, const char *, int);
@@ -1647,6 +1661,9 @@ static void numparam_pop(struct parser_params *p, NODE *prev_inner);
#define idFWD_ALL idDot3
#define arg_FWD_BLOCK idFWD_BLOCK
+#define RE_ONIG_OPTION_IGNORECASE 1
+#define RE_ONIG_OPTION_EXTEND (RE_ONIG_OPTION_IGNORECASE<<1)
+#define RE_ONIG_OPTION_MULTILINE (RE_ONIG_OPTION_EXTEND<<1)
#define RE_OPTION_ONCE (1<<16)
#define RE_OPTION_ENCODING_SHIFT 8
#define RE_OPTION_ENCODING(e) (((e)&0xff)<<RE_OPTION_ENCODING_SHIFT)
@@ -2237,6 +2254,14 @@ rb_parser_str_get_encoding(rb_parser_string_t *str)
return str->enc;
}
+#ifndef RIPPER
+static bool
+PARSER_ENCODING_IS_ASCII8BIT(struct parser_params *p, rb_parser_string_t *str)
+{
+ return rb_parser_str_get_encoding(str) == rb_ascii8bit_encoding();
+}
+#endif
+
static int
PARSER_ENC_CODERANGE(rb_parser_string_t *str)
{
@@ -2257,11 +2282,19 @@ PARSER_ENCODING_CODERANGE_SET(rb_parser_string_t *str, rb_encoding *enc, enum rb
}
static void
-PARSER_ENCODING_CODERANGE_CLEAR(rb_parser_string_t *str)
+PARSER_ENC_CODERANGE_CLEAR(rb_parser_string_t *str)
{
str->coderange = RB_PARSER_ENC_CODERANGE_UNKNOWN;
}
+#ifndef RIPPER
+static bool
+PARSER_ENC_CODERANGE_ASCIIONLY(rb_parser_string_t *str)
+{
+ return PARSER_ENC_CODERANGE(str) == RB_PARSER_ENC_CODERANGE_7BIT;
+}
+#endif
+
static bool
PARSER_ENC_CODERANGE_CLEAN_P(int cr)
{
@@ -2325,6 +2358,21 @@ rb_parser_enc_str_coderange(struct parser_params *p, rb_parser_string_t *str)
return cr;
}
+#ifndef RIPPER
+static rb_parser_string_t *
+rb_parser_enc_associate(struct parser_params *p, rb_parser_string_t *str, rb_encoding *enc)
+{
+ if (rb_parser_str_get_encoding(str) == enc)
+ return str;
+ if (!PARSER_ENC_CODERANGE_ASCIIONLY(str) ||
+ !rb_enc_asciicompat(enc)) {
+ PARSER_ENC_CODERANGE_CLEAR(str);
+ }
+ rb_parser_string_set_encoding(str, enc);
+ return str;
+}
+#endif
+
static bool
rb_parser_is_ascii_string(struct parser_params *p, rb_parser_string_t *str)
{
@@ -2394,7 +2442,7 @@ rb_parser_enc_compatible(struct parser_params *p, rb_parser_string_t *str1, rb_p
static void
rb_parser_str_modify(rb_parser_string_t *str)
{
- PARSER_ENCODING_CODERANGE_CLEAR(str);
+ PARSER_ENC_CODERANGE_CLEAR(str);
}
static void
@@ -2557,7 +2605,7 @@ rb_parser_str_resize(struct parser_params *p, rb_parser_string_t *str, long len)
long slen = PARSER_STRING_LEN(str);
if (slen > len && PARSER_ENC_CODERANGE(str) != RB_PARSER_ENC_CODERANGE_7BIT) {
- PARSER_ENCODING_CODERANGE_CLEAR(str);
+ PARSER_ENC_CODERANGE_CLEAR(str);
}
{
@@ -6828,6 +6876,7 @@ singleton : var_ref
case NODE_DSTR:
case NODE_XSTR:
case NODE_DXSTR:
+ case NODE_REGX:
case NODE_DREGX:
case NODE_LIT:
case NODE_SYM:
@@ -8394,6 +8443,61 @@ tokadd_escape(struct parser_params *p)
}
static int
+char_to_option(int c)
+{
+ int val;
+
+ switch (c) {
+ case 'i':
+ val = RE_ONIG_OPTION_IGNORECASE;
+ break;
+ case 'x':
+ val = RE_ONIG_OPTION_EXTEND;
+ break;
+ case 'm':
+ val = RE_ONIG_OPTION_MULTILINE;
+ break;
+ default:
+ val = 0;
+ break;
+ }
+ return val;
+}
+
+#define ARG_ENCODING_FIXED 16
+#define ARG_ENCODING_NONE 32
+#define ENC_ASCII8BIT 1
+#define ENC_EUC_JP 2
+#define ENC_Windows_31J 3
+#define ENC_UTF8 4
+
+static int
+char_to_option_kcode(int c, int *option, int *kcode)
+{
+ *option = 0;
+
+ switch (c) {
+ case 'n':
+ *kcode = ENC_ASCII8BIT;
+ return (*option = ARG_ENCODING_NONE);
+ case 'e':
+ *kcode = ENC_EUC_JP;
+ break;
+ case 's':
+ *kcode = ENC_Windows_31J;
+ break;
+ case 'u':
+ *kcode = ENC_UTF8;
+ break;
+ default:
+ *kcode = -1;
+ return (*option = char_to_option(c));
+ }
+ *option = ARG_ENCODING_FIXED;
+ return 1;
+}
+
+static int
regx_options(struct parser_params *p)
{
int kcode = 0;
@@ -8406,9 +8510,9 @@ regx_options(struct parser_params *p)
if (c == 'o') {
options |= RE_OPTION_ONCE;
}
- else if (rb_char_to_option_kcode(c, &opt, &kc)) {
+ else if (char_to_option_kcode(c, &opt, &kc)) {
if (kc >= 0) {
- if (kc != rb_ascii8bit_encindex()) kcode = c;
+ if (kc != ENC_ASCII8BIT) kcode = c;
kopt = opt;
}
else {
@@ -12222,6 +12326,16 @@ rb_node_evstr_new(struct parser_params *p, NODE *nd_body, const YYLTYPE *loc)
return n;
}
+static rb_node_regx_t *
+rb_node_regx_new(struct parser_params *p, rb_parser_string_t *string, int options, const YYLTYPE *loc)
+{
+ rb_node_regx_t *n = NODE_NEWNODE(NODE_REGX, rb_node_regx_t, loc);
+ n->string = string;
+ n->options = options & RE_OPTION_MASK;
+
+ return n;
+}
+
static rb_node_call_t *
rb_node_call_new(struct parser_params *p, NODE *nd_recv, ID nd_mid, NODE *nd_args, const YYLTYPE *loc)
{
@@ -12848,6 +12962,18 @@ str2dstr(struct parser_params *p, NODE *node)
}
static NODE *
+str2regx(struct parser_params *p, NODE *node, int options)
+{
+ NODE *new_node = (NODE *)NODE_NEW_INTERNAL(NODE_REGX, rb_node_regx_t);
+ nd_copy_flag(new_node, node);
+ RNODE_REGX(new_node)->string = RNODE_STR(node)->string;
+ RNODE_REGX(new_node)->options = options;
+ RNODE_STR(node)->string = 0;
+
+ return new_node;
+}
+
+static NODE *
evstr2dstr(struct parser_params *p, NODE *node)
{
if (nd_type_p(node, NODE_EVSTR)) {
@@ -12949,9 +13075,9 @@ match_op(struct parser_params *p, NODE *node1, NODE *node2, const YYLTYPE *op_lo
return match;
}
- case NODE_LIT:
- if (RB_TYPE_P(RNODE_LIT(n)->nd_lit, T_REGEXP)) {
- const VALUE lit = RNODE_LIT(n)->nd_lit;
+ case NODE_REGX:
+ {
+ const VALUE lit = rb_node_regx_string_val(n);
NODE *match = NEW_MATCH2(node1, node2, loc);
RNODE_MATCH2(match)->nd_args = reg_named_capture_assign(p, lit, loc);
nd_set_line(match, line);
@@ -12964,9 +13090,6 @@ match_op(struct parser_params *p, NODE *node1, NODE *node2, const YYLTYPE *op_lo
NODE *match3;
switch (nd_type(n)) {
- case NODE_LIT:
- if (!RB_TYPE_P(RNODE_LIT(n)->nd_lit, T_REGEXP)) break;
- /* fallthru */
case NODE_DREGX:
match3 = NEW_MATCH3(node2, node1, loc);
return match3;
@@ -13210,16 +13333,18 @@ new_regexp(struct parser_params *p, NODE *node, int options, const YYLTYPE *loc)
NODE *prev;
if (!node) {
- node = NEW_LIT(reg_compile(p, STR_NEW0(), options), loc);
- RB_OBJ_WRITTEN(p->ast, Qnil, RNODE_LIT(node)->nd_lit);
+ /* Check string is valid regex */
+ rb_parser_string_t *str = STRING_NEW0();
+ reg_compile(p, str, options);
+ node = NEW_REGX(str, options, loc);
return node;
}
switch (nd_type(node)) {
case NODE_STR:
{
- VALUE src = rb_node_str_string_val(node);
- node = NEW_LIT(reg_compile(p, src, options), loc);
- RB_OBJ_WRITTEN(p->ast, Qnil, RNODE_LIT(node)->nd_lit);
+ /* Check string is valid regex */
+ reg_compile(p, RNODE_STR(node)->string, options);
+ node = str2regx(p, node, options);
}
break;
default:
@@ -13255,9 +13380,8 @@ new_regexp(struct parser_params *p, NODE *node, int options, const YYLTYPE *loc)
}
}
if (!RNODE_DREGX(node)->nd_next) {
- VALUE src = rb_node_dregx_string_val(node);
/* Check string is valid regex */
- reg_compile(p, src, options);
+ reg_compile(p, RNODE_DREGX(node)->string, options);
}
if (options & RE_OPTION_ONCE) {
node = NEW_ONCE(node, loc);
@@ -13916,6 +14040,8 @@ shareable_literal_value(struct parser_params *p, NODE *node)
return rb_node_imaginary_literal_val(node);
case NODE_ENCODING:
return rb_node_encoding_val(node);
+ case NODE_REGX:
+ return rb_node_regx_string_val(node);
case NODE_LIT:
return RNODE_LIT(node)->nd_lit;
default:
@@ -13943,6 +14069,7 @@ shareable_literal_constant(struct parser_params *p, enum shareability shareable,
case NODE_NIL:
case NODE_LIT:
case NODE_SYM:
+ case NODE_REGX:
case NODE_LINE:
case NODE_INTEGER:
case NODE_FLOAT:
@@ -14305,6 +14432,7 @@ void_expr(struct parser_params *p, NODE *node)
case NODE_IMAGINARY:
case NODE_STR:
case NODE_DSTR:
+ case NODE_REGX:
case NODE_DREGX:
useless = "a literal";
break;
@@ -14441,6 +14569,7 @@ is_static_content(NODE *node)
} while ((node = RNODE_LIST(node)->nd_next) != 0);
case NODE_LIT:
case NODE_SYM:
+ case NODE_REGX:
case NODE_LINE:
case NODE_FILE:
case NODE_ENCODING:
@@ -14537,6 +14666,11 @@ cond0(struct parser_params *p, NODE *node, enum cond_type type, const YYLTYPE *l
SWITCH_BY_COND_TYPE(type, warn, "string ");
break;
+ case NODE_REGX:
+ if (!e_option_supplied(p)) SWITCH_BY_COND_TYPE(type, warn, "regex ");
+ nd_set_type(node, NODE_MATCH);
+ break;
+
case NODE_DREGX:
if (!e_option_supplied(p)) SWITCH_BY_COND_TYPE(type, warning, "regex ");
@@ -14573,12 +14707,8 @@ cond0(struct parser_params *p, NODE *node, enum cond_type type, const YYLTYPE *l
break;
case NODE_LIT:
- if (RB_TYPE_P(RNODE_LIT(node)->nd_lit, T_REGEXP)) {
- if (!e_option_supplied(p)) SWITCH_BY_COND_TYPE(type, warn, "regex ");
- nd_set_type(node, NODE_MATCH);
- }
- else if (RNODE_LIT(node)->nd_lit == Qtrue ||
- RNODE_LIT(node)->nd_lit == Qfalse) {
+ if (RNODE_LIT(node)->nd_lit == Qtrue ||
+ RNODE_LIT(node)->nd_lit == Qfalse) {
/* booleans are OK, e.g., while true */
}
else if (SYMBOL_P(RNODE_LIT(node)->nd_lit)) {
@@ -14963,6 +15093,7 @@ nd_type_st_key_enable_p(NODE *node)
case NODE_IMAGINARY:
case NODE_STR:
case NODE_SYM:
+ case NODE_REGX:
case NODE_LINE:
case NODE_FILE:
case NODE_ENCODING:
@@ -14984,6 +15115,7 @@ nd_st_key(struct parser_params *p, NODE *node)
case NODE_RATIONAL:
case NODE_IMAGINARY:
case NODE_SYM:
+ case NODE_REGX:
case NODE_LINE:
case NODE_ENCODING:
case NODE_FILE:
@@ -15012,6 +15144,8 @@ nd_value(struct parser_params *p, NODE *node)
return rb_node_imaginary_literal_val(node);
case NODE_SYM:
return rb_node_sym_string_val(node);
+ case NODE_REGX:
+ return rb_node_regx_string_val(node);
case NODE_LINE:
return rb_node_line_lineno_val(node);
case NODE_ENCODING:
@@ -15634,43 +15768,83 @@ dvar_curr(struct parser_params *p, ID id)
}
static void
-reg_fragment_enc_error(struct parser_params* p, VALUE str, int c)
+reg_fragment_enc_error(struct parser_params* p, rb_parser_string_t *str, int c)
{
compile_error(p,
"regexp encoding option '%c' differs from source encoding '%s'",
- c, rb_enc_name(rb_enc_get(str)));
+ c, rb_enc_name(rb_parser_str_get_encoding(str)));
}
#ifndef RIPPER
+static rb_encoding *
+find_enc(struct parser_params* p, const char *name)
+{
+ int idx = rb_enc_find_index(name);
+ if (idx < 0) {
+ rb_bug("unknown encoding name: %s", name);
+ }
+
+ return rb_enc_from_index(idx);
+}
+
+static rb_encoding *
+kcode_to_enc(struct parser_params* p, int kcode)
+{
+ rb_encoding *enc;
+
+ switch (kcode) {
+ case ENC_ASCII8BIT:
+ enc = rb_ascii8bit_encoding();
+ break;
+ case ENC_EUC_JP:
+ enc = find_enc(p, "EUC-JP");
+ break;
+ case ENC_Windows_31J:
+ enc = find_enc(p, "Windows-31J");
+ break;
+ case ENC_UTF8:
+ enc = rb_utf8_encoding();
+ break;
+ default:
+ enc = NULL;
+ break;
+ }
+
+ return enc;
+}
+
int
-rb_reg_fragment_setenc(struct parser_params* p, VALUE str, int options)
+rb_reg_fragment_setenc(struct parser_params* p, rb_parser_string_t *str, int options)
{
int c = RE_OPTION_ENCODING_IDX(options);
if (c) {
int opt, idx;
- rb_char_to_option_kcode(c, &opt, &idx);
- if (idx != ENCODING_GET(str) &&
- !is_ascii_string(str)) {
+ rb_encoding *enc;
+
+ char_to_option_kcode(c, &opt, &idx);
+ enc = kcode_to_enc(p, idx);
+ if (enc != rb_parser_str_get_encoding(str) &&
+ !rb_parser_is_ascii_string(p, str)) {
goto error;
}
- ENCODING_SET(str, idx);
+ rb_parser_string_set_encoding(str, enc);
}
else if (RE_OPTION_ENCODING_NONE(options)) {
- if (!ENCODING_IS_ASCII8BIT(str) &&
- !is_ascii_string(str)) {
+ if (!PARSER_ENCODING_IS_ASCII8BIT(p, str) &&
+ !rb_parser_is_ascii_string(p, str)) {
c = 'n';
goto error;
}
- rb_enc_associate(str, rb_ascii8bit_encoding());
+ rb_parser_enc_associate(p, str, rb_ascii8bit_encoding());
}
else if (rb_is_usascii_enc(p->enc)) {
- if (!is_ascii_string(str)) {
+ if (!rb_parser_is_ascii_string(p, str)) {
/* raise in re.c */
- rb_enc_associate(str, rb_usascii_encoding());
+ rb_parser_enc_associate(p, str, rb_usascii_encoding());
}
else {
- rb_enc_associate(str, rb_ascii8bit_encoding());
+ rb_parser_enc_associate(p, str, rb_ascii8bit_encoding());
}
}
return 0;
@@ -15681,7 +15855,7 @@ rb_reg_fragment_setenc(struct parser_params* p, VALUE str, int options)
#endif
static void
-reg_fragment_setenc(struct parser_params* p, VALUE str, int options)
+reg_fragment_setenc(struct parser_params* p, rb_parser_string_t *str, int options)
{
int c = rb_reg_fragment_setenc(p, str, options);
if (c) reg_fragment_enc_error(p, str, c);
@@ -15692,10 +15866,9 @@ int
reg_fragment_check(struct parser_params* p, rb_parser_string_t *str, int options)
{
VALUE err, str2;
+ reg_fragment_setenc(p, str, options);
/* TODO */
str2 = rb_str_new_parser_string(str);
- reg_fragment_setenc(p, str2, options);
- str->enc = rb_enc_get(str2);
err = rb_reg_check_preprocess(str2);
if (err != Qnil) {
err = rb_obj_as_string(err);
@@ -15769,10 +15942,12 @@ rb_reg_named_capture_assign_iter_impl(struct parser_params *p, const char *s, lo
#endif
static VALUE
-parser_reg_compile(struct parser_params* p, VALUE str, int options)
+parser_reg_compile(struct parser_params* p, rb_parser_string_t *str, int options)
{
+ VALUE str2;
reg_fragment_setenc(p, str, options);
- return rb_parser_reg_compile(p, str, options);
+ str2 = rb_str_new_parser_string(str);
+ return rb_parser_reg_compile(p, str2, options);
}
#ifndef RIPPER
@@ -15784,7 +15959,7 @@ rb_parser_reg_compile(struct parser_params* p, VALUE str, int options)
#endif
static VALUE
-reg_compile(struct parser_params* p, VALUE str, int options)
+reg_compile(struct parser_params* p, rb_parser_string_t *str, int options)
{
VALUE re;
VALUE err;
diff --git a/ruby_parser.c b/ruby_parser.c
index c4e37e3353..c233b9801b 100644
--- a/ruby_parser.c
+++ b/ruby_parser.c
@@ -1,5 +1,6 @@
/* This is a wrapper for parse.y */
+#include "internal/re.h"
#include "internal/ruby_parser.h"
#include "node.h"
@@ -1011,6 +1012,16 @@ rb_node_dregx_string_val(const NODE *node)
}
VALUE
+rb_node_regx_string_val(const NODE *node)
+{
+ rb_node_regx_t *node_reg = RNODE_REGX(node);
+ rb_parser_string_t *string = node_reg->string;
+ VALUE str = rb_enc_str_new(string->ptr, string->len, string->enc);
+
+ return rb_reg_compile(str, node_reg->options, NULL, 0);
+}
+
+VALUE
rb_node_line_lineno_val(const NODE *node)
{
return INT2FIX(node->nd_loc.beg_pos.lineno);
diff --git a/rubyparser.h b/rubyparser.h
index f3bd76d2eb..3fcfd32c9c 100644
--- a/rubyparser.h
+++ b/rubyparser.h
@@ -127,6 +127,7 @@ enum node_type {
NODE_XSTR,
NODE_DXSTR,
NODE_EVSTR,
+ NODE_REGX,
NODE_DREGX,
NODE_ONCE,
NODE_ARGS,
@@ -612,11 +613,12 @@ typedef struct RNode_BACK_REF {
long nd_nth;
} rb_node_back_ref_t;
-/* RNode_MATCH and RNode_LIT should be same structure */
+/* RNode_MATCH and RNode_REGX should be same structure */
typedef struct RNode_MATCH {
NODE node;
- VALUE nd_lit;
+ struct rb_parser_string *string;
+ int options;
} rb_node_match_t;
typedef struct RNode_MATCH2 {
@@ -719,6 +721,13 @@ typedef struct RNode_EVSTR {
struct RNode *nd_body;
} rb_node_evstr_t;
+typedef struct RNode_REGX {
+ NODE node;
+
+ struct rb_parser_string *string;
+ int options;
+} rb_node_regx_t;
+
typedef struct RNode_DREGX {
NODE node;
@@ -1093,6 +1102,7 @@ typedef struct RNode_ERROR {
#define RNODE_XSTR(node) ((struct RNode_XSTR *)(node))
#define RNODE_DXSTR(node) ((struct RNode_DXSTR *)(node))
#define RNODE_EVSTR(node) ((struct RNode_EVSTR *)(node))
+#define RNODE_REGX(node) ((struct RNode_REGX *)(node))
#define RNODE_DREGX(node) ((struct RNode_DREGX *)(node))
#define RNODE_ONCE(node) ((struct RNode_ONCE *)(node))
#define RNODE_ARGS(node) ((struct RNode_ARGS *)(node))