summary refs log tree commit diff
diff options
context:
space:
mode:
author Nobuyoshi Nakada <[email protected]> 2019-05-24 16:10:59 +0900
committer Nobuyoshi Nakada <[email protected]> 2019-05-24 16:12:17 +0900
commit 2893550452f6f3cadb17c670da185813d7d0a835 (patch)
tree a19bc3bd37f155cb2155fc1311f85fa851f77b55
parent 45ad375acccca2bb0852613b1e809a7af556f5e6 (diff)
Mixed encoding error can continue to parse
-rw-r--r-- parse.y 19
-rw-r--r-- test/ruby/test_parse.rb 3
-rw-r--r-- test/ruby/test_syntax.rb 15
3 files changed, 22 insertions, 15 deletions
diff --git a/parse.y b/parse.y
index 92e4d05113..2b34e8ac2f 100644
--- a/parse.y
+++ b/parse.y
@@ -6180,7 +6180,7 @@ tokadd_codepoint(struct parser_params *p, rb_encoding **encp,
}
/* return value is for ?\u3042 */
-static int
+static void
parser_tokadd_utf8(struct parser_params *p, rb_encoding **encp,
int string_literal, int symbol_literal, int regexp_literal)
{
@@ -6214,7 +6214,7 @@ parser_tokadd_utf8(struct parser_params *p, rb_encoding **encp,
unterminated:
literal_flush(p, p->lex.pcur);
yyerror0("unterminated Unicode escape");
- return 0;
+ return;
}
if (regexp_literal) tokadd(p, close_brace);
@@ -6222,11 +6222,11 @@ parser_tokadd_utf8(struct parser_params *p, rb_encoding **encp,
}
else { /* handle \uxxxx form */
if (!tokadd_codepoint(p, encp, regexp_literal, FALSE)) {
- return 0;
+ return;
}
}
- return TRUE;
+ return;
}
#define ESCAPE_CONTROL 1
@@ -6568,11 +6568,9 @@ tokadd_string(struct parser_params *p,
tokadd(p, '\\');
break;
}
- if (!parser_tokadd_utf8(p, enc, term,
- func & STR_FUNC_SYMBOL,
- func & STR_FUNC_REGEXP)) {
- continue;
- }
+ parser_tokadd_utf8(p, enc, term,
+ func & STR_FUNC_SYMBOL,
+ func & STR_FUNC_REGEXP);
continue;
default:
@@ -8070,8 +8068,7 @@ parse_qmark(struct parser_params *p, int space_seen)
if (peek(p, 'u')) {
nextc(p);
enc = rb_utf8_encoding();
- if (!parser_tokadd_utf8(p, &enc, -1, 0, 0))
- return 0;
+ parser_tokadd_utf8(p, &enc, -1, 0, 0);
}
else if (!lex_eol_p(p) && !(c = *p->lex.pcur, ISASCII(c))) {
nextc(p);
diff --git a/test/ruby/test_parse.rb b/test/ruby/test_parse.rb
index dc4c143241..c59454f8f7 100644
--- a/test/ruby/test_parse.rb
+++ b/test/ruby/test_parse.rb
@@ -562,6 +562,9 @@ class TestParse < Test::Unit::TestCase
assert_raise(SyntaxError) { eval(" ?a\x8a".force_encoding("utf-8")) }
assert_equal("\u{1234}", eval("?\u{1234}"))
assert_equal("\u{1234}", eval('?\u{1234}'))
+ assert_equal("\u{1234}", eval('?\u1234'))
+ e = assert_syntax_error('"#{?\u123}"', 'invalid Unicode escape')
+ assert_not_match(/end-of-input/, e.message)
end
def test_percent
diff --git a/test/ruby/test_syntax.rb b/test/ruby/test_syntax.rb
index 7bf1e0e43c..e640262d90 100644
--- a/test/ruby/test_syntax.rb
+++ b/test/ruby/test_syntax.rb
@@ -775,32 +775,39 @@ eom
end
def test_heredoc_mixed_encoding
- assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source')
+ e = assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source')
#encoding: cp932
<<-TEXT
\xe9\x9d\u1234
TEXT
HEREDOC
- assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source')
+ assert_not_match(/end-of-input/, e.message)
+
+ e = assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source')
#encoding: cp932
<<-TEXT
\xe9\x9d
\u1234
TEXT
HEREDOC
- assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source')
+ assert_not_match(/end-of-input/, e.message)
+
+ e = assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source')
#encoding: cp932
<<-TEXT
\u1234\xe9\x9d
TEXT
HEREDOC
- assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source')
+ assert_not_match(/end-of-input/, e.message)
+
+ e = assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source')
#encoding: cp932
<<-TEXT
\u1234
\xe9\x9d
TEXT
HEREDOC
+ assert_not_match(/end-of-input/, e.message)
end
def test_lineno_operation_brace_block