summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Nobuyoshi Nakada <[email protected]> 2024-12-28 18:40:37 +0900
committer Nobuyoshi Nakada <[email protected]> 2024-12-28 18:40:37 +0900
commit e4ec2128ae9c5c2a43cd599759f19db21fc0238f (patch)
tree 14c60a087fbf2fe271b78e784be67913830e6a5c
parent 0ccc7657f3f51d973eac5f846d1594062838a2c9 (diff)
[Bug #20990] Reject escaped multibyte char with control/meta prefix
Notes
Notes: Merged: https://github.com/ruby/ruby/pull/12485
-rw-r--r-- parse.y 4
-rw-r--r-- prism/prism.c 8
-rw-r--r-- test/ripper/test_lexer.rb 36
-rw-r--r-- test/ruby/test_literal.rb 4
4 files changed, 51 insertions, 1 deletions
diff --git a/parse.y b/parse.y
index fb3e846258..463781ce3a 100644
--- a/parse.y
+++ b/parse.y
@@ -8231,6 +8231,10 @@ read_escape(struct parser_params *p, int flags, const char *begin)
return '\0';
default:
+ if (!ISASCII(c)) {
+ tokskip_mbchar(p);
+ goto eof;
+ }
return c;
}
}
diff --git a/prism/prism.c b/prism/prism.c
index eca276a357..bb69f139ae 100644
--- a/prism/prism.c
+++ b/prism/prism.c
@@ -9657,7 +9657,8 @@ escape_read_warn(pm_parser_t *parser, uint8_t flags, uint8_t flag, const char *t
*/
static void
escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expression_buffer, uint8_t flags) {
- switch (peek(parser)) {
+ uint8_t peeked = peek(parser);
+ switch (peeked) {
case '\\': {
parser->current.end++;
escape_write_byte(parser, buffer, regular_expression_buffer, flags, escape_byte('\\', flags));
@@ -10054,6 +10055,11 @@ escape_read(pm_parser_t *parser, pm_buffer_t *buffer, pm_buffer_t *regular_expre
}
/* fallthrough */
default: {
+ if ((flags & (PM_ESCAPE_FLAG_CONTROL | PM_ESCAPE_FLAG_META)) && !char_is_ascii_printable(peeked)) {
+ size_t width = parser->encoding->char_width(parser->current.end, parser->end - parser->current.end);
+ pm_parser_err(parser, parser->current.start, parser->current.end + width, PM_ERR_ESCAPE_INVALID_META);
+ return;
+ }
if (parser->current.end < parser->end) {
escape_write_escape_encoded(parser, buffer, regular_expression_buffer, flags);
} else {
diff --git a/test/ripper/test_lexer.rb b/test/ripper/test_lexer.rb
index 4e8c0003db..a371e8c42d 100644
--- a/test/ripper/test_lexer.rb
+++ b/test/ripper/test_lexer.rb
@@ -355,6 +355,15 @@ world"
]
assert_lexer(expected, code)
+
+ code = %["\\C-\\\u{3042}"]
+ expected = [
+ [[1, 0], :on_tstring_beg, '"', state(:EXPR_BEG)],
+ [[1, 1], :on_tstring_content, "\\C-\\\u{3042}", state(:EXPR_BEG)],
+ [[1, 8], :on_tstring_end, '"', state(:EXPR_END)],
+ ]
+
+ assert_lexer(expected, code)
end
def test_invalid_escape_meta_mbchar
@@ -366,6 +375,15 @@ world"
]
assert_lexer(expected, code)
+
+ code = %["\\M-\\\u{3042}"]
+ expected = [
+ [[1, 0], :on_tstring_beg, '"', state(:EXPR_BEG)],
+ [[1, 1], :on_tstring_content, "\\M-\\\u{3042}", state(:EXPR_BEG)],
+ [[1, 8], :on_tstring_end, '"', state(:EXPR_END)],
+ ]
+
+ assert_lexer(expected, code)
end
def test_invalid_escape_meta_ctrl_mbchar
@@ -377,6 +395,15 @@ world"
]
assert_lexer(expected, code)
+
+ code = %["\\M-\\C-\\\u{3042}"]
+ expected = [
+ [[1, 0], :on_tstring_beg, '"', state(:EXPR_BEG)],
+ [[1, 1], :on_tstring_content, "\\M-\\C-\\\u{3042}", state(:EXPR_BEG)],
+ [[1, 11], :on_tstring_end, '"', state(:EXPR_END)],
+ ]
+
+ assert_lexer(expected, code)
end
def test_invalid_escape_ctrl_meta_mbchar
@@ -388,6 +415,15 @@ world"
]
assert_lexer(expected, code)
+
+ code = %["\\C-\\M-\\\u{3042}"]
+ expected = [
+ [[1, 0], :on_tstring_beg, '"', state(:EXPR_BEG)],
+ [[1, 1], :on_tstring_content, "\\C-\\M-\\\u{3042}", state(:EXPR_BEG)],
+ [[1, 11], :on_tstring_end, '"', state(:EXPR_END)],
+ ]
+
+ assert_lexer(expected, code)
end
def test_invalid_escape_string
diff --git a/test/ruby/test_literal.rb b/test/ruby/test_literal.rb
index 1fdc6aa853..941a4456d0 100644
--- a/test/ruby/test_literal.rb
+++ b/test/ruby/test_literal.rb
@@ -97,6 +97,10 @@ class TestRubyLiteral < Test::Unit::TestCase
assert_equal "ab", eval("?a 'b'")
assert_equal "a\nb", eval("<<A 'b'\na\nA")
+ assert_raise(SyntaxError) {eval('"\C-' "\u3042" '"')}
+ assert_raise(SyntaxError) {eval('"\C-\\' "\u3042" '"')}
+ assert_raise(SyntaxError) {eval('"\M-' "\u3042" '"')}
+ assert_raise(SyntaxError) {eval('"\M-\\' "\u3042" '"')}
ensure
$VERBOSE = verbose_bak
end