diff options
author | Jean Boussier <[email protected]> | 2025-03-11 20:50:26 +0100 |
---|---|---|
committer | Hiroshi SHIBATA <[email protected]> | 2025-03-12 18:02:09 +0900 |
commit | 1d07deb422819ce0be58c5d6dd24d632d30ef817 (patch) | |
tree | 34f39c8ee68aba57c8d8f63fc7ffb6f3b883d240 /ext/json/parser | |
parent | de9ce8c0e09c209d70394f32e0c51e5047b0607e (diff) |
[ruby/json] Raise a ParserError on all incomplete unicode escape sequences.
This was the behavior until `2.10.0` inadvertently changed it.
`"\u1"` would raise, but `"\u1zzz"` wouldn't.
https://github.com/ruby/json/commit/7d0637b9e6
Diffstat (limited to 'ext/json/parser')
-rw-r--r-- | ext/json/parser/parser.c | 85 |
1 files changed, 42 insertions, 43 deletions
diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c index 776eb916f0..0a1d937575 100644 --- a/ext/json/parser/parser.c +++ b/ext/json/parser/parser.c @@ -341,6 +341,44 @@ static void rvalue_stack_eagerly_release(VALUE handle) } } + +#ifndef HAVE_STRNLEN +static size_t strnlen(const char *s, size_t maxlen) +{ + char *p; + return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen); +} +#endif + +#define PARSE_ERROR_FRAGMENT_LEN 32 +#ifdef RBIMPL_ATTR_NORETURN +RBIMPL_ATTR_NORETURN() +#endif +static void raise_parse_error(const char *format, const char *start) +{ + unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 1]; + + size_t len = start ? strnlen(start, PARSE_ERROR_FRAGMENT_LEN) : 0; + const char *ptr = start; + + if (len == PARSE_ERROR_FRAGMENT_LEN) { + MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN); + + while (buffer[len - 1] >= 0x80 && buffer[len - 1] < 0xC0) { // Is continuation byte + len--; + } + + if (buffer[len - 1] >= 0xC0) { // multibyte character start + len--; + } + + buffer[len] = '\0'; + ptr = (const char *)buffer; + } + + rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr); +} + /* unicode */ static const signed char digit_values[256] = { @@ -362,21 +400,19 @@ static const signed char digit_values[256] = { static uint32_t unescape_unicode(const unsigned char *p) { - const uint32_t replacement_char = 0xFFFD; - signed char b; uint32_t result = 0; b = digit_values[p[0]]; - if (b < 0) return replacement_char; + if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2); result = (result << 4) | (unsigned char)b; b = digit_values[p[1]]; - if (b < 0) return replacement_char; + if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2); result = (result << 4) | (unsigned char)b; b = digit_values[p[2]]; - if (b < 0) return replacement_char; + if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2); 
result = (result << 4) | (unsigned char)b; b = digit_values[p[3]]; - if (b < 0) return replacement_char; + if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2); result = (result << 4) | (unsigned char)b; return result; } @@ -440,43 +476,6 @@ typedef struct JSON_ParserStateStruct { static const rb_data_type_t JSON_ParserConfig_type; -#ifndef HAVE_STRNLEN -static size_t strnlen(const char *s, size_t maxlen) -{ - char *p; - return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen); -} -#endif - -#define PARSE_ERROR_FRAGMENT_LEN 32 -#ifdef RBIMPL_ATTR_NORETURN -RBIMPL_ATTR_NORETURN() -#endif -static void raise_parse_error(const char *format, const char *start) -{ - unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 1]; - - size_t len = start ? strnlen(start, PARSE_ERROR_FRAGMENT_LEN) : 0; - const char *ptr = start; - - if (len == PARSE_ERROR_FRAGMENT_LEN) { - MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN); - - while (buffer[len - 1] >= 0x80 && buffer[len - 1] < 0xC0) { // Is continuation byte - len--; - } - - if (buffer[len - 1] >= 0xC0) { // multibyte character start - len--; - } - - buffer[len] = '\0'; - ptr = (const char *)buffer; - } - - rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr); -} - static const bool whitespace[256] = { [' '] = 1, ['\t'] = 1, |