summaryrefslogtreecommitdiff
path: root/ext/json/parser
diff options
context:
space:
mode:
authorJean Boussier <[email protected]>2025-03-11 20:50:26 +0100
committerHiroshi SHIBATA <[email protected]>2025-03-12 18:02:09 +0900
commit1d07deb422819ce0be58c5d6dd24d632d30ef817 (patch)
tree34f39c8ee68aba57c8d8f63fc7ffb6f3b883d240 /ext/json/parser
parentde9ce8c0e09c209d70394f32e0c51e5047b0607e (diff)
[ruby/json] Raise a ParserError on all incomplete unicode escape sequences.
This was the behavior until `2.10.0` inadvertently changed it: since then `"\u1"` would still raise, but `"\u1zzz"` wouldn't. https://github.com/ruby/json/commit/7d0637b9e6
Diffstat (limited to 'ext/json/parser')
-rw-r--r--ext/json/parser/parser.c85
1 files changed, 42 insertions, 43 deletions
diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c
index 776eb916f0..0a1d937575 100644
--- a/ext/json/parser/parser.c
+++ b/ext/json/parser/parser.c
@@ -341,6 +341,44 @@ static void rvalue_stack_eagerly_release(VALUE handle)
}
}
+
+#ifndef HAVE_STRNLEN
+static size_t strnlen(const char *s, size_t maxlen)
+{
+ char *p;
+ return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen);
+}
+#endif
+
+#define PARSE_ERROR_FRAGMENT_LEN 32
+#ifdef RBIMPL_ATTR_NORETURN
+RBIMPL_ATTR_NORETURN()
+#endif
+static void raise_parse_error(const char *format, const char *start)
+{
+ unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 1];
+
+ size_t len = start ? strnlen(start, PARSE_ERROR_FRAGMENT_LEN) : 0;
+ const char *ptr = start;
+
+ if (len == PARSE_ERROR_FRAGMENT_LEN) {
+ MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN);
+
+ while (buffer[len - 1] >= 0x80 && buffer[len - 1] < 0xC0) { // Is continuation byte
+ len--;
+ }
+
+ if (buffer[len - 1] >= 0xC0) { // multibyte character start
+ len--;
+ }
+
+ buffer[len] = '\0';
+ ptr = (const char *)buffer;
+ }
+
+ rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr);
+}
+
/* unicode */
static const signed char digit_values[256] = {
@@ -362,21 +400,19 @@ static const signed char digit_values[256] = {
static uint32_t unescape_unicode(const unsigned char *p)
{
- const uint32_t replacement_char = 0xFFFD;
-
signed char b;
uint32_t result = 0;
b = digit_values[p[0]];
- if (b < 0) return replacement_char;
+ if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2);
result = (result << 4) | (unsigned char)b;
b = digit_values[p[1]];
- if (b < 0) return replacement_char;
+ if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2);
result = (result << 4) | (unsigned char)b;
b = digit_values[p[2]];
- if (b < 0) return replacement_char;
+ if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2);
result = (result << 4) | (unsigned char)b;
b = digit_values[p[3]];
- if (b < 0) return replacement_char;
+ if (b < 0) raise_parse_error("incomplete unicode character escape sequence at '%s'", (char *)p - 2);
result = (result << 4) | (unsigned char)b;
return result;
}
@@ -440,43 +476,6 @@ typedef struct JSON_ParserStateStruct {
static const rb_data_type_t JSON_ParserConfig_type;
-#ifndef HAVE_STRNLEN
-static size_t strnlen(const char *s, size_t maxlen)
-{
- char *p;
- return ((p = memchr(s, '\0', maxlen)) ? p - s : maxlen);
-}
-#endif
-
-#define PARSE_ERROR_FRAGMENT_LEN 32
-#ifdef RBIMPL_ATTR_NORETURN
-RBIMPL_ATTR_NORETURN()
-#endif
-static void raise_parse_error(const char *format, const char *start)
-{
- unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 1];
-
- size_t len = start ? strnlen(start, PARSE_ERROR_FRAGMENT_LEN) : 0;
- const char *ptr = start;
-
- if (len == PARSE_ERROR_FRAGMENT_LEN) {
- MEMCPY(buffer, start, char, PARSE_ERROR_FRAGMENT_LEN);
-
- while (buffer[len - 1] >= 0x80 && buffer[len - 1] < 0xC0) { // Is continuation byte
- len--;
- }
-
- if (buffer[len - 1] >= 0xC0) { // multibyte character start
- len--;
- }
-
- buffer[len] = '\0';
- ptr = (const char *)buffer;
- }
-
- rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr);
-}
-
static const bool whitespace[256] = {
[' '] = 1,
['\t'] = 1,