summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJean Boussier <[email protected]>2024-11-05 16:35:58 +0100
committerJean Boussier <[email protected]>2024-11-05 18:00:36 +0100
commitca8f21ace86e9c7ab184b3f0087ff458c06b178c (patch)
treee5a4c05ba4bff8d9dc19963936d2c81248200881
parente8522f06b51f86214d28259118c736ac8951d8cd (diff)
[ruby/json] Resync
Notes
Notes: Merged: https://github.com/ruby/ruby/pull/12003
-rw-r--r--ext/json/fbuffer/fbuffer.h51
-rw-r--r--ext/json/generator/generator.c29
-rw-r--r--ext/json/generator/generator.h91
-rw-r--r--ext/json/lib/json.rb7
-rw-r--r--ext/json/lib/json/common.rb23
-rw-r--r--ext/json/lib/json/ext.rb6
-rw-r--r--ext/json/lib/json/ext/generator/state.rb (renamed from lib/json/ext/generator/state.rb)0
-rw-r--r--ext/json/parser/extconf.rb5
-rw-r--r--ext/json/parser/parser.c1752
-rw-r--r--ext/json/parser/parser.h78
-rw-r--r--ext/json/parser/parser.rl747
-rw-r--r--test/json/fixtures/fail4.json1
-rw-r--r--test/json/fixtures/fail9.json1
-rw-r--r--test/json/json_common_interface_test.rb4
-rw-r--r--test/json/json_ext_parser_test.rb74
-rwxr-xr-xtest/json/json_generator_test.rb48
-rw-r--r--test/json/json_parser_test.rb92
-rw-r--r--test/json/test_helper.rb24
18 files changed, 2167 insertions, 866 deletions
diff --git a/ext/json/fbuffer/fbuffer.h b/ext/json/fbuffer/fbuffer.h
index 367ebd89ff..3e154a5fa8 100644
--- a/ext/json/fbuffer/fbuffer.h
+++ b/ext/json/fbuffer/fbuffer.h
@@ -4,9 +4,40 @@
#include "ruby.h"
#include "ruby/encoding.h"
+/* shims */
+/* This is the fallback definition from Ruby 3.4 */
+
+#ifndef RBIMPL_STDBOOL_H
+#if defined(__cplusplus)
+# if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L)
+# include <cstdbool>
+# endif
+#elif defined(HAVE_STDBOOL_H)
+# include <stdbool.h>
+#elif !defined(HAVE__BOOL)
+typedef unsigned char _Bool;
+# define bool _Bool
+# define true ((_Bool)+1)
+# define false ((_Bool)+0)
+# define __bool_true_false_are_defined
+#endif
+#endif
+
+/* Fallbacks for headers that do not provide the branch-prediction hints.
+ * The expansion is parenthesized so the macros stay safe inside larger
+ * expressions such as `!RB_LIKELY(a == b)`. */
+#ifndef RB_UNLIKELY
+#define RB_UNLIKELY(expr) (expr)
+#endif
+
+#ifndef RB_LIKELY
+#define RB_LIKELY(expr) (expr)
+#endif
+
+#ifndef MAYBE_UNUSED
+# define MAYBE_UNUSED(x) x
+#endif
+
enum fbuffer_type {
- HEAP = 0,
- STACK = 1,
+ FBUFFER_HEAP_ALLOCATED = 0,
+ FBUFFER_STACK_ALLOCATED = 1,
};
typedef struct FBufferStruct {
@@ -38,19 +69,11 @@ static inline void fbuffer_append_char(FBuffer *fb, char newchr);
static VALUE fbuffer_to_s(FBuffer *fb);
#endif
-#ifndef RB_UNLIKELY
-#define RB_UNLIKELY(expr) expr
-#endif
-
-#ifndef RB_LIKELY
-#define RB_LIKELY(expr) expr
-#endif
-
static void fbuffer_stack_init(FBuffer *fb, unsigned long initial_length, char *stack_buffer, long stack_buffer_size)
{
fb->initial_length = (initial_length > 0) ? initial_length : FBUFFER_INITIAL_LENGTH_DEFAULT;
if (stack_buffer) {
- fb->type = STACK;
+ fb->type = FBUFFER_STACK_ALLOCATED;
fb->ptr = stack_buffer;
fb->capa = stack_buffer_size;
}
@@ -58,7 +81,7 @@ static void fbuffer_stack_init(FBuffer *fb, unsigned long initial_length, char *
static void fbuffer_free(FBuffer *fb)
{
- if (fb->ptr && fb->type == HEAP) {
+ if (fb->ptr && fb->type == FBUFFER_HEAP_ALLOCATED) {
ruby_xfree(fb->ptr);
}
}
@@ -82,10 +105,10 @@ static void fbuffer_do_inc_capa(FBuffer *fb, unsigned long requested)
for (required = fb->capa; requested > required - fb->len; required <<= 1);
if (required > fb->capa) {
- if (fb->type == STACK) {
+ if (fb->type == FBUFFER_STACK_ALLOCATED) {
const char *old_buffer = fb->ptr;
fb->ptr = ALLOC_N(char, required);
- fb->type = HEAP;
+ fb->type = FBUFFER_HEAP_ALLOCATED;
MEMCPY(fb->ptr, old_buffer, char, fb->len);
} else {
REALLOC_N(fb->ptr, char, required);
diff --git a/ext/json/generator/generator.c b/ext/json/generator/generator.c
index c32b5ce093..80539af6c8 100644
--- a/ext/json/generator/generator.c
+++ b/ext/json/generator/generator.c
@@ -1,5 +1,27 @@
+#include "ruby.h"
#include "../fbuffer/fbuffer.h"
-#include "generator.h"
+
+#include <math.h>
+#include <ctype.h>
+
+/* ruby api and some helpers */
+
+typedef struct JSON_Generator_StateStruct {
+ VALUE indent;
+ VALUE space;
+ VALUE space_before;
+ VALUE object_nl;
+ VALUE array_nl;
+
+ long max_nesting;
+ long depth;
+ long buffer_initial_length;
+
+ bool allow_nan;
+ bool ascii_only;
+ bool script_safe;
+ bool strict;
+} JSON_Generator_State;
#ifndef RB_UNLIKELY
#define RB_UNLIKELY(cond) (cond)
@@ -31,6 +53,7 @@ struct generate_json_data {
generator_func func;
};
+static VALUE cState_from_state_s(VALUE self, VALUE opts);
static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func);
static void generate_json(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
@@ -1013,6 +1036,10 @@ static VALUE generate_json_rescue(VALUE d, VALUE exc)
struct generate_json_data *data = (struct generate_json_data *)d;
fbuffer_free(data->buffer);
+ if (RBASIC_CLASS(exc) == rb_path2class("Encoding::UndefinedConversionError")) {
+ exc = rb_exc_new_str(eGeneratorError, rb_funcall(exc, rb_intern("message"), 0));
+ }
+
rb_exc_raise(exc);
return Qundef;
diff --git a/ext/json/generator/generator.h b/ext/json/generator/generator.h
deleted file mode 100644
index 749a627a10..0000000000
--- a/ext/json/generator/generator.h
+++ /dev/null
@@ -1,91 +0,0 @@
-#ifndef _GENERATOR_H_
-#define _GENERATOR_H_
-
-#include <math.h>
-#include <ctype.h>
-
-#include "ruby.h"
-
-/* This is the fallback definition from Ruby 3.4 */
-#ifndef RBIMPL_STDBOOL_H
-#if defined(__cplusplus)
-# if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L)
-# include <cstdbool>
-# endif
-#elif defined(HAVE_STDBOOL_H)
-# include <stdbool.h>
-#elif !defined(HAVE__BOOL)
-typedef unsigned char _Bool;
-# define bool _Bool
-# define true ((_Bool)+1)
-# define false ((_Bool)+0)
-# define __bool_true_false_are_defined
-#endif
-#endif
-
-/* ruby api and some helpers */
-
-typedef struct JSON_Generator_StateStruct {
- VALUE indent;
- VALUE space;
- VALUE space_before;
- VALUE object_nl;
- VALUE array_nl;
-
- long max_nesting;
- long depth;
- long buffer_initial_length;
-
- bool allow_nan;
- bool ascii_only;
- bool script_safe;
- bool strict;
-} JSON_Generator_State;
-
-static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self);
-static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self);
-#ifdef RUBY_INTEGER_UNIFICATION
-static VALUE mInteger_to_json(int argc, VALUE *argv, VALUE self);
-#else
-static VALUE mFixnum_to_json(int argc, VALUE *argv, VALUE self);
-static VALUE mBignum_to_json(int argc, VALUE *argv, VALUE self);
-#endif
-static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self);
-static VALUE mString_included_s(VALUE self, VALUE modul);
-static VALUE mString_to_json(int argc, VALUE *argv, VALUE self);
-static VALUE mString_to_json_raw_object(VALUE self);
-static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self);
-static VALUE mString_Extend_json_create(VALUE self, VALUE o);
-static VALUE mTrueClass_to_json(int argc, VALUE *argv, VALUE self);
-static VALUE mFalseClass_to_json(int argc, VALUE *argv, VALUE self);
-static VALUE mNilClass_to_json(int argc, VALUE *argv, VALUE self);
-static VALUE mObject_to_json(int argc, VALUE *argv, VALUE self);
-static void State_free(void *state);
-static VALUE cState_s_allocate(VALUE klass);
-
-static VALUE cState_generate(VALUE self, VALUE obj);
-static VALUE cState_from_state_s(VALUE self, VALUE opts);
-static VALUE cState_indent(VALUE self);
-static VALUE cState_indent_set(VALUE self, VALUE indent);
-static VALUE cState_space(VALUE self);
-static VALUE cState_space_set(VALUE self, VALUE space);
-static VALUE cState_space_before(VALUE self);
-static VALUE cState_space_before_set(VALUE self, VALUE space_before);
-static VALUE cState_object_nl(VALUE self);
-static VALUE cState_object_nl_set(VALUE self, VALUE object_nl);
-static VALUE cState_array_nl(VALUE self);
-static VALUE cState_array_nl_set(VALUE self, VALUE array_nl);
-static VALUE cState_max_nesting(VALUE self);
-static VALUE cState_max_nesting_set(VALUE self, VALUE depth);
-static VALUE cState_allow_nan_p(VALUE self);
-static VALUE cState_ascii_only_p(VALUE self);
-static VALUE cState_depth(VALUE self);
-static VALUE cState_depth_set(VALUE self, VALUE depth);
-static VALUE cState_script_safe(VALUE self);
-static VALUE cState_script_safe_set(VALUE self, VALUE depth);
-static VALUE cState_strict(VALUE self);
-static VALUE cState_strict_set(VALUE self, VALUE strict);
-
-static const rb_data_type_t JSON_Generator_State_type;
-
-#endif
diff --git a/ext/json/lib/json.rb b/ext/json/lib/json.rb
index c28e853e13..dfd9b7dfc2 100644
--- a/ext/json/lib/json.rb
+++ b/ext/json/lib/json.rb
@@ -583,10 +583,5 @@ require 'json/common'
#
module JSON
require 'json/version'
-
- begin
- require 'json/ext'
- rescue LoadError
- require 'json/pure'
- end
+ require 'json/ext'
end
diff --git a/ext/json/lib/json/common.rb b/ext/json/lib/json/common.rb
index 546b6ec801..2269896ba8 100644
--- a/ext/json/lib/json/common.rb
+++ b/ext/json/lib/json/common.rb
@@ -32,9 +32,7 @@ module JSON
JSON.generate(object, opts)
end
- # Returns the JSON parser class that is used by JSON. This is either
- # JSON::Ext::Parser or JSON::Pure::Parser:
- # JSON.parser # => JSON::Ext::Parser
+ # Returns the JSON parser class that is used by JSON.
attr_reader :parser
# Set the JSON parser class _parser_ to be used by JSON.
@@ -97,14 +95,10 @@ module JSON
)
end
- # Returns the JSON generator module that is used by JSON. This is
- # either JSON::Ext::Generator or JSON::Pure::Generator:
- # JSON.generator # => JSON::Ext::Generator
+ # Returns the JSON generator module that is used by JSON.
attr_reader :generator
- # Sets or Returns the JSON generator state class that is used by JSON. This is
- # either JSON::Ext::Generator::State or JSON::Pure::Generator::State:
- # JSON.state # => JSON::Ext::Generator::State
+ # Sets or Returns the JSON generator state class that is used by JSON.
attr_accessor :state
end
@@ -207,16 +201,7 @@ module JSON
# JSON.parse('')
#
def parse(source, opts = nil)
- if opts.nil?
- Parser.new(source).parse
- else
- # NB: The ** shouldn't be required, but we have to deal with
- # different versions of the `json` and `json_pure` gems being
- # loaded concurrently.
- # Prior to 2.7.3, `JSON::Ext::Parser` would only take kwargs.
- # Ref: https://github.com/ruby/json/issues/650
- Parser.new(source, **opts).parse
- end
+ Parser.parse(source, opts)
end
# :call-seq:
diff --git a/ext/json/lib/json/ext.rb b/ext/json/lib/json/ext.rb
index 92ef61eaec..2082cae68f 100644
--- a/ext/json/lib/json/ext.rb
+++ b/ext/json/lib/json/ext.rb
@@ -8,14 +8,12 @@ module JSON
module Ext
if RUBY_ENGINE == 'truffleruby'
require 'json/ext/parser'
- require 'json/pure'
- $DEBUG and warn "Using Ext extension for JSON parser and Pure library for JSON generator."
+ require 'json/truffle_ruby/generator'
JSON.parser = Parser
- JSON.generator = JSON::Pure::Generator
+ JSON.generator = ::JSON::TruffleRuby::Generator
else
require 'json/ext/parser'
require 'json/ext/generator'
- $DEBUG and warn "Using Ext extension for JSON."
JSON.parser = Parser
JSON.generator = Generator
end
diff --git a/lib/json/ext/generator/state.rb b/ext/json/lib/json/ext/generator/state.rb
index 6cd9496e67..6cd9496e67 100644
--- a/lib/json/ext/generator/state.rb
+++ b/ext/json/lib/json/ext/generator/state.rb
diff --git a/ext/json/parser/extconf.rb b/ext/json/parser/extconf.rb
index c3c23d2cb4..f9104de12d 100644
--- a/ext/json/parser/extconf.rb
+++ b/ext/json/parser/extconf.rb
@@ -2,7 +2,10 @@
require 'mkmf'
have_func("rb_enc_interned_str", "ruby.h") # RUBY_VERSION >= 3.0
-have_func("rb_gc_mark_locations") # Missing on TruffleRuby
+have_func("rb_hash_new_capa", "ruby.h") # RUBY_VERSION >= 3.2
+have_func("rb_gc_mark_locations", "ruby.h") # Missing on TruffleRuby
+have_func("rb_hash_bulk_insert", "ruby.h") # Missing on TruffleRuby
+
append_cflags("-std=c99")
create_makefile 'json/ext/parser'
diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c
index 758dba4694..382e21e12d 100644
--- a/ext/json/parser/parser.c
+++ b/ext/json/parser/parser.c
@@ -1,7 +1,310 @@
/* This file is automatically generated from parser.rl by using ragel */
#line 1 "parser.rl"
+#include "ruby.h"
#include "../fbuffer/fbuffer.h"
-#include "parser.h"
+
+static VALUE mJSON, mExt, cParser, eNestingError, Encoding_UTF_8;
+static VALUE CNaN, CInfinity, CMinusInfinity;
+
+static ID i_json_creatable_p, i_json_create, i_create_id,
+ i_chr, i_deep_const_get, i_match, i_aset, i_aref,
+ i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
+
+static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze,
+ sym_create_additions, sym_create_id, sym_object_class, sym_array_class,
+ sym_decimal_class, sym_match_string;
+
+static int binary_encindex;
+static int utf8_encindex;
+
+#ifndef HAVE_RB_GC_MARK_LOCATIONS
+/*
+ * Fallback used when the runtime does not provide rb_gc_mark_locations()
+ * (HAVE_RB_GC_MARK_LOCATIONS undefined, e.g. on TruffleRuby).
+ * Marks every VALUE in the half-open range [start, end).
+ */
+void rb_gc_mark_locations(const VALUE *start, const VALUE *end)
+{
+    /* The range is only read, so the cursor keeps the const qualifier;
+     * initializing a plain `VALUE *` from `start` would discard it. */
+    const VALUE *value = start;
+
+    while (value < end) {
+        rb_gc_mark(*value);
+        value++;
+    }
+}
+#endif
+
+#ifndef HAVE_RB_HASH_BULK_INSERT
+/* Fallback for TruffleRuby: `pairs` holds `count` VALUEs laid out as
+ * consecutive name/value pairs; each pair is stored with rb_hash_aset(). */
+void rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
+{
+    for (long i = 0; i < count; i += 2) {
+        rb_hash_aset(hash, pairs[i], pairs[i + 1]);
+    }
+    RB_GC_GUARD(hash);
+}
+#endif
+
+/* name cache */
+
+#include <string.h>
+#include <ctype.h>
+
+// Object names are likely to be repeated, and are frozen.
+// As such we can re-use them if we keep a cache of the ones we've seen so far,
+// and save much more expensive lookups into the global fstring table.
+// This cache implementation is deliberately simple, as we're optimizing for compactness,
+// to be able to fit safely on the stack.
+// As such, binary search into a sorted array gives a good tradeoff between compactness and
+// performance.
+#define JSON_RVALUE_CACHE_CAPA 63
+typedef struct rvalue_cache_struct {
+ int length;
+ VALUE entries[JSON_RVALUE_CACHE_CAPA];
+} rvalue_cache;
+
+static rb_encoding *enc_utf8;
+
+#define JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH 55
+
+/* Builds a Ruby string from the `length` bytes at `str`: through
+ * rb_enc_interned_str() with the UTF-8 encoding when available, otherwise by
+ * freezing a new UTF-8 string and sending it the method named by i_uminus. */
+static inline VALUE build_interned_string(const char *str, const long length)
+{
+#ifdef HAVE_RB_ENC_INTERNED_STR
+    return rb_enc_interned_str(str, length, enc_utf8);
+#else
+    VALUE frozen = rb_str_freeze(rb_utf8_str_new(str, length));
+    return rb_funcall(frozen, i_uminus, 0);
+#endif
+}
+
+/* Turns the `length` bytes at `str` into a Symbol by interning the
+ * corresponding string first. */
+static inline VALUE build_symbol(const char *str, const long length)
+{
+    VALUE name = build_interned_string(str, length);
+    return rb_str_intern(name);
+}
+
+/* Inserts `rstring` at position `index`, shifting the entries from `index`
+ * onward one slot to the right. No capacity check is done here; callers are
+ * expected to verify there is room before calling. */
+static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring)
+{
+    VALUE *slot = &cache->entries[index];
+    const int tail = cache->length - index;
+
+    MEMMOVE(slot + 1, slot, VALUE, tail);
+    *slot = rstring;
+    cache->length++;
+}
+
+/* Orders the raw bytes (`str`, `length`) against the Ruby string `rstring`:
+ * different lengths compare by length, equal lengths by memcmp().
+ * Returns a negative, zero or positive value. */
+static inline int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
+{
+    const long rstring_length = RSTRING_LEN(rstring);
+
+    if (length != rstring_length) {
+        return (int)(length - rstring_length);
+    }
+    return memcmp(str, RSTRING_PTR(rstring), length);
+}
+
+/*
+ * Looks up the name (`str`, `length` bytes) in `cache`, a sorted array that is
+ * searched by bisection, and returns the cached string on a hit.
+ *
+ * On a miss the string is interned, inserted at its sorted position while the
+ * cache still has room, and returned.
+ *
+ * Returns Qfalse when the name is not handled by the cache (longer than
+ * JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH, not starting with a letter, or
+ * containing a backslash), so the caller must take its slow path.
+ */
+static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
+{
+    if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
+        // Common names aren't likely to be very long. So we just don't
+        // cache names above an arbitrary threshold.
+        return Qfalse;
+    }
+
+    // <ctype.h> functions only accept values representable as unsigned char
+    // (or EOF). Plain `char` may be negative for non-ASCII bytes, so cast first.
+    if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
+        // Simple heuristic, if the first character isn't a letter,
+        // we're much less likely to see this string again.
+        // We mostly want to cache strings that are likely to be repeated.
+        return Qfalse;
+    }
+
+    int low = 0;
+    int high = cache->length - 1;
+    int mid = 0;
+    int last_cmp = 0;
+
+    while (low <= high) {
+        mid = (high + low) >> 1;
+        VALUE entry = cache->entries[mid];
+        last_cmp = rstring_cache_cmp(str, length, entry);
+
+        if (last_cmp == 0) {
+            return entry;
+        } else if (last_cmp > 0) {
+            low = mid + 1;
+        } else {
+            high = mid - 1;
+        }
+    }
+
+    if (RB_UNLIKELY(memchr(str, '\\', length))) {
+        // We assume the overwhelming majority of names don't need to be escaped.
+        // But if they do, we have to fallback to the slow path.
+        return Qfalse;
+    }
+
+    VALUE rstring = build_interned_string(str, length);
+
+    if (cache->length < JSON_RVALUE_CACHE_CAPA) {
+        // `mid` is the last probed slot; the new entry goes right after it
+        // when the name sorts above that slot, otherwise in its place.
+        if (last_cmp > 0) {
+            mid += 1;
+        }
+
+        rvalue_cache_insert_at(cache, mid, rstring);
+    }
+    return rstring;
+}
+
+/*
+ * Symbol counterpart of the string name cache: looks up the name
+ * (`str`, `length` bytes) in `cache`, whose entries are Symbols kept sorted by
+ * their string form (compared through rb_sym2str()), using bisection.
+ *
+ * On a miss the Symbol is built, inserted at its sorted position while the
+ * cache still has room, and returned.
+ *
+ * Returns Qfalse when the name is not handled by the cache (longer than
+ * JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH, not starting with a letter, or
+ * containing a backslash), so the caller must take its slow path.
+ */
+static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length)
+{
+    if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
+        // Common names aren't likely to be very long. So we just don't
+        // cache names above an arbitrary threshold.
+        return Qfalse;
+    }
+
+    // <ctype.h> functions only accept values representable as unsigned char
+    // (or EOF). Plain `char` may be negative for non-ASCII bytes, so cast first.
+    if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
+        // Simple heuristic, if the first character isn't a letter,
+        // we're much less likely to see this string again.
+        // We mostly want to cache strings that are likely to be repeated.
+        return Qfalse;
+    }
+
+    int low = 0;
+    int high = cache->length - 1;
+    int mid = 0;
+    int last_cmp = 0;
+
+    while (low <= high) {
+        mid = (high + low) >> 1;
+        VALUE entry = cache->entries[mid];
+        last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
+
+        if (last_cmp == 0) {
+            return entry;
+        } else if (last_cmp > 0) {
+            low = mid + 1;
+        } else {
+            high = mid - 1;
+        }
+    }
+
+    if (RB_UNLIKELY(memchr(str, '\\', length))) {
+        // We assume the overwhelming majority of names don't need to be escaped.
+        // But if they do, we have to fallback to the slow path.
+        return Qfalse;
+    }
+
+    VALUE rsymbol = build_symbol(str, length);
+
+    if (cache->length < JSON_RVALUE_CACHE_CAPA) {
+        // `mid` is the last probed slot; the new entry goes right after it
+        // when the name sorts above that slot, otherwise in its place.
+        if (last_cmp > 0) {
+            mid += 1;
+        }
+
+        rvalue_cache_insert_at(cache, mid, rsymbol);
+    }
+    return rsymbol;
+}
+
+/* rvalue stack */
+
+#define RVALUE_STACK_INITIAL_CAPA 128
+
+enum rvalue_stack_type {
+ RVALUE_STACK_HEAP_ALLOCATED = 0,
+ RVALUE_STACK_STACK_ALLOCATED = 1,
+};
+
+typedef struct rvalue_stack_struct {
+ enum rvalue_stack_type type;
+ long capa;
+ long head;
+ VALUE *ptr;
+} rvalue_stack;
+
+static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref);
+
+/* Makes room for more values. A stack-allocated stack is handed to
+ * rvalue_stack_spill(); a heap-allocated one has its buffer reallocated to
+ * twice its capacity. Returns the stack the caller must use from now on. */
+static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalue_stack **stack_ref)
+{
+    if (stack->type == RVALUE_STACK_STACK_ALLOCATED) {
+        return rvalue_stack_spill(stack, handle, stack_ref);
+    }
+
+    const long doubled = stack->capa * 2;
+    REALLOC_N(stack->ptr, VALUE, doubled);
+    stack->capa = doubled;
+    return stack;
+}
+
+/* Appends `value` to the stack, growing it first when it is full. `handle`
+ * and `stack_ref` are forwarded to rvalue_stack_grow(). */
+static void rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref)
+{
+    if (RB_UNLIKELY(stack->head >= stack->capa)) {
+        stack = rvalue_stack_grow(stack, handle, stack_ref);
+    }
+    stack->ptr[stack->head++] = value;
+}
+
+/* Returns a pointer to the `count` most recently pushed values, oldest first. */
+static inline VALUE *rvalue_stack_peek(rvalue_stack *stack, long count)
+{
+    const long first = stack->head - count;
+    return &stack->ptr[first];
+}
+
+/* Discards the `count` most recently pushed values by moving the head back. */
+static inline void rvalue_stack_pop(rvalue_stack *stack, long count)
+{
+    stack->head = stack->head - count;
+}
+
+/* dmark callback: marks the values currently held, i.e. ptr[0] .. ptr[head - 1]. */
+static void rvalue_stack_mark(void *ptr)
+{
+    rvalue_stack *stack = (rvalue_stack *)ptr;
+    VALUE *first = stack->ptr;
+
+    rb_gc_mark_locations(first, first + stack->head);
+}
+
+/* dfree callback: releases the value buffer and then the stack struct itself.
+ * A NULL `ptr` is accepted and ignored. */
+static void rvalue_stack_free(void *ptr)
+{
+    rvalue_stack *stack = (rvalue_stack *)ptr;
+
+    if (!stack) {
+        return;
+    }
+    ruby_xfree(stack->ptr);
+    ruby_xfree(stack);
+}
+
+/* dsize callback: size of the struct plus its value buffer at full capacity. */
+static size_t rvalue_stack_memsize(const void *ptr)
+{
+    const rvalue_stack *stack = (const rvalue_stack *)ptr;
+    const size_t buffer_size = sizeof(VALUE) * stack->capa;
+
+    return sizeof(rvalue_stack) + buffer_size;
+}
+
+static const rb_data_type_t JSON_Parser_rvalue_stack_type = {
+ "JSON::Ext::Parser/rvalue_stack",
+ {
+ .dmark = rvalue_stack_mark,
+ .dfree = rvalue_stack_free,
+ .dsize = rvalue_stack_memsize,
+ },
+ 0, 0,
+ RUBY_TYPED_FREE_IMMEDIATELY,
+};
+
+/*
+ * Replaces `old_stack` with a heap-allocated stack of twice the capacity.
+ * A typed-data wrapper object is created and stored in `*handle`, `*stack_ref`
+ * is pointed at the new struct, and the values pushed so far are copied over.
+ * Returns the new stack.
+ */
+static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref)
+{
+    rvalue_stack *heap_stack;
+    *handle = TypedData_Make_Struct(0, rvalue_stack, &JSON_Parser_rvalue_stack_type, heap_stack);
+    *stack_ref = heap_stack;
+
+    // Start from a copy of the old header, then override what differs.
+    *heap_stack = *old_stack;
+
+    // NOTE(review): until `ptr` is reassigned below, heap_stack->ptr still refers
+    // to the old buffer -- confirm that a failing allocation here cannot lead
+    // rvalue_stack_free() to release that buffer.
+    heap_stack->capa = old_stack->capa << 1;
+    heap_stack->ptr = ALLOC_N(VALUE, heap_stack->capa);
+    heap_stack->type = RVALUE_STACK_HEAP_ALLOCATED;
+    MEMCPY(heap_stack->ptr, old_stack->ptr, VALUE, old_stack->head);
+    return heap_stack;
+}
+
+/* Frees the stack wrapped by `handle` right away instead of waiting for the
+ * wrapper object to be collected. */
+static void rvalue_stack_eagerly_release(VALUE handle)
+{
+    rvalue_stack *owned;
+    TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, owned);
+
+    // Clear the wrapper's data pointer before freeing, so the wrapper no
+    // longer refers to the released memory.
+    RTYPEDDATA_DATA(handle) = NULL;
+    rvalue_stack_free(owned);
+}
/* unicode */
@@ -69,6 +372,50 @@ static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
return len;
}
+typedef struct JSON_ParserStruct {
+ VALUE Vsource;
+ char *source;
+ long len;
+ char *memo;
+ VALUE create_id;
+ VALUE object_class;
+ VALUE array_class;
+ VALUE decimal_class;
+ VALUE match_string;
+ FBuffer fbuffer;
+ int max_nesting;
+ bool allow_nan;
+ bool allow_trailing_comma;
+ bool parsing_name;
+ bool symbolize_names;
+ bool freeze;
+ bool create_additions;
+ bool deprecated_create_additions;
+ rvalue_cache name_cache;
+ rvalue_stack *stack;
+ VALUE stack_handle;
+} JSON_Parser;
+
+#define GET_PARSER \
+ GET_PARSER_INIT; \
+ if (!json->Vsource) rb_raise(rb_eTypeError, "uninitialized instance")
+
+#define GET_PARSER_INIT \
+ JSON_Parser *json; \
+ TypedData_Get_Struct(self, JSON_Parser, &JSON_Parser_type, json)
+
+#define MinusInfinity "-Infinity"
+#define EVIL 0x666
+
+static const rb_data_type_t JSON_Parser_type;
+static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result);
+static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
+static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
+static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result);
+static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result);
+static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
+
+
#define PARSE_ERROR_FRAGMENT_LEN 32
#ifdef RBIMPL_ATTR_NORETURN
RBIMPL_ATTR_NORETURN()
@@ -86,60 +433,49 @@ static void raise_parse_error(const char *format, const char *start)
ptr = buffer;
}
- rb_enc_raise(rb_utf8_encoding(), rb_path2class("JSON::ParserError"), format, ptr);
+ rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr);
}
-static VALUE mJSON, mExt, cParser, eNestingError, Encoding_UTF_8;
-static VALUE CNaN, CInfinity, CMinusInfinity;
-
-static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions,
- i_chr, i_max_nesting, i_allow_nan, i_symbolize_names,
- i_object_class, i_array_class, i_decimal_class,
- i_deep_const_get, i_match, i_match_string, i_aset, i_aref,
- i_leftshift, i_new, i_try_convert, i_freeze, i_uminus, i_encode;
-static int binary_encindex;
-static int utf8_encindex;
+#line 464 "parser.rl"
-#line 129 "parser.rl"
-
-
-#line 111 "parser.c"
+#line 446 "parser.c"
enum {JSON_object_start = 1};
-enum {JSON_object_first_final = 27};
+enum {JSON_object_first_final = 32};
enum {JSON_object_error = 0};
enum {JSON_object_en_main = 1};
-#line 171 "parser.rl"
+#line 504 "parser.rl"
+
+#define PUSH(result) rvalue_stack_push(json->stack, result, &json->stack_handle, &json->stack)
static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
{
int cs = EVIL;
- VALUE last_name = Qnil;
- VALUE object_class = json->object_class;
if (json->max_nesting && current_nesting > json->max_nesting) {
rb_raise(eNestingError, "nesting of %d is too deep", current_nesting);
}
- *result = NIL_P(object_class) ? rb_hash_new() : rb_class_new_instance(0, 0, object_class);
+ long stack_head = json->stack->head;
-#line 135 "parser.c"
+#line 470 "parser.c"
{
cs = JSON_object_start;
}
-#line 186 "parser.rl"
+#line 519 "parser.rl"
-#line 142 "parser.c"
+#line 477 "parser.c"
{
+ short _widec;
if ( p == pe )
goto _test_eof;
switch ( cs )
@@ -159,27 +495,30 @@ case 2:
case 13: goto st2;
case 32: goto st2;
case 34: goto tr2;
- case 47: goto st23;
+ case 47: goto st28;
case 125: goto tr4;
}
if ( 9 <= (*p) && (*p) <= 10 )
goto st2;
goto st0;
tr2:
-#line 153 "parser.rl"
+#line 483 "parser.rl"
{
char *np;
- json->parsing_name = 1;
- np = JSON_parse_string(json, p, pe, &last_name);
- json->parsing_name = 0;
- if (np == NULL) { p--; {p++; cs = 3; goto _out;} } else {p = (( np))-1;}
+ json->parsing_name = true;
+ np = JSON_parse_string(json, p, pe, result);
+ json->parsing_name = false;
+ if (np == NULL) { p--; {p++; cs = 3; goto _out;} } else {
+ PUSH(*result);
+ {p = (( np))-1;}
+ }
}
goto st3;
st3:
if ( ++p == pe )
goto _test_eof3;
case 3:
-#line 183 "parser.c"
+#line 522 "parser.c"
switch( (*p) ) {
case 13: goto st3;
case 32: goto st3;
@@ -230,7 +569,7 @@ case 8:
case 32: goto st8;
case 34: goto tr11;
case 45: goto tr11;
- case 47: goto st19;
+ case 47: goto st24;
case 73: goto tr11;
case 78: goto tr11;
case 91: goto tr11;
@@ -246,19 +585,12 @@ case 8:
goto st8;
goto st0;
tr11:
-#line 137 "parser.rl"
+#line 472 "parser.rl"
{
- VALUE v = Qnil;
- char *np = JSON_parse_value(json, p, pe, &v, current_nesting);
+ char *np = JSON_parse_value(json, p, pe, result, current_nesting);
if (np == NULL) {
p--; {p++; cs = 9; goto _out;}
} else {
- if (NIL_P(json->object_class)) {
- OBJ_FREEZE(last_name);
- rb_hash_aset(*result, last_name, v);
- } else {
- rb_funcall(*result, i_aset, 2, last_name, v);
- }
{p = (( np))-1;}
}
}
@@ -267,16 +599,75 @@ st9:
if ( ++p == pe )
goto _test_eof9;
case 9:
-#line 271 "parser.c"
- switch( (*p) ) {
- case 13: goto st9;
- case 32: goto st9;
- case 44: goto st10;
- case 47: goto st15;
+#line 603 "parser.c"
+ _widec = (*p);
+ if ( (*p) < 13 ) {
+ if ( (*p) > 9 ) {
+ if ( 10 <= (*p) && (*p) <= 10 ) {
+ _widec = (short)(128 + ((*p) - -128));
+ if (
+#line 481 "parser.rl"
+ json->allow_trailing_comma ) _widec += 256;
+ }
+ } else if ( (*p) >= 9 ) {
+ _widec = (short)(128 + ((*p) - -128));
+ if (
+#line 481 "parser.rl"
+ json->allow_trailing_comma ) _widec += 256;
+ }
+ } else if ( (*p) > 13 ) {
+ if ( (*p) < 44 ) {
+ if ( 32 <= (*p) && (*p) <= 32 ) {
+ _widec = (short)(128 + ((*p) - -128));
+ if (
+#line 481 "parser.rl"
+ json->allow_trailing_comma ) _widec += 256;
+ }
+ } else if ( (*p) > 44 ) {
+ if ( 47 <= (*p) && (*p) <= 47 ) {
+ _widec = (short)(128 + ((*p) - -128));
+ if (
+#line 481 "parser.rl"
+ json->allow_trailing_comma ) _widec += 256;
+ }
+ } else {
+ _widec = (short)(128 + ((*p) - -128));
+ if (
+#line 481 "parser.rl"
+ json->allow_trailing_comma ) _widec += 256;
+ }
+ } else {
+ _widec = (short)(128 + ((*p) - -128));
+ if (
+#line 481 "parser.rl"
+ json->allow_trailing_comma ) _widec += 256;
+ }
+ switch( _widec ) {
case 125: goto tr4;
- }
- if ( 9 <= (*p) && (*p) <= 10 )
- goto st9;
+ case 269: goto st10;
+ case 288: goto st10;
+ case 300: goto st11;
+ case 303: goto st16;
+ case 525: goto st9;
+ case 544: goto st9;
+ case 556: goto st2;
+ case 559: goto st20;
+ }
+ if ( _widec > 266 ) {
+ if ( 521 <= _widec && _widec <= 522 )
+ goto st9;
+ } else if ( _widec >= 265 )
+ goto st10;
+ goto st0;
+tr4:
+#line 494 "parser.rl"
+ { p--; {p++; cs = 32; goto _out;} }
+ goto st32;
+st32:
+ if ( ++p == pe )
+ goto _test_eof32;
+case 32:
+#line 671 "parser.c"
goto st0;
st10:
if ( ++p == pe )
@@ -285,8 +676,9 @@ case 10:
switch( (*p) ) {
case 13: goto st10;
case 32: goto st10;
- case 34: goto tr2;
- case 47: goto st11;
+ case 44: goto st11;
+ case 47: goto st16;
+ case 125: goto tr4;
}
if ( 9 <= (*p) && (*p) <= 10 )
goto st10;
@@ -296,139 +688,288 @@ st11:
goto _test_eof11;
case 11:
switch( (*p) ) {
- case 42: goto st12;
- case 47: goto st14;
+ case 13: goto st11;
+ case 32: goto st11;
+ case 34: goto tr2;
+ case 47: goto st12;
}
+ if ( 9 <= (*p) && (*p) <= 10 )
+ goto st11;
goto st0;
st12:
if ( ++p == pe )
goto _test_eof12;
case 12:
- if ( (*p) == 42 )
- goto st13;
- goto st12;
+ switch( (*p) ) {
+ case 42: goto st13;
+ case 47: goto st15;
+ }
+ goto st0;
st13:
if ( ++p == pe )
goto _test_eof13;
case 13:
- switch( (*p) ) {
- case 42: goto st13;
- case 47: goto st10;
- }
- goto st12;
+ if ( (*p) == 42 )
+ goto st14;
+ goto st13;
st14:
if ( ++p == pe )
goto _test_eof14;
case 14:
- if ( (*p) == 10 )
- goto st10;
- goto st14;
+ switch( (*p) ) {
+ case 42: goto st14;
+ case 47: goto st11;
+ }
+ goto st13;
st15:
if ( ++p == pe )
goto _test_eof15;
case 15:
- switch( (*p) ) {
- case 42: goto st16;
- case 47: goto st18;
- }
- goto st0;
+ if ( (*p) == 10 )
+ goto st11;
+ goto st15;
st16:
if ( ++p == pe )
goto _test_eof16;
case 16:
- if ( (*p) == 42 )
- goto st17;
- goto st16;
+ switch( (*p) ) {
+ case 42: goto st17;
+ case 47: goto st19;
+ }
+ goto st0;
st17:
if ( ++p == pe )
goto _test_eof17;
case 17:
- switch( (*p) ) {
- case 42: goto st17;
- case 47: goto st9;
- }
- goto st16;
+ if ( (*p) == 42 )
+ goto st18;
+ goto st17;
st18:
if ( ++p == pe )
goto _test_eof18;
case 18:
- if ( (*p) == 10 )
- goto st9;
- goto st18;
-tr4:
-#line 161 "parser.rl"
- { p--; {p++; cs = 27; goto _out;} }
- goto st27;
-st27:
- if ( ++p == pe )
- goto _test_eof27;
-case 27:
-#line 367 "parser.c"
- goto st0;
+ switch( (*p) ) {
+ case 42: goto st18;
+ case 47: goto st10;
+ }
+ goto st17;
st19:
if ( ++p == pe )
goto _test_eof19;
case 19:
- switch( (*p) ) {
- case 42: goto st20;
- case 47: goto st22;
- }
- goto st0;
+ if ( (*p) == 10 )
+ goto st10;
+ goto st19;
st20:
if ( ++p == pe )
goto _test_eof20;
case 20:
- if ( (*p) == 42 )
- goto st21;
- goto st20;
+ _widec = (*p);
+ if ( (*p) > 42 ) {
+ if ( 47 <= (*p) && (*p) <= 47 ) {
+ _widec = (short)(128 + ((*p) - -128));
+ if (
+#line 481 "parser.rl"
+ json->allow_trailing_comma ) _widec += 256;
+ }
+ } else if ( (*p) >= 42 ) {
+ _widec = (short)(128 + ((*p) - -128));
+ if (
+#line 481 "parser.rl"
+ json->allow_trailing_comma ) _widec += 256;
+ }
+ switch( _widec ) {
+ case 298: goto st17;
+ case 303: goto st19;
+ case 554: goto st21;
+ case 559: goto st23;
+ }
+ goto st0;
st21:
if ( ++p == pe )
goto _test_eof21;
case 21:
- switch( (*p) ) {
- case 42: goto st21;
- case 47: goto st8;
- }
- goto st20;
+ _widec = (*p);
+ if ( (*p) < 42 ) {
+ if ( (*p) <= 41 ) {
+ _widec = (short)(128 + ((*p) - -128));
+ if (
+#line 481 "parser.rl"
+ json->allow_trailing_comma ) _widec += 256;
+ }
+ } else if ( (*p) > 42 ) {
+ if ( 43 <= (*p) )
+ { _widec = (short)(128 + ((*p) - -128));
+ if (
+#line 481 "parser.rl"
+ json->allow_trailing_comma ) _widec += 256;
+ }
+ } else {
+ _widec = (short)(128 + ((*p) - -128));
+ if (
+#line 481 "parser.rl"
+ json->allow_trailing_comma ) _widec += 256;
+ }
+ switch( _widec ) {
+ case 298: goto st18;
+ case 554: goto st22;
+ }
+ if ( _widec > 383 ) {
+ if ( 384 <= _widec && _widec <= 639 )
+ goto st21;
+ } else if ( _widec >= 128 )
+ goto st17;
+ goto st0;
st22:
if ( ++p == pe )
goto _test_eof22;
case 22:
- if ( (*p) == 10 )
- goto st8;
- goto st22;
+ _widec = (*p);
+ if ( (*p) < 43 ) {
+ if ( (*p) > 41 ) {
+ if ( 42 <= (*p) && (*p) <= 42 ) {
+ _widec = (short)(128 + ((*p) - -128));
+ if (
+#line 481 "parser.rl"
+ json->allow_trailing_comma ) _widec += 256;
+ }
+ } else {
+ _widec = (short)(128 + ((*p) - -128));
+ if (
+#line 481 "parser.rl"
+ json->allow_trailing_comma ) _widec += 256;
+ }
+ } else if ( (*p) > 46 ) {
+ if ( (*p) > 47 ) {
+ if ( 48 <= (*p) )
+ { _widec = (short)(128 + ((*p) - -128));
+ if (
+#line 481 "parser.rl"
+ json->allow_trailing_comma ) _widec += 256;
+ }
+ } else if ( (*p) >= 47 ) {
+ _widec = (short)(128 + ((*p) - -128));
+ if (
+#line 481 "parser.rl"
+ json->allow_trailing_comma ) _widec += 256;
+ }
+ } else {
+ _widec = (short)(128 + ((*p) - -128));
+ if (
+#line 481 "parser.rl"
+ json->allow_trailing_comma ) _widec += 256;
+ }
+ switch( _widec ) {
+ case 298: goto st18;
+ case 303: goto st10;
+ case 554: goto st22;
+ case 559: goto st9;
+ }
+ if ( _widec > 383 ) {
+ if ( 384 <= _widec && _widec <= 639 )
+ goto st21;
+ } else if ( _widec >= 128 )
+ goto st17;
+ goto st0;
st23:
if ( ++p == pe )
goto _test_eof23;
case 23:
- switch( (*p) ) {
- case 42: goto st24;
- case 47: goto st26;
- }
+ _widec = (*p);
+ if ( (*p) < 10 ) {
+ if ( (*p) <= 9 ) {
+ _widec = (short)(128 + ((*p) - -128));
+ if (
+#line 481 "parser.rl"
+ json->allow_trailing_comma ) _widec += 256;
+ }
+ } else if ( (*p) > 10 ) {
+ if ( 11 <= (*p) )
+ { _widec = (short)(128 + ((*p) - -128));
+ if (
+#line 481 "parser.rl"
+ json->allow_trailing_comma ) _widec += 256;
+ }
+ } else {
+ _widec = (short)(128 + ((*p) - -128));
+ if (
+#line 481 "parser.rl"
+ json->allow_trailing_comma ) _widec += 256;
+ }
+ switch( _widec ) {
+ case 266: goto st10;
+ case 522: goto st9;
+ }
+ if ( _widec > 383 ) {
+ if ( 384 <= _widec && _widec <= 639 )
+ goto st23;
+ } else if ( _widec >= 128 )
+ goto st19;
goto st0;
st24:
if ( ++p == pe )
goto _test_eof24;
case 24:
- if ( (*p) == 42 )
- goto st25;
- goto st24;
+ switch( (*p) ) {
+ case 42: goto st25;
+ case 47: goto st27;
+ }
+ goto st0;
st25:
if ( ++p == pe )
goto _test_eof25;
case 25:
- switch( (*p) ) {
- case 42: goto st25;
- case 47: goto st2;
- }
- goto st24;
+ if ( (*p) == 42 )
+ goto st26;
+ goto st25;
st26:
if ( ++p == pe )
goto _test_eof26;
case 26:
+ switch( (*p) ) {
+ case 42: goto st26;
+ case 47: goto st8;
+ }
+ goto st25;
+st27:
+ if ( ++p == pe )
+ goto _test_eof27;
+case 27:
+ if ( (*p) == 10 )
+ goto st8;
+ goto st27;
+st28:
+ if ( ++p == pe )
+ goto _test_eof28;
+case 28:
+ switch( (*p) ) {
+ case 42: goto st29;
+ case 47: goto st31;
+ }
+ goto st0;
+st29:
+ if ( ++p == pe )
+ goto _test_eof29;
+case 29:
+ if ( (*p) == 42 )
+ goto st30;
+ goto st29;
+st30:
+ if ( ++p == pe )
+ goto _test_eof30;
+case 30:
+ switch( (*p) ) {
+ case 42: goto st30;
+ case 47: goto st2;
+ }
+ goto st29;
+st31:
+ if ( ++p == pe )
+ goto _test_eof31;
+case 31:
if ( (*p) == 10 )
goto st2;
- goto st26;
+ goto st31;
}
_test_eof2: cs = 2; goto _test_eof;
_test_eof3: cs = 3; goto _test_eof;
@@ -438,6 +979,7 @@ case 26:
_test_eof7: cs = 7; goto _test_eof;
_test_eof8: cs = 8; goto _test_eof;
_test_eof9: cs = 9; goto _test_eof;
+ _test_eof32: cs = 32; goto _test_eof;
_test_eof10: cs = 10; goto _test_eof;
_test_eof11: cs = 11; goto _test_eof;
_test_eof12: cs = 12; goto _test_eof;
@@ -447,7 +989,6 @@ case 26:
_test_eof16: cs = 16; goto _test_eof;
_test_eof17: cs = 17; goto _test_eof;
_test_eof18: cs = 18; goto _test_eof;
- _test_eof27: cs = 27; goto _test_eof;
_test_eof19: cs = 19; goto _test_eof;
_test_eof20: cs = 20; goto _test_eof;
_test_eof21: cs = 21; goto _test_eof;
@@ -456,20 +997,49 @@ case 26:
_test_eof24: cs = 24; goto _test_eof;
_test_eof25: cs = 25; goto _test_eof;
_test_eof26: cs = 26; goto _test_eof;
+ _test_eof27: cs = 27; goto _test_eof;
+ _test_eof28: cs = 28; goto _test_eof;
+ _test_eof29: cs = 29; goto _test_eof;
+ _test_eof30: cs = 30; goto _test_eof;
+ _test_eof31: cs = 31; goto _test_eof;
_test_eof: {}
_out: {}
}
-#line 187 "parser.rl"
+#line 520 "parser.rl"
if (cs >= JSON_object_first_final) {
- if (json->create_additions) {
+ long count = json->stack->head - stack_head;
+
+ if (RB_UNLIKELY(json->object_class)) {
+ VALUE object = rb_class_new_instance(0, 0, json->object_class);
+ long index = 0;
+ VALUE *items = rvalue_stack_peek(json->stack, count);
+ while (index < count) {
+ VALUE name = items[index++];
+ VALUE value = items[index++];
+ rb_funcall(object, i_aset, 2, name, value);
+ }
+ *result = object;
+ } else {
+ VALUE hash;
+#ifdef HAVE_RB_HASH_NEW_CAPA
+ hash = rb_hash_new_capa(count >> 1);
+#else
+ hash = rb_hash_new();
+#endif
+ rb_hash_bulk_insert(count, rvalue_stack_peek(json->stack, count), hash);
+ *result = hash;
+ }
+ rvalue_stack_pop(json->stack, count);
+
+ if (RB_UNLIKELY(json->create_additions)) {
VALUE klassname;
- if (NIL_P(json->object_class)) {
- klassname = rb_hash_aref(*result, json->create_id);
+ if (json->object_class) {
+ klassname = rb_funcall(*result, i_aref, 1, json->create_id);
} else {
- klassname = rb_funcall(*result, i_aref, 1, json->create_id);
+ klassname = rb_hash_aref(*result, json->create_id);
}
if (!NIL_P(klassname)) {
VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname);
@@ -488,8 +1058,7 @@ case 26:
}
-
-#line 493 "parser.c"
+#line 1062 "parser.c"
enum {JSON_value_start = 1};
enum {JSON_value_first_final = 29};
enum {JSON_value_error = 0};
@@ -497,7 +1066,7 @@ enum {JSON_value_error = 0};
enum {JSON_value_en_main = 1};
-#line 290 "parser.rl"
+#line 655 "parser.rl"
static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
@@ -505,14 +1074,14 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul
int cs = EVIL;
-#line 509 "parser.c"
+#line 1078 "parser.c"
{
cs = JSON_value_start;
}
-#line 297 "parser.rl"
+#line 662 "parser.rl"
-#line 516 "parser.c"
+#line 1085 "parser.c"
{
if ( p == pe )
goto _test_eof;
@@ -546,14 +1115,19 @@ st0:
cs = 0;
goto _out;
tr2:
-#line 242 "parser.rl"
+#line 598 "parser.rl"
{
char *np = JSON_parse_string(json, p, pe, result);
- if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;}
+ if (np == NULL) {
+ p--;
+ {p++; cs = 29; goto _out;}
+ } else {
+ {p = (( np))-1;}
+ }
}
goto st29;
tr3:
-#line 247 "parser.rl"
+#line 608 "parser.rl"
{
char *np;
if(pe > p + 8 && !strncmp(MinusInfinity, p, 9)) {
@@ -566,14 +1140,18 @@ tr3:
}
}
np = JSON_parse_float(json, p, pe, result);
- if (np != NULL) {p = (( np))-1;}
+ if (np != NULL) {
+ {p = (( np))-1;}
+ }
np = JSON_parse_integer(json, p, pe, result);
- if (np != NULL) {p = (( np))-1;}
+ if (np != NULL) {
+ {p = (( np))-1;}
+ }
p--; {p++; cs = 29; goto _out;}
}
goto st29;
tr7:
-#line 265 "parser.rl"
+#line 630 "parser.rl"
{
char *np;
np = JSON_parse_array(json, p, pe, result, current_nesting + 1);
@@ -581,7 +1159,7 @@ tr7:
}
goto st29;
tr11:
-#line 271 "parser.rl"
+#line 636 "parser.rl"
{
char *np;
np = JSON_parse_object(json, p, pe, result, current_nesting + 1);
@@ -589,7 +1167,7 @@ tr11:
}
goto st29;
tr25:
-#line 235 "parser.rl"
+#line 591 "parser.rl"
{
if (json->allow_nan) {
*result = CInfinity;
@@ -599,7 +1177,7 @@ tr25:
}
goto st29;
tr27:
-#line 228 "parser.rl"
+#line 584 "parser.rl"
{
if (json->allow_nan) {
*result = CNaN;
@@ -609,19 +1187,19 @@ tr27:
}
goto st29;
tr31:
-#line 222 "parser.rl"
+#line 578 "parser.rl"
{
*result = Qfalse;
}
goto st29;
tr34:
-#line 219 "parser.rl"
+#line 575 "parser.rl"
{
*result = Qnil;
}
goto st29;
tr37:
-#line 225 "parser.rl"
+#line 581 "parser.rl"
{
*result = Qtrue;
}
@@ -630,9 +1208,9 @@ st29:
if ( ++p == pe )
goto _test_eof29;
case 29:
-#line 277 "parser.rl"
+#line 642 "parser.rl"
{ p--; {p++; cs = 29; goto _out;} }
-#line 636 "parser.c"
+#line 1214 "parser.c"
switch( (*p) ) {
case 13: goto st29;
case 32: goto st29;
@@ -873,13 +1451,14 @@ case 28:
_out: {}
}
-#line 298 "parser.rl"
+#line 663 "parser.rl"
if (json->freeze) {
OBJ_FREEZE(*result);
}
if (cs >= JSON_value_first_final) {
+ PUSH(*result);
return p;
} else {
return NULL;
@@ -887,7 +1466,7 @@ case 28:
}
-#line 891 "parser.c"
+#line 1470 "parser.c"
enum {JSON_integer_start = 1};
enum {JSON_integer_first_final = 3};
enum {JSON_integer_error = 0};
@@ -895,7 +1474,7 @@ enum {JSON_integer_error = 0};
enum {JSON_integer_en_main = 1};
-#line 318 "parser.rl"
+#line 684 "parser.rl"
static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result)
@@ -903,15 +1482,15 @@ static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *res
int cs = EVIL;
-#line 907 "parser.c"
+#line 1486 "parser.c"
{
cs = JSON_integer_start;
}
-#line 325 "parser.rl"
+#line 691 "parser.rl"
json->memo = p;
-#line 915 "parser.c"
+#line 1494 "parser.c"
{
if ( p == pe )
goto _test_eof;
@@ -945,14 +1524,14 @@ case 3:
goto st0;
goto tr4;
tr4:
-#line 315 "parser.rl"
+#line 681 "parser.rl"
{ p--; {p++; cs = 4; goto _out;} }
goto st4;
st4:
if ( ++p == pe )
goto _test_eof4;
case 4:
-#line 956 "parser.c"
+#line 1535 "parser.c"
goto st0;
st5:
if ( ++p == pe )
@@ -971,7 +1550,7 @@ case 5:
_out: {}
}
-#line 327 "parser.rl"
+#line 693 "parser.rl"
if (cs >= JSON_integer_first_final) {
long len = p - json->memo;
@@ -986,7 +1565,7 @@ case 5:
}
-#line 990 "parser.c"
+#line 1569 "parser.c"
enum {JSON_float_start = 1};
enum {JSON_float_first_final = 8};
enum {JSON_float_error = 0};
@@ -994,7 +1573,7 @@ enum {JSON_float_error = 0};
enum {JSON_float_en_main = 1};
-#line 352 "parser.rl"
+#line 718 "parser.rl"
static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result)
@@ -1002,15 +1581,15 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul
int cs = EVIL;
-#line 1006 "parser.c"
+#line 1585 "parser.c"
{
cs = JSON_float_start;
}
-#line 359 "parser.rl"
+#line 725 "parser.rl"
json->memo = p;
-#line 1014 "parser.c"
+#line 1593 "parser.c"
{
if ( p == pe )
goto _test_eof;
@@ -1068,14 +1647,14 @@ case 8:
goto st0;
goto tr9;
tr9:
-#line 346 "parser.rl"
+#line 712 "parser.rl"
{ p--; {p++; cs = 9; goto _out;} }
goto st9;
st9:
if ( ++p == pe )
goto _test_eof9;
case 9:
-#line 1079 "parser.c"
+#line 1658 "parser.c"
goto st0;
st5:
if ( ++p == pe )
@@ -1136,12 +1715,12 @@ case 7:
_out: {}
}
-#line 361 "parser.rl"
+#line 727 "parser.rl"
if (cs >= JSON_float_first_final) {
VALUE mod = Qnil;
ID method_id = 0;
- if (!NIL_P(json->decimal_class)) {
+ if (json->decimal_class) {
if (rb_respond_to(json->decimal_class, i_try_convert)) {
mod = json->decimal_class;
method_id = i_try_convert;
@@ -1189,37 +1768,37 @@ case 7:
-#line 1193 "parser.c"
+#line 1772 "parser.c"
enum {JSON_array_start = 1};
-enum {JSON_array_first_final = 17};
+enum {JSON_array_first_final = 22};
enum {JSON_array_error = 0};
enum {JSON_array_en_main = 1};
-#line 441 "parser.rl"
+#line 804 "parser.rl"
static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
{
int cs = EVIL;
- VALUE array_class = json->array_class;
if (json->max_nesting && current_nesting > json->max_nesting) {
rb_raise(eNestingError, "nesting of %d is too deep", current_nesting);
}
- *result = NIL_P(array_class) ? rb_ary_new() : rb_class_new_instance(0, 0, array_class);
+ long stack_head = json->stack->head;
-#line 1215 "parser.c"
+#line 1793 "parser.c"
{
cs = JSON_array_start;
}
-#line 454 "parser.rl"
+#line 816 "parser.rl"
-#line 1222 "parser.c"
+#line 1800 "parser.c"
{
+ short _widec;
if ( p == pe )
goto _test_eof;
switch ( cs )
@@ -1240,7 +1819,7 @@ case 2:
case 32: goto st2;
case 34: goto tr2;
case 45: goto tr2;
- case 47: goto st13;
+ case 47: goto st18;
case 73: goto tr2;
case 78: goto tr2;
case 91: goto tr2;
@@ -1257,18 +1836,13 @@ case 2:
goto st2;
goto st0;
tr2:
-#line 418 "parser.rl"
+#line 784 "parser.rl"
{
VALUE v = Qnil;
char *np = JSON_parse_value(json, p, pe, &v, current_nesting);
if (np == NULL) {
p--; {p++; cs = 3; goto _out;}
} else {
- if (NIL_P(json->array_class)) {
- rb_ary_push(*result, v);
- } else {
- rb_funcall(*result, i_leftshift, 1, v);
- }
{p = (( np))-1;}
}
}
@@ -1277,15 +1851,23 @@ st3:
if ( ++p == pe )
goto _test_eof3;
case 3:
-#line 1281 "parser.c"
- switch( (*p) ) {
+#line 1855 "parser.c"
+ _widec = (*p);
+ if ( 44 <= (*p) && (*p) <= 44 ) {
+ _widec = (short)(128 + ((*p) - -128));
+ if (
+#line 794 "parser.rl"
+ json->allow_trailing_comma ) _widec += 256;
+ }
+ switch( _widec ) {
case 13: goto st3;
case 32: goto st3;
- case 44: goto st4;
- case 47: goto st9;
+ case 47: goto st4;
case 93: goto tr4;
+ case 300: goto st8;
+ case 556: goto st13;
}
- if ( 9 <= (*p) && (*p) <= 10 )
+ if ( 9 <= _widec && _widec <= 10 )
goto st3;
goto st0;
st4:
@@ -1293,57 +1875,67 @@ st4:
goto _test_eof4;
case 4:
switch( (*p) ) {
- case 13: goto st4;
- case 32: goto st4;
- case 34: goto tr2;
- case 45: goto tr2;
- case 47: goto st5;
- case 73: goto tr2;
- case 78: goto tr2;
- case 91: goto tr2;
- case 102: goto tr2;
- case 110: goto tr2;
- case 116: goto tr2;
- case 123: goto tr2;
+ case 42: goto st5;
+ case 47: goto st7;
}
- if ( (*p) > 10 ) {
- if ( 48 <= (*p) && (*p) <= 57 )
- goto tr2;
- } else if ( (*p) >= 9 )
- goto st4;
goto st0;
st5:
if ( ++p == pe )
goto _test_eof5;
case 5:
- switch( (*p) ) {
- case 42: goto st6;
- case 47: goto st8;
- }
- goto st0;
+ if ( (*p) == 42 )
+ goto st6;
+ goto st5;
st6:
if ( ++p == pe )
goto _test_eof6;
case 6:
- if ( (*p) == 42 )
- goto st7;
- goto st6;
+ switch( (*p) ) {
+ case 42: goto st6;
+ case 47: goto st3;
+ }
+ goto st5;
st7:
if ( ++p == pe )
goto _test_eof7;
case 7:
- switch( (*p) ) {
- case 42: goto st7;
- case 47: goto st4;
- }
- goto st6;
+ if ( (*p) == 10 )
+ goto st3;
+ goto st7;
+tr4:
+#line 796 "parser.rl"
+ { p--; {p++; cs = 22; goto _out;} }
+ goto st22;
+st22:
+ if ( ++p == pe )
+ goto _test_eof22;
+case 22:
+#line 1914 "parser.c"
+ goto st0;
st8:
if ( ++p == pe )
goto _test_eof8;
case 8:
- if ( (*p) == 10 )
- goto st4;
- goto st8;
+ switch( (*p) ) {
+ case 13: goto st8;
+ case 32: goto st8;
+ case 34: goto tr2;
+ case 45: goto tr2;
+ case 47: goto st9;
+ case 73: goto tr2;
+ case 78: goto tr2;
+ case 91: goto tr2;
+ case 102: goto tr2;
+ case 110: goto tr2;
+ case 116: goto tr2;
+ case 123: goto tr2;
+ }
+ if ( (*p) > 10 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr2;
+ } else if ( (*p) >= 9 )
+ goto st8;
+ goto st0;
st9:
if ( ++p == pe )
goto _test_eof9;
@@ -1366,7 +1958,7 @@ st11:
case 11:
switch( (*p) ) {
case 42: goto st11;
- case 47: goto st3;
+ case 47: goto st8;
}
goto st10;
st12:
@@ -1374,50 +1966,252 @@ st12:
goto _test_eof12;
case 12:
if ( (*p) == 10 )
- goto st3;
+ goto st8;
goto st12;
-tr4:
-#line 433 "parser.rl"
- { p--; {p++; cs = 17; goto _out;} }
- goto st17;
-st17:
- if ( ++p == pe )
- goto _test_eof17;
-case 17:
-#line 1388 "parser.c"
- goto st0;
st13:
if ( ++p == pe )
goto _test_eof13;
case 13:
- switch( (*p) ) {
- case 42: goto st14;
- case 47: goto st16;
- }
+ _widec = (*p);
+ if ( (*p) < 13 ) {
+ if ( (*p) > 9 ) {
+ if ( 10 <= (*p) && (*p) <= 10 ) {
+ _widec = (short)(128 + ((*p) - -128));
+ if (
+#line 794 "parser.rl"
+ json->allow_trailing_comma ) _widec += 256;
+ }
+ } else if ( (*p) >= 9 ) {
+ _widec = (short)(128 + ((*p) - -128));
+ if (
+#line 794 "parser.rl"
+ json->allow_trailing_comma ) _widec += 256;
+ }
+ } else if ( (*p) > 13 ) {
+ if ( (*p) > 32 ) {
+ if ( 47 <= (*p) && (*p) <= 47 ) {
+ _widec = (short)(128 + ((*p) - -128));
+ if (
+#line 794 "parser.rl"
+ json->allow_trailing_comma ) _widec += 256;
+ }
+ } else if ( (*p) >= 32 ) {
+ _widec = (short)(128 + ((*p) - -128));
+ if (
+#line 794 "parser.rl"
+ json->allow_trailing_comma ) _widec += 256;
+ }
+ } else {
+ _widec = (short)(128 + ((*p) - -128));
+ if (
+#line 794 "parser.rl"
+ json->allow_trailing_comma ) _widec += 256;
+ }
+ switch( _widec ) {
+ case 34: goto tr2;
+ case 45: goto tr2;
+ case 73: goto tr2;
+ case 78: goto tr2;
+ case 91: goto tr2;
+ case 93: goto tr4;
+ case 102: goto tr2;
+ case 110: goto tr2;
+ case 116: goto tr2;
+ case 123: goto tr2;
+ case 269: goto st8;
+ case 288: goto st8;
+ case 303: goto st9;
+ case 525: goto st13;
+ case 544: goto st13;
+ case 559: goto st14;
+ }
+ if ( _widec < 265 ) {
+ if ( 48 <= _widec && _widec <= 57 )
+ goto tr2;
+ } else if ( _widec > 266 ) {
+ if ( 521 <= _widec && _widec <= 522 )
+ goto st13;
+ } else
+ goto st8;
goto st0;
st14:
if ( ++p == pe )
goto _test_eof14;
case 14:
- if ( (*p) == 42 )
- goto st15;
- goto st14;
+ _widec = (*p);
+ if ( (*p) > 42 ) {
+ if ( 47 <= (*p) && (*p) <= 47 ) {
+ _widec = (short)(128 + ((*p) - -128));
+ if (
+#line 794 "parser.rl"
+ json->allow_trailing_comma ) _widec += 256;
+ }
+ } else if ( (*p) >= 42 ) {
+ _widec = (short)(128 + ((*p) - -128));
+ if (
+#line 794 "parser.rl"
+ json->allow_trailing_comma ) _widec += 256;
+ }
+ switch( _widec ) {
+ case 298: goto st10;
+ case 303: goto st12;
+ case 554: goto st15;
+ case 559: goto st17;
+ }
+ goto st0;
st15:
if ( ++p == pe )
goto _test_eof15;
case 15:
- switch( (*p) ) {
- case 42: goto st15;
- case 47: goto st2;
- }
- goto st14;
+ _widec = (*p);
+ if ( (*p) < 42 ) {
+ if ( (*p) <= 41 ) {
+ _widec = (short)(128 + ((*p) - -128));
+ if (
+#line 794 "parser.rl"
+ json->allow_trailing_comma ) _widec += 256;
+ }
+ } else if ( (*p) > 42 ) {
+ if ( 43 <= (*p) )
+ { _widec = (short)(128 + ((*p) - -128));
+ if (
+#line 794 "parser.rl"
+ json->allow_trailing_comma ) _widec += 256;
+ }
+ } else {
+ _widec = (short)(128 + ((*p) - -128));
+ if (
+#line 794 "parser.rl"
+ json->allow_trailing_comma ) _widec += 256;
+ }
+ switch( _widec ) {
+ case 298: goto st11;
+ case 554: goto st16;
+ }
+ if ( _widec > 383 ) {
+ if ( 384 <= _widec && _widec <= 639 )
+ goto st15;
+ } else if ( _widec >= 128 )
+ goto st10;
+ goto st0;
st16:
if ( ++p == pe )
goto _test_eof16;
case 16:
+ _widec = (*p);
+ if ( (*p) < 43 ) {
+ if ( (*p) > 41 ) {
+ if ( 42 <= (*p) && (*p) <= 42 ) {
+ _widec = (short)(128 + ((*p) - -128));
+ if (
+#line 794 "parser.rl"
+ json->allow_trailing_comma ) _widec += 256;
+ }
+ } else {
+ _widec = (short)(128 + ((*p) - -128));
+ if (
+#line 794 "parser.rl"
+ json->allow_trailing_comma ) _widec += 256;
+ }
+ } else if ( (*p) > 46 ) {
+ if ( (*p) > 47 ) {
+ if ( 48 <= (*p) )
+ { _widec = (short)(128 + ((*p) - -128));
+ if (
+#line 794 "parser.rl"
+ json->allow_trailing_comma ) _widec += 256;
+ }
+ } else if ( (*p) >= 47 ) {
+ _widec = (short)(128 + ((*p) - -128));
+ if (
+#line 794 "parser.rl"
+ json->allow_trailing_comma ) _widec += 256;
+ }
+ } else {
+ _widec = (short)(128 + ((*p) - -128));
+ if (
+#line 794 "parser.rl"
+ json->allow_trailing_comma ) _widec += 256;
+ }
+ switch( _widec ) {
+ case 298: goto st11;
+ case 303: goto st8;
+ case 554: goto st16;
+ case 559: goto st13;
+ }
+ if ( _widec > 383 ) {
+ if ( 384 <= _widec && _widec <= 639 )
+ goto st15;
+ } else if ( _widec >= 128 )
+ goto st10;
+ goto st0;
+st17:
+ if ( ++p == pe )
+ goto _test_eof17;
+case 17:
+ _widec = (*p);
+ if ( (*p) < 10 ) {
+ if ( (*p) <= 9 ) {
+ _widec = (short)(128 + ((*p) - -128));
+ if (
+#line 794 "parser.rl"
+ json->allow_trailing_comma ) _widec += 256;
+ }
+ } else if ( (*p) > 10 ) {
+ if ( 11 <= (*p) )
+ { _widec = (short)(128 + ((*p) - -128));
+ if (
+#line 794 "parser.rl"
+ json->allow_trailing_comma ) _widec += 256;
+ }
+ } else {
+ _widec = (short)(128 + ((*p) - -128));
+ if (
+#line 794 "parser.rl"
+ json->allow_trailing_comma ) _widec += 256;
+ }
+ switch( _widec ) {
+ case 266: goto st8;
+ case 522: goto st13;
+ }
+ if ( _widec > 383 ) {
+ if ( 384 <= _widec && _widec <= 639 )
+ goto st17;
+ } else if ( _widec >= 128 )
+ goto st12;
+ goto st0;
+st18:
+ if ( ++p == pe )
+ goto _test_eof18;
+case 18:
+ switch( (*p) ) {
+ case 42: goto st19;
+ case 47: goto st21;
+ }
+ goto st0;
+st19:
+ if ( ++p == pe )
+ goto _test_eof19;
+case 19:
+ if ( (*p) == 42 )
+ goto st20;
+ goto st19;
+st20:
+ if ( ++p == pe )
+ goto _test_eof20;
+case 20:
+ switch( (*p) ) {
+ case 42: goto st20;
+ case 47: goto st2;
+ }
+ goto st19;
+st21:
+ if ( ++p == pe )
+ goto _test_eof21;
+case 21:
if ( (*p) == 10 )
goto st2;
- goto st16;
+ goto st21;
}
_test_eof2: cs = 2; goto _test_eof;
_test_eof3: cs = 3; goto _test_eof;
@@ -1425,24 +2219,45 @@ case 16:
_test_eof5: cs = 5; goto _test_eof;
_test_eof6: cs = 6; goto _test_eof;
_test_eof7: cs = 7; goto _test_eof;
+ _test_eof22: cs = 22; goto _test_eof;
_test_eof8: cs = 8; goto _test_eof;
_test_eof9: cs = 9; goto _test_eof;
_test_eof10: cs = 10; goto _test_eof;
_test_eof11: cs = 11; goto _test_eof;
_test_eof12: cs = 12; goto _test_eof;
- _test_eof17: cs = 17; goto _test_eof;
_test_eof13: cs = 13; goto _test_eof;
_test_eof14: cs = 14; goto _test_eof;
_test_eof15: cs = 15; goto _test_eof;
_test_eof16: cs = 16; goto _test_eof;
+ _test_eof17: cs = 17; goto _test_eof;
+ _test_eof18: cs = 18; goto _test_eof;
+ _test_eof19: cs = 19; goto _test_eof;
+ _test_eof20: cs = 20; goto _test_eof;
+ _test_eof21: cs = 21; goto _test_eof;
_test_eof: {}
_out: {}
}
-#line 455 "parser.rl"
+#line 817 "parser.rl"
if(cs >= JSON_array_first_final) {
+ long count = json->stack->head - stack_head;
+
+ if (RB_UNLIKELY(json->array_class)) {
+ VALUE array = rb_class_new_instance(0, 0, json->array_class);
+ VALUE *items = rvalue_stack_peek(json->stack, count);
+ long index;
+ for (index = 0; index < count; index++) {
+ rb_funcall(array, i_leftshift, 1, items[index]);
+ }
+ *result = array;
+ } else {
+ VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(json->stack, count));
+ *result = array;
+ }
+ rvalue_stack_pop(json->stack, count);
+
return p + 1;
} else {
raise_parse_error("unexpected token at '%s'", p);
@@ -1458,7 +2273,7 @@ static inline VALUE build_string(const char *start, const char *end, bool intern
VALUE result;
# ifdef HAVE_RB_ENC_INTERNED_STR
if (intern) {
- result = rb_enc_interned_str(start, (long)(end - start), rb_utf8_encoding());
+ result = rb_enc_interned_str(start, (long)(end - start), enc_utf8);
} else {
result = rb_utf8_str_new(start, (long)(end - start));
}
@@ -1476,13 +2291,26 @@ static inline VALUE build_string(const char *start, const char *end, bool intern
return result;
}
-static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bool symbolize)
+static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize)
{
size_t bufferSize = stringEnd - string;
char *p = string, *pe = string, *unescape, *bufferStart, *buffer;
int unescape_len;
char buf[4];
+ if (is_name) {
+ VALUE cached_key;
+ if (RB_UNLIKELY(symbolize)) {
+ cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize);
+ } else {
+ cached_key = rstring_cache_fetch(&json->name_cache, string, bufferSize);
+ }
+
+ if (RB_LIKELY(cached_key)) {
+ return cached_key;
+ }
+ }
+
pe = memchr(p, '\\', bufferSize);
if (RB_LIKELY(pe == NULL)) {
return build_string(string, stringEnd, intern, symbolize);
@@ -1585,7 +2413,7 @@ static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bo
}
-#line 1589 "parser.c"
+#line 2417 "parser.c"
enum {JSON_string_start = 1};
enum {JSON_string_first_final = 8};
enum {JSON_string_error = 0};
@@ -1593,7 +2421,7 @@ enum {JSON_string_error = 0};
enum {JSON_string_en_main = 1};
-#line 617 "parser.rl"
+#line 1008 "parser.rl"
static int
@@ -1614,15 +2442,15 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu
VALUE match_string;
-#line 1618 "parser.c"
+#line 2446 "parser.c"
{
cs = JSON_string_start;
}
-#line 637 "parser.rl"
+#line 1028 "parser.rl"
json->memo = p;
-#line 1626 "parser.c"
+#line 2454 "parser.c"
{
if ( p == pe )
goto _test_eof;
@@ -1647,9 +2475,9 @@ case 2:
goto st0;
goto st2;
tr2:
-#line 604 "parser.rl"
+#line 995 "parser.rl"
{
- *result = json_string_unescape(json->memo + 1, p, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
+ *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
if (NIL_P(*result)) {
p--;
{p++; cs = 8; goto _out;}
@@ -1657,14 +2485,14 @@ tr2:
{p = (( p + 1))-1;}
}
}
-#line 614 "parser.rl"
+#line 1005 "parser.rl"
{ p--; {p++; cs = 8; goto _out;} }
goto st8;
st8:
if ( ++p == pe )
goto _test_eof8;
case 8:
-#line 1668 "parser.c"
+#line 2496 "parser.c"
goto st0;
st3:
if ( ++p == pe )
@@ -1740,7 +2568,7 @@ case 7:
_out: {}
}
-#line 639 "parser.rl"
+#line 1030 "parser.rl"
if (json->create_additions && RTEST(match_string = json->match_string)) {
VALUE klass;
@@ -1776,7 +2604,7 @@ static VALUE convert_encoding(VALUE source)
{
int encindex = RB_ENCODING_GET(source);
- if (encindex == utf8_encindex) {
+ if (RB_LIKELY(encindex == utf8_encindex)) {
return source;
}
@@ -1788,6 +2616,68 @@ static VALUE convert_encoding(VALUE source)
return rb_funcall(source, i_encode, 1, Encoding_UTF_8);
}
+/*
+ * rb_hash_foreach() callback used by parser_init(): applies one option
+ * (key => val) from the options hash to the JSON_Parser passed through `data`.
+ *
+ * Keys are compared by identity against the pre-built sym_* symbols; keys that
+ * match none of them are silently ignored. Class-like and string-like options
+ * are normalised so that "not set" is always stored as Qfalse, which lets the
+ * parser test them with a plain truthiness check.
+ *
+ * Always returns ST_CONTINUE so every entry of the hash is visited.
+ * Raises TypeError when :max_nesting is truthy but not a Fixnum.
+ */
+static int configure_parser_i(VALUE key, VALUE val, VALUE data)
+{
+    JSON_Parser *json = (JSON_Parser *)data;
+
+    if (key == sym_max_nesting) {
+        if (RTEST(val)) {
+            // FIX2INT performs no type check of its own, so reject anything
+            // that is not a Fixnum first (this is what the previous
+            // option-parsing code did before it was moved here).
+            Check_Type(val, T_FIXNUM);
+            json->max_nesting = FIX2INT(val);
+        } else {
+            // nil / false disables the nesting limit.
+            json->max_nesting = 0;
+        }
+    }
+    else if (key == sym_allow_nan)            { json->allow_nan = RTEST(val); }
+    else if (key == sym_allow_trailing_comma) { json->allow_trailing_comma = RTEST(val); }
+    else if (key == sym_symbolize_names)      { json->symbolize_names = RTEST(val); }
+    else if (key == sym_freeze)               { json->freeze = RTEST(val); }
+    else if (key == sym_create_id)            { json->create_id = RTEST(val) ? val : Qfalse; }
+    else if (key == sym_object_class)         { json->object_class = RTEST(val) ? val : Qfalse; }
+    else if (key == sym_array_class)          { json->array_class = RTEST(val) ? val : Qfalse; }
+    else if (key == sym_decimal_class)        { json->decimal_class = RTEST(val) ? val : Qfalse; }
+    else if (key == sym_match_string)         { json->match_string = RTEST(val) ? val : Qfalse; }
+    else if (key == sym_create_additions) {
+        if (NIL_P(val)) {
+            // An explicit nil still enables additions, but is recorded as
+            // the deprecated way of doing so.
+            json->create_additions = true;
+            json->deprecated_create_additions = true;
+        } else {
+            json->create_additions = RTEST(val);
+            json->deprecated_create_additions = false;
+        }
+    }
+
+    return ST_CONTINUE;
+}
+
+/*
+ * Common set-up for a JSON_Parser, used by both the instance initializer and
+ * the singleton parse entry point.
+ *
+ *   json   - parser state to fill in; must not already hold a source
+ *   source - the document to parse (coerced to a String, then passed through
+ *            convert_encoding())
+ *   opts   - nil, or a Hash of options handled by configure_parser_i()
+ *
+ * Raises TypeError if the parser was already initialised or opts is neither
+ * nil nor a Hash, and ArgumentError if :symbolize_names and :create_additions
+ * are both enabled.
+ */
+static void parser_init(JSON_Parser *json, VALUE source, VALUE opts)
+{
+    if (json->Vsource) {
+        rb_raise(rb_eTypeError, "already initialized instance");
+    }
+
+    // Defaults; the options hash may override max_nesting below.
+    json->max_nesting = 100;
+    json->fbuffer.initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT;
+
+    const bool opts_given = !NIL_P(opts);
+    if (opts_given) {
+        Check_Type(opts, T_HASH);
+    }
+
+    if (opts_given && RHASH_SIZE(opts) > 0) {
+        // Callers usually pass only a handful of options, so walking the
+        // supplied keys is cheaper than probing for every known key.
+        rb_hash_foreach(opts, configure_parser_i, (VALUE)json);
+
+        const bool conflicting = json->symbolize_names && json->create_additions;
+        if (conflicting) {
+            rb_raise(rb_eArgError,
+                    "options :symbolize_names and :create_additions cannot be "
+                    " used in conjunction");
+        }
+
+        // Fall back to the module-wide create_id only when additions are
+        // enabled and the caller did not supply one.
+        if (!json->create_id && json->create_additions) {
+            json->create_id = rb_funcall(mJSON, i_create_id, 0);
+        }
+    }
+
+    VALUE utf8_source = convert_encoding(StringValue(source));
+    StringValue(utf8_source);
+    json->Vsource = utf8_source;
+    json->source = RSTRING_PTR(utf8_source);
+    json->len = RSTRING_LEN(utf8_source);
+}
+
/*
* call-seq: new(source, opts => {})
*
@@ -1822,122 +2712,16 @@ static VALUE convert_encoding(VALUE source)
*/
static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
{
- VALUE source, opts;
GET_PARSER_INIT;
- if (json->Vsource) {
- rb_raise(rb_eTypeError, "already initialized instance");
- }
-
rb_check_arity(argc, 1, 2);
- source = argv[0];
- opts = Qnil;
- if (argc == 2) {
- opts = argv[1];
- Check_Type(argv[1], T_HASH);
- if (RHASH_SIZE(argv[1]) > 0) {
- opts = argv[1];
- }
- }
- if (!NIL_P(opts)) {
- VALUE tmp = ID2SYM(i_max_nesting);
- if (option_given_p(opts, tmp)) {
- VALUE max_nesting = rb_hash_aref(opts, tmp);
- if (RTEST(max_nesting)) {
- Check_Type(max_nesting, T_FIXNUM);
- json->max_nesting = FIX2INT(max_nesting);
- } else {
- json->max_nesting = 0;
- }
- } else {
- json->max_nesting = 100;
- }
- tmp = ID2SYM(i_allow_nan);
- if (option_given_p(opts, tmp)) {
- json->allow_nan = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
- } else {
- json->allow_nan = 0;
- }
- tmp = ID2SYM(i_symbolize_names);
- if (option_given_p(opts, tmp)) {
- json->symbolize_names = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
- } else {
- json->symbolize_names = 0;
- }
- tmp = ID2SYM(i_freeze);
- if (option_given_p(opts, tmp)) {
- json->freeze = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
- } else {
- json->freeze = 0;
- }
- tmp = ID2SYM(i_create_additions);
- if (option_given_p(opts, tmp)) {
- tmp = rb_hash_aref(opts, tmp);
- if (NIL_P(tmp)) {
- json->create_additions = 1;
- json->deprecated_create_additions = 1;
- } else {
- json->create_additions = RTEST(tmp);
- json->deprecated_create_additions = 0;
- }
- }
-
- if (json->symbolize_names && json->create_additions) {
- rb_raise(rb_eArgError,
- "options :symbolize_names and :create_additions cannot be "
- " used in conjunction");
- }
- tmp = ID2SYM(i_create_id);
- if (option_given_p(opts, tmp)) {
- json->create_id = rb_hash_aref(opts, tmp);
- } else {
- json->create_id = rb_funcall(mJSON, i_create_id, 0);
- }
- tmp = ID2SYM(i_object_class);
- if (option_given_p(opts, tmp)) {
- json->object_class = rb_hash_aref(opts, tmp);
- } else {
- json->object_class = Qnil;
- }
- tmp = ID2SYM(i_array_class);
- if (option_given_p(opts, tmp)) {
- json->array_class = rb_hash_aref(opts, tmp);
- } else {
- json->array_class = Qnil;
- }
- tmp = ID2SYM(i_decimal_class);
- if (option_given_p(opts, tmp)) {
- json->decimal_class = rb_hash_aref(opts, tmp);
- } else {
- json->decimal_class = Qnil;
- }
- tmp = ID2SYM(i_match_string);
- if (option_given_p(opts, tmp)) {
- VALUE match_string = rb_hash_aref(opts, tmp);
- json->match_string = RTEST(match_string) ? match_string : Qnil;
- } else {
- json->match_string = Qnil;
- }
- } else {
- json->max_nesting = 100;
- json->allow_nan = 0;
- json->create_additions = 0;
- json->create_id = Qnil;
- json->object_class = Qnil;
- json->array_class = Qnil;
- json->decimal_class = Qnil;
- }
- source = convert_encoding(StringValue(source));
- StringValue(source);
- json->len = RSTRING_LEN(source);
- json->source = RSTRING_PTR(source);
- json->Vsource = source;
+ // Option handling and source set-up are delegated to parser_init(), which
+ // cParser_m_parse() also calls; a missing second argument is passed as nil.
+ parser_init(json, argv[0], argc == 2 ? argv[1] : Qnil);
return self;
}
-#line 1941 "parser.c"
+#line 2725 "parser.c"
enum {JSON_start = 1};
enum {JSON_first_final = 10};
enum {JSON_error = 0};
@@ -1945,7 +2729,7 @@ enum {JSON_error = 0};
enum {JSON_en_main = 1};
-#line 849 "parser.rl"
+#line 1196 "parser.rl"
/*
@@ -1962,17 +2746,28 @@ static VALUE cParser_parse(VALUE self)
VALUE result = Qnil;
GET_PARSER;
+ char stack_buffer[FBUFFER_STACK_SIZE];
+ fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE);
+
+ VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA];
+ rvalue_stack stack = {
+ .type = RVALUE_STACK_STACK_ALLOCATED,
+ .ptr = rvalue_stack_buffer,
+ .capa = RVALUE_STACK_INITIAL_CAPA,
+ };
+ json->stack = &stack;
-#line 1967 "parser.c"
+
+#line 2762 "parser.c"
{
cs = JSON_start;
}
-#line 866 "parser.rl"
+#line 1224 "parser.rl"
p = json->source;
pe = p + json->len;
-#line 1976 "parser.c"
+#line 2771 "parser.c"
{
if ( p == pe )
goto _test_eof;
@@ -2006,7 +2801,7 @@ st0:
cs = 0;
goto _out;
tr2:
-#line 841 "parser.rl"
+#line 1188 "parser.rl"
{
char *np = JSON_parse_value(json, p, pe, &result, 0);
if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;}
@@ -2016,7 +2811,7 @@ st10:
if ( ++p == pe )
goto _test_eof10;
case 10:
-#line 2020 "parser.c"
+#line 2815 "parser.c"
switch( (*p) ) {
case 13: goto st10;
case 32: goto st10;
@@ -2105,7 +2900,11 @@ case 9:
_out: {}
}
-#line 869 "parser.rl"
+#line 1227 "parser.rl"
+
+ if (json->stack_handle) {
+ rvalue_stack_eagerly_release(json->stack_handle);
+ }
if (cs >= JSON_first_final && p == pe) {
return result;
@@ -2115,18 +2914,183 @@ case 9:
}
}
-#ifndef HAVE_RB_GC_MARK_LOCATIONS
-// For TruffleRuby
-void rb_gc_mark_locations(const VALUE *start, const VALUE *end)
+/*
+ * Singleton-method counterpart of cParser_parse (registered on cParser as
+ * "parse" with two arguments: source and opts).
+ *
+ * Parses `source` with a JSON_Parser held in a local variable of this
+ * function and returns the parsed value. Raises a parse error, via
+ * raise_parse_error, when the state machine does not end in a final state
+ * or input remains unconsumed.
+ *
+ * The state machine below is generated (see the #line directives pointing
+ * at parser.rl); it hands the document value to JSON_parse_value and
+ * otherwise only skips whitespace and comments.
+ */
+static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE opts)
{
- VALUE *value = start;
+ char *p, *pe;
+ int cs = EVIL;
+ VALUE result = Qnil;
- while (value < end) {
- rb_gc_mark(*value);
- value++;
+ // Zero-initialised local parser state, configured by parser_init().
+ JSON_Parser _parser = {0};
+ JSON_Parser *json = &_parser;
+ parser_init(json, source, opts);
+
+ // Give the parser's fbuffer a local array as its initial storage.
+ char stack_buffer[FBUFFER_STACK_SIZE];
+ fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE);
+
+ // The value stack likewise starts out backed by a local array.
+ VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA];
+ rvalue_stack stack = {
+ .type = RVALUE_STACK_STACK_ALLOCATED,
+ .ptr = rvalue_stack_buffer,
+ .capa = RVALUE_STACK_INITIAL_CAPA,
+ };
+ json->stack = &stack;
+
+
+#line 2940 "parser.c"
+ {
+ cs = JSON_start;
+ }
+
+#line 1262 "parser.rl"
+ p = json->source;
+ pe = p + json->len;
+
+#line 2949 "parser.c"
+ {
+ if ( p == pe )
+ goto _test_eof;
+ switch ( cs )
+ {
+st1:
+ if ( ++p == pe )
+ goto _test_eof1;
+case 1:
+ switch( (*p) ) {
+ case 13: goto st1;
+ case 32: goto st1;
+ case 34: goto tr2;
+ case 45: goto tr2;
+ case 47: goto st6;
+ case 73: goto tr2;
+ case 78: goto tr2;
+ case 91: goto tr2;
+ case 102: goto tr2;
+ case 110: goto tr2;
+ case 116: goto tr2;
+ case 123: goto tr2;
+ }
+ if ( (*p) > 10 ) {
+ if ( 48 <= (*p) && (*p) <= 57 )
+ goto tr2;
+ } else if ( (*p) >= 9 )
+ goto st1;
+ goto st0;
+st0:
+cs = 0;
+ goto _out;
+tr2:
+#line 1188 "parser.rl"
+ {
+ char *np = JSON_parse_value(json, p, pe, &result, 0);
+ if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;}
+ }
+ goto st10;
+st10:
+ if ( ++p == pe )
+ goto _test_eof10;
+case 10:
+#line 2993 "parser.c"
+ switch( (*p) ) {
+ case 13: goto st10;
+ case 32: goto st10;
+ case 47: goto st2;
+ }
+ if ( 9 <= (*p) && (*p) <= 10 )
+ goto st10;
+ goto st0;
+st2:
+ if ( ++p == pe )
+ goto _test_eof2;
+case 2:
+ switch( (*p) ) {
+ case 42: goto st3;
+ case 47: goto st5;
+ }
+ goto st0;
+st3:
+ if ( ++p == pe )
+ goto _test_eof3;
+case 3:
+ if ( (*p) == 42 )
+ goto st4;
+ goto st3;
+st4:
+ if ( ++p == pe )
+ goto _test_eof4;
+case 4:
+ switch( (*p) ) {
+ case 42: goto st4;
+ case 47: goto st10;
+ }
+ goto st3;
+st5:
+ if ( ++p == pe )
+ goto _test_eof5;
+case 5:
+ if ( (*p) == 10 )
+ goto st10;
+ goto st5;
+st6:
+ if ( ++p == pe )
+ goto _test_eof6;
+case 6:
+ switch( (*p) ) {
+ case 42: goto st7;
+ case 47: goto st9;
+ }
+ goto st0;
+st7:
+ if ( ++p == pe )
+ goto _test_eof7;
+case 7:
+ if ( (*p) == 42 )
+ goto st8;
+ goto st7;
+st8:
+ if ( ++p == pe )
+ goto _test_eof8;
+case 8:
+ switch( (*p) ) {
+ case 42: goto st8;
+ case 47: goto st1;
+ }
+ goto st7;
+st9:
+ if ( ++p == pe )
+ goto _test_eof9;
+case 9:
+ if ( (*p) == 10 )
+ goto st1;
+ goto st9;
+ }
+ _test_eof1: cs = 1; goto _test_eof;
+ _test_eof10: cs = 10; goto _test_eof;
+ _test_eof2: cs = 2; goto _test_eof;
+ _test_eof3: cs = 3; goto _test_eof;
+ _test_eof4: cs = 4; goto _test_eof;
+ _test_eof5: cs = 5; goto _test_eof;
+ _test_eof6: cs = 6; goto _test_eof;
+ _test_eof7: cs = 7; goto _test_eof;
+ _test_eof8: cs = 8; goto _test_eof;
+ _test_eof9: cs = 9; goto _test_eof;
+
+ _test_eof: {}
+ _out: {}
+ }
+
+#line 1265 "parser.rl"
+
+ // A stack_handle is only set in some cases; when present, release it now.
+ if (json->stack_handle) {
+ rvalue_stack_eagerly_release(json->stack_handle);
+ }
+
+ // Succeed only if a final state was reached and all input was consumed.
+ if (cs >= JSON_first_final && p == pe) {
+ return result;
+ } else {
+ raise_parse_error("unexpected token at '%s'", p);
+ return Qnil;
}
}
-#endif
static void JSON_mark(void *ptr)
{
@@ -2137,6 +3101,8 @@ static void JSON_mark(void *ptr)
rb_gc_mark(json->array_class);
rb_gc_mark(json->decimal_class);
rb_gc_mark(json->match_string);
+ rb_gc_mark(json->stack_handle);
+
const VALUE *name_cache_entries = &json->name_cache.entries[0];
rb_gc_mark_locations(name_cache_entries, name_cache_entries + json->name_cache.length);
}
@@ -2199,6 +3165,8 @@ void Init_parser(void)
rb_define_method(cParser, "parse", cParser_parse, 0);
rb_define_method(cParser, "source", cParser_source, 0);
+ rb_define_singleton_method(cParser, "parse", cParser_m_parse, 2);
+
CNaN = rb_const_get(mJSON, rb_intern("NaN"));
rb_gc_register_mark_object(CNaN);
@@ -2211,31 +3179,35 @@ void Init_parser(void)
rb_global_variable(&Encoding_UTF_8);
Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8"));
+ sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
+ sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
+ sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));
+ sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
+ sym_freeze = ID2SYM(rb_intern("freeze"));
+ sym_create_additions = ID2SYM(rb_intern("create_additions"));
+ sym_create_id = ID2SYM(rb_intern("create_id"));
+ sym_object_class = ID2SYM(rb_intern("object_class"));
+ sym_array_class = ID2SYM(rb_intern("array_class"));
+ sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
+ sym_match_string = ID2SYM(rb_intern("match_string"));
+
+ i_create_id = rb_intern("create_id");
i_json_creatable_p = rb_intern("json_creatable?");
i_json_create = rb_intern("json_create");
- i_create_id = rb_intern("create_id");
- i_create_additions = rb_intern("create_additions");
i_chr = rb_intern("chr");
- i_max_nesting = rb_intern("max_nesting");
- i_allow_nan = rb_intern("allow_nan");
- i_symbolize_names = rb_intern("symbolize_names");
- i_object_class = rb_intern("object_class");
- i_array_class = rb_intern("array_class");
- i_decimal_class = rb_intern("decimal_class");
i_match = rb_intern("match");
- i_match_string = rb_intern("match_string");
i_deep_const_get = rb_intern("deep_const_get");
i_aset = rb_intern("[]=");
i_aref = rb_intern("[]");
i_leftshift = rb_intern("<<");
i_new = rb_intern("new");
i_try_convert = rb_intern("try_convert");
- i_freeze = rb_intern("freeze");
i_uminus = rb_intern("-@");
i_encode = rb_intern("encode");
binary_encindex = rb_ascii8bit_encindex();
utf8_encindex = rb_utf8_encindex();
+ enc_utf8 = rb_utf8_encoding();
}
/*
diff --git a/ext/json/parser/parser.h b/ext/json/parser/parser.h
deleted file mode 100644
index d1863a2b9a..0000000000
--- a/ext/json/parser/parser.h
+++ /dev/null
@@ -1,78 +0,0 @@
-#ifndef _PARSER_H_
-#define _PARSER_H_
-
-#include "ruby.h"
-
-/* This is the fallback definition from Ruby 3.4 */
-#ifndef RBIMPL_STDBOOL_H
-#if defined(__cplusplus)
-# if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L)
-# include <cstdbool>
-# endif
-#elif defined(HAVE_STDBOOL_H)
-# include <stdbool.h>
-#elif !defined(HAVE__BOOL)
-typedef unsigned char _Bool;
-# define bool _Bool
-# define true ((_Bool)+1)
-# define false ((_Bool)+0)
-# define __bool_true_false_are_defined
-#endif
-#endif
-
-#ifndef MAYBE_UNUSED
-# define MAYBE_UNUSED(x) x
-#endif
-
-#define option_given_p(opts, key) (rb_hash_lookup2(opts, key, Qundef) != Qundef)
-
-typedef struct JSON_ParserStruct {
- VALUE Vsource;
- char *source;
- long len;
- char *memo;
- VALUE create_id;
- VALUE object_class;
- VALUE array_class;
- VALUE decimal_class;
- VALUE match_string;
- FBuffer fbuffer;
- int max_nesting;
- char allow_nan;
- char parsing_name;
- char symbolize_names;
- char freeze;
- char create_additions;
- char deprecated_create_additions;
-} JSON_Parser;
-
-#define GET_PARSER \
- GET_PARSER_INIT; \
- if (!json->Vsource) rb_raise(rb_eTypeError, "uninitialized instance")
-#define GET_PARSER_INIT \
- JSON_Parser *json; \
- TypedData_Get_Struct(self, JSON_Parser, &JSON_Parser_type, json)
-
-#define MinusInfinity "-Infinity"
-#define EVIL 0x666
-
-static uint32_t unescape_unicode(const unsigned char *p);
-static int convert_UTF32_to_UTF8(char *buf, uint32_t ch);
-static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
-static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
-static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result);
-static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result);
-static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
-static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bool symbolize);
-static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result);
-static VALUE convert_encoding(VALUE source);
-static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self);
-static VALUE cParser_parse(VALUE self);
-static void JSON_mark(void *json);
-static void JSON_free(void *json);
-static VALUE cJSON_parser_s_allocate(VALUE klass);
-static VALUE cParser_source(VALUE self);
-
-static const rb_data_type_t JSON_Parser_type;
-
-#endif
diff --git a/ext/json/parser/parser.rl b/ext/json/parser/parser.rl
index 15ec2b6843..6d4cc7a5b0 100644
--- a/ext/json/parser/parser.rl
+++ b/ext/json/parser/parser.rl
@@ -1,5 +1,308 @@
+#include "ruby.h"
#include "../fbuffer/fbuffer.h"
-#include "parser.h"
+
+static VALUE mJSON, mExt, cParser, eNestingError, Encoding_UTF_8;
+static VALUE CNaN, CInfinity, CMinusInfinity;
+
+static ID i_json_creatable_p, i_json_create, i_create_id,
+ i_chr, i_deep_const_get, i_match, i_aset, i_aref,
+ i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
+
+static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze,
+ sym_create_additions, sym_create_id, sym_object_class, sym_array_class,
+ sym_decimal_class, sym_match_string;
+
+static int binary_encindex;
+static int utf8_encindex;
+
+#ifndef HAVE_RB_GC_MARK_LOCATIONS
+// For TruffleRuby
+// Fallback for runtimes that do not provide rb_gc_mark_locations():
+// marks every VALUE in the half-open range [start, end).
+void rb_gc_mark_locations(const VALUE *start, const VALUE *end)
+{
+    // The range is only read, so the cursor stays const-qualified; assigning
+    // `start` to a plain `VALUE *` would discard the qualifier.
+    const VALUE *value = start;
+
+    while (value < end) {
+        rb_gc_mark(*value);
+        value++;
+    }
+}
+#endif
+
+#ifndef HAVE_RB_HASH_BULK_INSERT
+// For TruffleRuby
+// Fallback for runtimes that do not provide rb_hash_bulk_insert().
+// `pairs` is a flat [name, value, name, value, ...] array holding `count`
+// VALUEs in total; each pair is stored into `hash` with rb_hash_aset().
+void rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash)
+{
+    long cursor;
+    for (cursor = 0; cursor < count; cursor += 2) {
+        rb_hash_aset(hash, pairs[cursor], pairs[cursor + 1]);
+    }
+    RB_GC_GUARD(hash);
+}
+#endif
+
+/* name cache */
+
+#include <string.h>
+#include <ctype.h>
+
+// Object names are likely to be repeated, and are frozen.
+// As such we can re-use them if we keep a cache of the ones we've seen so far,
+// and save much more expensive lookups into the global fstring table.
+// This cache implementation is deliberately simple, as we're optimizing for compactness,
+// to be able to fit safely on the stack.
+// As such, binary search into a sorted array gives a good tradeoff between compactness and
+// performance.
+#define JSON_RVALUE_CACHE_CAPA 63
+typedef struct rvalue_cache_struct {
+ int length;
+ VALUE entries[JSON_RVALUE_CACHE_CAPA];
+} rvalue_cache;
+
+static rb_encoding *enc_utf8;
+
+#define JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH 55
+
+// Builds the interned UTF-8 String for the `length` bytes at `str`.
+// Without rb_enc_interned_str() the string is created, frozen and then
+// passed through String#-@ instead.
+static inline VALUE build_interned_string(const char *str, const long length)
+{
+# ifdef HAVE_RB_ENC_INTERNED_STR
+    return rb_enc_interned_str(str, length, enc_utf8);
+# else
+    VALUE frozen = rb_str_freeze(rb_utf8_str_new(str, length));
+    return rb_funcall(frozen, i_uminus, 0);
+# endif
+}
+
+// Builds the Symbol whose name is the `length` bytes at `str`, going through
+// the interned String first.
+static inline VALUE build_symbol(const char *str, const long length)
+{
+    VALUE interned = build_interned_string(str, length);
+    return rb_str_intern(interned);
+}
+
+// Inserts `rstring` at position `index`, shifting the entries from `index`
+// onwards one slot to the right. The caller must ensure the cache has room.
+static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring)
+{
+    VALUE *slot = &cache->entries[index];
+
+    MEMMOVE(slot + 1, slot, VALUE, cache->length - index);
+    *slot = rstring;
+    cache->length++;
+}
+
+// Ordering used by the name cache: shorter strings sort first, and strings
+// of equal length are ordered by memcmp() of their bytes.
+// Returns <0, 0 or >0 like memcmp().
+static inline int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
+{
+    const long cached_length = RSTRING_LEN(rstring);
+
+    if (length != cached_length) {
+        return (int)(length - cached_length);
+    }
+    return memcmp(str, RSTRING_PTR(rstring), length);
+}
+
+// Looks up an object name in the sorted name cache, interning and inserting
+// it on a miss.
+//
+// Entries are kept ordered by rstring_cache_cmp() (length first, then bytes)
+// so the lookup is a binary search.
+//
+// Returns the cached or freshly interned String, or Qfalse when the name is
+// not handled by the cache: longer than JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH,
+// not starting with a letter, or containing a backslash.
+static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
+{
+    if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
+        // Common names aren't likely to be very long. So we just don't
+        // cache names above an arbitrary threshold.
+        return Qfalse;
+    }
+
+    // The cast matters: passing a negative plain `char` (any non-ASCII UTF-8
+    // byte on platforms where `char` is signed) to isalpha() is undefined
+    // behavior.
+    if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
+        // Simple heuristic, if the first character isn't a letter,
+        // we're much less likely to see this string again.
+        // We mostly want to cache strings that are likely to be repeated.
+        return Qfalse;
+    }
+
+    int low = 0;
+    int high = cache->length - 1;
+    int mid = 0;
+    int last_cmp = 0;
+
+    while (low <= high) {
+        mid = (high + low) >> 1;
+        VALUE entry = cache->entries[mid];
+        last_cmp = rstring_cache_cmp(str, length, entry);
+
+        if (last_cmp == 0) {
+            return entry;
+        } else if (last_cmp > 0) {
+            low = mid + 1;
+        } else {
+            high = mid - 1;
+        }
+    }
+
+    if (RB_UNLIKELY(memchr(str, '\\', length))) {
+        // We assume the overwhelming majority of names don't need to be escaped.
+        // But if they do, we have to fall back to the slow path.
+        return Qfalse;
+    }
+
+    VALUE rstring = build_interned_string(str, length);
+
+    if (cache->length < JSON_RVALUE_CACHE_CAPA) {
+        // `mid` is the last probed slot; if the name sorts after it, the
+        // insertion point is the slot right behind it.
+        if (last_cmp > 0) {
+            mid += 1;
+        }
+
+        rvalue_cache_insert_at(cache, mid, rstring);
+    }
+    return rstring;
+}
+
+// Symbol flavour of the name cache lookup: finds the Symbol for an object
+// name in the sorted cache, building and inserting it on a miss.
+//
+// Entries are Symbols; they are compared through rb_sym2str() with
+// rstring_cache_cmp() (length first, then bytes).
+//
+// Returns the cached or freshly built Symbol, or Qfalse when the name is
+// not handled by the cache: longer than JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH,
+// not starting with a letter, or containing a backslash.
+static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length)
+{
+    if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
+        // Common names aren't likely to be very long. So we just don't
+        // cache names above an arbitrary threshold.
+        return Qfalse;
+    }
+
+    // The cast matters: passing a negative plain `char` (any non-ASCII UTF-8
+    // byte on platforms where `char` is signed) to isalpha() is undefined
+    // behavior.
+    if (RB_UNLIKELY(!isalpha((unsigned char)str[0]))) {
+        // Simple heuristic, if the first character isn't a letter,
+        // we're much less likely to see this string again.
+        // We mostly want to cache strings that are likely to be repeated.
+        return Qfalse;
+    }
+
+    int low = 0;
+    int high = cache->length - 1;
+    int mid = 0;
+    int last_cmp = 0;
+
+    while (low <= high) {
+        mid = (high + low) >> 1;
+        VALUE entry = cache->entries[mid];
+        last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
+
+        if (last_cmp == 0) {
+            return entry;
+        } else if (last_cmp > 0) {
+            low = mid + 1;
+        } else {
+            high = mid - 1;
+        }
+    }
+
+    if (RB_UNLIKELY(memchr(str, '\\', length))) {
+        // We assume the overwhelming majority of names don't need to be escaped.
+        // But if they do, we have to fall back to the slow path.
+        return Qfalse;
+    }
+
+    VALUE rsymbol = build_symbol(str, length);
+
+    if (cache->length < JSON_RVALUE_CACHE_CAPA) {
+        // `mid` is the last probed slot; if the name sorts after it, the
+        // insertion point is the slot right behind it.
+        if (last_cmp > 0) {
+            mid += 1;
+        }
+
+        rvalue_cache_insert_at(cache, mid, rsymbol);
+    }
+    return rsymbol;
+}
+
+/* rvalue stack */
+
+#define RVALUE_STACK_INITIAL_CAPA 128
+
+// Records where an rvalue_stack lives. A stack starts out as
+// RVALUE_STACK_STACK_ALLOCATED and is switched to RVALUE_STACK_HEAP_ALLOCATED
+// by rvalue_stack_spill() the first time it has to grow.
+enum rvalue_stack_type {
+    RVALUE_STACK_HEAP_ALLOCATED = 0,
+    RVALUE_STACK_STACK_ALLOCATED = 1,
+};
+
+// Growable stack of VALUEs collected while parsing.
+typedef struct rvalue_stack_struct {
+    // Whether this stack is still the initial on-stack one or a heap copy.
+    enum rvalue_stack_type type;
+    // Number of VALUE slots available at `ptr`.
+    long capa;
+    // Number of VALUEs currently pushed; also the index of the next free slot.
+    long head;
+    // Backing storage for the values.
+    VALUE *ptr;
+} rvalue_stack;
+
+static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref);
+
+// Doubles the capacity of `stack` and returns the stack to use afterwards.
+// A stack-allocated stack is spilled to the heap (which may change the
+// returned pointer); a heap-allocated one has its buffer reallocated in place.
+static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalue_stack **stack_ref)
+{
+    if (stack->type == RVALUE_STACK_STACK_ALLOCATED) {
+        return rvalue_stack_spill(stack, handle, stack_ref);
+    }
+
+    long doubled = stack->capa * 2;
+    REALLOC_N(stack->ptr, VALUE, doubled);
+    stack->capa = doubled;
+    return stack;
+}
+
+// Pushes `value` on top of `stack`, growing it first when it is full.
+// `handle` and `stack_ref` are forwarded to rvalue_stack_grow().
+static void rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref)
+{
+    if (RB_UNLIKELY(stack->head >= stack->capa)) {
+        // Growing may hand back a different stack object.
+        stack = rvalue_stack_grow(stack, handle, stack_ref);
+    }
+    stack->ptr[stack->head++] = value;
+}
+
+// Returns a pointer to the `count` most recently pushed values, oldest first.
+// Nothing is removed from the stack.
+static inline VALUE *rvalue_stack_peek(rvalue_stack *stack, long count)
+{
+    long first = stack->head - count;
+    return stack->ptr + first;
+}
+
+// Discards the `count` most recently pushed values.
+static inline void rvalue_stack_pop(rvalue_stack *stack, long count)
+{
+    stack->head = stack->head - count;
+}
+
+// GC mark callback: marks the values currently on the stack, i.e. the first
+// `head` slots of the buffer.
+static void rvalue_stack_mark(void *ptr)
+{
+    rvalue_stack *stack = (rvalue_stack *)ptr;
+    VALUE *live_begin = stack->ptr;
+
+    rb_gc_mark_locations(live_begin, live_begin + stack->head);
+}
+
+// Free callback: releases the value buffer and then the stack struct itself.
+// A NULL `ptr` is accepted and ignored.
+static void rvalue_stack_free(void *ptr)
+{
+    rvalue_stack *stack = (rvalue_stack *)ptr;
+    if (!stack) {
+        return;
+    }
+
+    ruby_xfree(stack->ptr);
+    ruby_xfree(stack);
+}
+
+// Size callback: the stack struct plus its `capa` VALUE slots.
+static size_t rvalue_stack_memsize(const void *ptr)
+{
+    const rvalue_stack *stack = (const rvalue_stack *)ptr;
+    size_t total = sizeof(rvalue_stack);
+
+    total += sizeof(VALUE) * stack->capa;
+    return total;
+}
+
+// TypedData description of a heap-allocated rvalue_stack: wires up the mark,
+// free and memsize callbacks for the wrapper object created by
+// rvalue_stack_spill().
+static const rb_data_type_t JSON_Parser_rvalue_stack_type = {
+    "JSON::Ext::Parser/rvalue_stack",
+    {
+        .dmark = rvalue_stack_mark,
+        .dfree = rvalue_stack_free,
+        .dsize = rvalue_stack_memsize,
+    },
+    0, 0,
+    RUBY_TYPED_FREE_IMMEDIATELY,
+};
+
+// Moves a stack-allocated rvalue_stack to the heap, doubling its capacity.
+//
+// A TypedData wrapper is created to own the new stack; it is stored in
+// `*handle`, and `*stack_ref` is redirected to the new stack. The values
+// pushed so far are copied over and the new stack is returned. `old_stack`
+// itself is left untouched.
+static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref)
+{
+    rvalue_stack *stack;
+    // NOTE(review): the wrapper is created with class 0, presumably so it is
+    // never visible from Ruby code -- confirm.
+    *handle = TypedData_Make_Struct(0, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack);
+    *stack_ref = stack;
+    // Start from a copy of the old struct; until the new buffer is installed
+    // below, `ptr` and `head` still describe the old buffer.
+    MEMCPY(stack, old_stack, rvalue_stack, 1);
+
+    stack->capa = old_stack->capa << 1;
+    stack->ptr = ALLOC_N(VALUE, stack->capa);
+    stack->type = RVALUE_STACK_HEAP_ALLOCATED;
+    // Only the first `head` slots hold values.
+    MEMCPY(stack->ptr, old_stack->ptr, VALUE, old_stack->head);
+    return stack;
+}
+
+// Frees the heap stack owned by `handle` right away. The wrapper's data
+// pointer is cleared before the memory is released, so the wrapper no longer
+// refers to the freed stack.
+static void rvalue_stack_eagerly_release(VALUE handle)
+{
+    rvalue_stack *spilled;
+
+    TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, spilled);
+    RTYPEDDATA_DATA(handle) = NULL;
+    rvalue_stack_free(spilled);
+}
/* unicode */
@@ -67,6 +370,50 @@ static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
return len;
}
+// Parser state: the source being parsed, the configured options and the
+// scratch structures used during a parse.
+typedef struct JSON_ParserStruct {
+    // Source String after encoding conversion; a zero value means the parser
+    // has not been initialized yet.
+    VALUE Vsource;
+    // Byte pointer and length of Vsource.
+    char *source;
+    long len;
+    // NOTE(review): appears to mark the start of the string token currently
+    // being scanned -- confirm against the string machine.
+    char *memo;
+    // Option values; stored as Qfalse when the option is falsy.
+    VALUE create_id;
+    VALUE object_class;
+    VALUE array_class;
+    VALUE decimal_class;
+    VALUE match_string;
+    FBuffer fbuffer;
+    // Maximum nesting depth (defaults to 100); 0 disables the check.
+    int max_nesting;
+    bool allow_nan;
+    bool allow_trailing_comma;
+    // True while an object name (key) is being parsed.
+    bool parsing_name;
+    bool symbolize_names;
+    bool freeze;
+    bool create_additions;
+    // Set when :create_additions was passed explicitly as nil.
+    bool deprecated_create_additions;
+    // Cache of object names seen so far (Strings or Symbols).
+    rvalue_cache name_cache;
+    // Value stack currently in use.
+    rvalue_stack *stack;
+    // TypedData wrapper owning the stack once it has spilled to the heap.
+    VALUE stack_handle;
+} JSON_Parser;
+
+// GET_PARSER: declares a local `json` unwrapped from `self` and raises
+// TypeError if the parser has not been initialized (Vsource is unset).
+#define GET_PARSER                          \
+    GET_PARSER_INIT;                        \
+    if (!json->Vsource) rb_raise(rb_eTypeError, "uninitialized instance")
+
+// GET_PARSER_INIT: declares a local `json` unwrapped from `self` without
+// checking whether the parser has been initialized.
+#define GET_PARSER_INIT                     \
+    JSON_Parser *json;                      \
+    TypedData_Get_Struct(self, JSON_Parser, &JSON_Parser_type, json)
+
+#define MinusInfinity "-Infinity"
+#define EVIL 0x666
+
+static const rb_data_type_t JSON_Parser_type;
+static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result);
+static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
+static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
+static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result);
+static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result);
+static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting);
+
+
#define PARSE_ERROR_FRAGMENT_LEN 32
#ifdef RBIMPL_ATTR_NORETURN
RBIMPL_ATTR_NORETURN()
@@ -84,21 +431,9 @@ static void raise_parse_error(const char *format, const char *start)
ptr = buffer;
}
- rb_enc_raise(rb_utf8_encoding(), rb_path2class("JSON::ParserError"), format, ptr);
+ rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr);
}
-static VALUE mJSON, mExt, cParser, eNestingError, Encoding_UTF_8;
-static VALUE CNaN, CInfinity, CMinusInfinity;
-
-static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions,
- i_chr, i_max_nesting, i_allow_nan, i_symbolize_names,
- i_object_class, i_array_class, i_decimal_class,
- i_deep_const_get, i_match, i_match_string, i_aset, i_aref,
- i_leftshift, i_new, i_try_convert, i_freeze, i_uminus, i_encode;
-
-static int binary_encindex;
-static int utf8_encindex;
-
%%{
machine JSON_common;
@@ -135,27 +470,25 @@ static int utf8_encindex;
write data;
action parse_value {
- VALUE v = Qnil;
- char *np = JSON_parse_value(json, fpc, pe, &v, current_nesting);
+ char *np = JSON_parse_value(json, fpc, pe, result, current_nesting);
if (np == NULL) {
fhold; fbreak;
} else {
- if (NIL_P(json->object_class)) {
- OBJ_FREEZE(last_name);
- rb_hash_aset(*result, last_name, v);
- } else {
- rb_funcall(*result, i_aset, 2, last_name, v);
- }
fexec np;
}
}
+ action allow_trailing_comma { json->allow_trailing_comma }
+
action parse_name {
char *np;
- json->parsing_name = 1;
- np = JSON_parse_string(json, fpc, pe, &last_name);
- json->parsing_name = 0;
- if (np == NULL) { fhold; fbreak; } else fexec np;
+ json->parsing_name = true;
+ np = JSON_parse_string(json, fpc, pe, result);
+ json->parsing_name = false;
+ if (np == NULL) { fhold; fbreak; } else {
+ PUSH(*result);
+ fexec np;
+ }
}
action exit { fhold; fbreak; }
@@ -165,33 +498,57 @@ static int utf8_encindex;
main := (
begin_object
- (pair (next_pair)*)? ignore*
+ (pair (next_pair)*((ignore* value_separator) when allow_trailing_comma)?)? ignore*
end_object
) @exit;
}%%
+#define PUSH(result) rvalue_stack_push(json->stack, result, &json->stack_handle, &json->stack)
+
static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
{
int cs = EVIL;
- VALUE last_name = Qnil;
- VALUE object_class = json->object_class;
if (json->max_nesting && current_nesting > json->max_nesting) {
rb_raise(eNestingError, "nesting of %d is too deep", current_nesting);
}
- *result = NIL_P(object_class) ? rb_hash_new() : rb_class_new_instance(0, 0, object_class);
+ long stack_head = json->stack->head;
%% write init;
%% write exec;
if (cs >= JSON_object_first_final) {
- if (json->create_additions) {
+ long count = json->stack->head - stack_head;
+
+ if (RB_UNLIKELY(json->object_class)) {
+ VALUE object = rb_class_new_instance(0, 0, json->object_class);
+ long index = 0;
+ VALUE *items = rvalue_stack_peek(json->stack, count);
+ while (index < count) {
+ VALUE name = items[index++];
+ VALUE value = items[index++];
+ rb_funcall(object, i_aset, 2, name, value);
+ }
+ *result = object;
+ } else {
+ VALUE hash;
+#ifdef HAVE_RB_HASH_NEW_CAPA
+ hash = rb_hash_new_capa(count >> 1);
+#else
+ hash = rb_hash_new();
+#endif
+ rb_hash_bulk_insert(count, rvalue_stack_peek(json->stack, count), hash);
+ *result = hash;
+ }
+ rvalue_stack_pop(json->stack, count);
+
+ if (RB_UNLIKELY(json->create_additions)) {
VALUE klassname;
- if (NIL_P(json->object_class)) {
- klassname = rb_hash_aref(*result, json->create_id);
+ if (json->object_class) {
+ klassname = rb_funcall(*result, i_aref, 1, json->create_id);
} else {
- klassname = rb_funcall(*result, i_aref, 1, json->create_id);
+ klassname = rb_hash_aref(*result, json->create_id);
}
if (!NIL_P(klassname)) {
VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname);
@@ -209,7 +566,6 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu
}
}
-
%%{
machine JSON_value;
include JSON_common;
@@ -241,7 +597,12 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu
}
action parse_string {
char *np = JSON_parse_string(json, fpc, pe, result);
- if (np == NULL) { fhold; fbreak; } else fexec np;
+ if (np == NULL) {
+ fhold;
+ fbreak;
+ } else {
+ fexec np;
+ }
}
action parse_number {
@@ -256,9 +617,13 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu
}
}
np = JSON_parse_float(json, fpc, pe, result);
- if (np != NULL) fexec np;
+ if (np != NULL) {
+ fexec np;
+ }
np = JSON_parse_integer(json, fpc, pe, result);
- if (np != NULL) fexec np;
+ if (np != NULL) {
+ fexec np;
+ }
fhold; fbreak;
}
@@ -301,6 +666,7 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul
}
if (cs >= JSON_value_first_final) {
+ PUSH(*result);
return p;
} else {
return NULL;
@@ -362,7 +728,7 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul
if (cs >= JSON_float_first_final) {
VALUE mod = Qnil;
ID method_id = 0;
- if (!NIL_P(json->decimal_class)) {
+ if (json->decimal_class) {
if (rb_respond_to(json->decimal_class, i_try_convert)) {
mod = json->decimal_class;
method_id = i_try_convert;
@@ -421,39 +787,51 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul
if (np == NULL) {
fhold; fbreak;
} else {
- if (NIL_P(json->array_class)) {
- rb_ary_push(*result, v);
- } else {
- rb_funcall(*result, i_leftshift, 1, v);
- }
fexec np;
}
}
+ action allow_trailing_comma { json->allow_trailing_comma }
+
action exit { fhold; fbreak; }
next_element = value_separator ignore* begin_value >parse_value;
main := begin_array ignore*
((begin_value >parse_value ignore*)
- (ignore* next_element ignore*)*)?
+ (ignore* next_element ignore*)*((value_separator ignore*) when allow_trailing_comma)?)?
end_array @exit;
}%%
static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting)
{
int cs = EVIL;
- VALUE array_class = json->array_class;
if (json->max_nesting && current_nesting > json->max_nesting) {
rb_raise(eNestingError, "nesting of %d is too deep", current_nesting);
}
- *result = NIL_P(array_class) ? rb_ary_new() : rb_class_new_instance(0, 0, array_class);
+ long stack_head = json->stack->head;
%% write init;
%% write exec;
if(cs >= JSON_array_first_final) {
+ long count = json->stack->head - stack_head;
+
+ if (RB_UNLIKELY(json->array_class)) {
+ VALUE array = rb_class_new_instance(0, 0, json->array_class);
+ VALUE *items = rvalue_stack_peek(json->stack, count);
+ long index;
+ for (index = 0; index < count; index++) {
+ rb_funcall(array, i_leftshift, 1, items[index]);
+ }
+ *result = array;
+ } else {
+ VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(json->stack, count));
+ *result = array;
+ }
+ rvalue_stack_pop(json->stack, count);
+
return p + 1;
} else {
raise_parse_error("unexpected token at '%s'", p);
@@ -469,7 +847,7 @@ static inline VALUE build_string(const char *start, const char *end, bool intern
VALUE result;
# ifdef HAVE_RB_ENC_INTERNED_STR
if (intern) {
- result = rb_enc_interned_str(start, (long)(end - start), rb_utf8_encoding());
+ result = rb_enc_interned_str(start, (long)(end - start), enc_utf8);
} else {
result = rb_utf8_str_new(start, (long)(end - start));
}
@@ -487,13 +865,26 @@ static inline VALUE build_string(const char *start, const char *end, bool intern
return result;
}
-static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bool symbolize)
+static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize)
{
size_t bufferSize = stringEnd - string;
char *p = string, *pe = string, *unescape, *bufferStart, *buffer;
int unescape_len;
char buf[4];
+ if (is_name) {
+ VALUE cached_key;
+ if (RB_UNLIKELY(symbolize)) {
+ cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize);
+ } else {
+ cached_key = rstring_cache_fetch(&json->name_cache, string, bufferSize);
+ }
+
+ if (RB_LIKELY(cached_key)) {
+ return cached_key;
+ }
+ }
+
pe = memchr(p, '\\', bufferSize);
if (RB_LIKELY(pe == NULL)) {
return build_string(string, stringEnd, intern, symbolize);
@@ -602,7 +993,7 @@ static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bo
write data;
action parse_string {
- *result = json_string_unescape(json->memo + 1, p, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
+ *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names);
if (NIL_P(*result)) {
fhold;
fbreak;
@@ -671,7 +1062,7 @@ static VALUE convert_encoding(VALUE source)
{
int encindex = RB_ENCODING_GET(source);
- if (encindex == utf8_encindex) {
+ if (RB_LIKELY(encindex == utf8_encindex)) {
return source;
}
@@ -683,6 +1074,68 @@ static VALUE convert_encoding(VALUE source)
return rb_funcall(source, i_encode, 1, Encoding_UTF_8);
}
+// rb_hash_foreach() callback applying one option to the parser.
+// `data` is the JSON_Parser being configured; keys that do not match a known
+// option symbol are ignored. Always returns ST_CONTINUE.
+static int configure_parser_i(VALUE key, VALUE val, VALUE data)
+{
+    JSON_Parser *json = (JSON_Parser *)data;
+    const bool truthy = RTEST(val);
+
+    if (key == sym_max_nesting) {
+        // A falsy value disables the nesting limit.
+        if (truthy) {
+            json->max_nesting = FIX2INT(val);
+        } else {
+            json->max_nesting = 0;
+        }
+    } else if (key == sym_allow_nan) {
+        json->allow_nan = truthy;
+    } else if (key == sym_allow_trailing_comma) {
+        json->allow_trailing_comma = truthy;
+    } else if (key == sym_symbolize_names) {
+        json->symbolize_names = truthy;
+    } else if (key == sym_freeze) {
+        json->freeze = truthy;
+    } else if (key == sym_create_id) {
+        json->create_id = truthy ? val : Qfalse;
+    } else if (key == sym_object_class) {
+        json->object_class = truthy ? val : Qfalse;
+    } else if (key == sym_array_class) {
+        json->array_class = truthy ? val : Qfalse;
+    } else if (key == sym_decimal_class) {
+        json->decimal_class = truthy ? val : Qfalse;
+    } else if (key == sym_match_string) {
+        json->match_string = truthy ? val : Qfalse;
+    } else if (key == sym_create_additions) {
+        // An explicit nil turns additions on and flags the deprecated usage.
+        const bool deprecated = NIL_P(val);
+        json->create_additions = deprecated || truthy;
+        json->deprecated_create_additions = deprecated;
+    }
+
+    return ST_CONTINUE;
+}
+
+// Initializes `json` from a source String and an options Hash (or nil).
+//
+// Raises TypeError if the parser was already initialized or `opts` is neither
+// nil nor a Hash, and ArgumentError when :symbolize_names and
+// :create_additions are both enabled. The source is converted with
+// convert_encoding() before its pointer and length are recorded.
+static void parser_init(JSON_Parser *json, VALUE source, VALUE opts)
+{
+    if (json->Vsource) {
+        rb_raise(rb_eTypeError, "already initialized instance");
+    }
+
+    // Defaults; the options below may override max_nesting.
+    json->fbuffer.initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT;
+    json->max_nesting = 100;
+
+    if (!NIL_P(opts)) {
+        Check_Type(opts, T_HASH);
+    }
+
+    if (!NIL_P(opts) && RHASH_SIZE(opts) > 0) {
+        // We assume in most cases few keys are set so it's faster to go over
+        // the provided keys than to check all possible keys.
+        rb_hash_foreach(opts, configure_parser_i, (VALUE)json);
+
+        const bool conflicting = json->symbolize_names && json->create_additions;
+        if (conflicting) {
+            rb_raise(rb_eArgError,
+                    "options :symbolize_names and :create_additions cannot be "
+                    " used in conjunction");
+        }
+
+        // Additions need a create_id; ask JSON.create_id when none was given.
+        if (json->create_additions && !json->create_id) {
+            json->create_id = rb_funcall(mJSON, i_create_id, 0);
+        }
+    }
+
+    source = convert_encoding(StringValue(source));
+    StringValue(source);
+    json->source = RSTRING_PTR(source);
+    json->len = RSTRING_LEN(source);
+    json->Vsource = source;
+}
+
/*
* call-seq: new(source, opts => {})
*
@@ -717,117 +1170,11 @@ static VALUE convert_encoding(VALUE source)
*/
static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self)
{
- VALUE source, opts;
GET_PARSER_INIT;
- if (json->Vsource) {
- rb_raise(rb_eTypeError, "already initialized instance");
- }
-
rb_check_arity(argc, 1, 2);
- source = argv[0];
- opts = Qnil;
- if (argc == 2) {
- opts = argv[1];
- Check_Type(argv[1], T_HASH);
- if (RHASH_SIZE(argv[1]) > 0) {
- opts = argv[1];
- }
- }
-
- if (!NIL_P(opts)) {
- VALUE tmp = ID2SYM(i_max_nesting);
- if (option_given_p(opts, tmp)) {
- VALUE max_nesting = rb_hash_aref(opts, tmp);
- if (RTEST(max_nesting)) {
- Check_Type(max_nesting, T_FIXNUM);
- json->max_nesting = FIX2INT(max_nesting);
- } else {
- json->max_nesting = 0;
- }
- } else {
- json->max_nesting = 100;
- }
- tmp = ID2SYM(i_allow_nan);
- if (option_given_p(opts, tmp)) {
- json->allow_nan = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
- } else {
- json->allow_nan = 0;
- }
- tmp = ID2SYM(i_symbolize_names);
- if (option_given_p(opts, tmp)) {
- json->symbolize_names = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
- } else {
- json->symbolize_names = 0;
- }
- tmp = ID2SYM(i_freeze);
- if (option_given_p(opts, tmp)) {
- json->freeze = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0;
- } else {
- json->freeze = 0;
- }
- tmp = ID2SYM(i_create_additions);
- if (option_given_p(opts, tmp)) {
- tmp = rb_hash_aref(opts, tmp);
- if (NIL_P(tmp)) {
- json->create_additions = 1;
- json->deprecated_create_additions = 1;
- } else {
- json->create_additions = RTEST(tmp);
- json->deprecated_create_additions = 0;
- }
- }
- if (json->symbolize_names && json->create_additions) {
- rb_raise(rb_eArgError,
- "options :symbolize_names and :create_additions cannot be "
- " used in conjunction");
- }
- tmp = ID2SYM(i_create_id);
- if (option_given_p(opts, tmp)) {
- json->create_id = rb_hash_aref(opts, tmp);
- } else {
- json->create_id = rb_funcall(mJSON, i_create_id, 0);
- }
- tmp = ID2SYM(i_object_class);
- if (option_given_p(opts, tmp)) {
- json->object_class = rb_hash_aref(opts, tmp);
- } else {
- json->object_class = Qnil;
- }
- tmp = ID2SYM(i_array_class);
- if (option_given_p(opts, tmp)) {
- json->array_class = rb_hash_aref(opts, tmp);
- } else {
- json->array_class = Qnil;
- }
- tmp = ID2SYM(i_decimal_class);
- if (option_given_p(opts, tmp)) {
- json->decimal_class = rb_hash_aref(opts, tmp);
- } else {
- json->decimal_class = Qnil;
- }
- tmp = ID2SYM(i_match_string);
- if (option_given_p(opts, tmp)) {
- VALUE match_string = rb_hash_aref(opts, tmp);
- json->match_string = RTEST(match_string) ? match_string : Qnil;
- } else {
- json->match_string = Qnil;
- }
- } else {
- json->max_nesting = 100;
- json->allow_nan = 0;
- json->create_additions = 0;
- json->create_id = Qnil;
- json->object_class = Qnil;
- json->array_class = Qnil;
- json->decimal_class = Qnil;
- }
- source = convert_encoding(StringValue(source));
- StringValue(source);
- json->len = RSTRING_LEN(source);
- json->source = RSTRING_PTR(source);
- json->Vsource = source;
+ parser_init(json, argv[0], argc == 2 ? argv[1] : Qnil);
return self;
}
@@ -862,11 +1209,26 @@ static VALUE cParser_parse(VALUE self)
VALUE result = Qnil;
GET_PARSER;
+ char stack_buffer[FBUFFER_STACK_SIZE];
+ fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE);
+
+ VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA];
+ rvalue_stack stack = {
+ .type = RVALUE_STACK_STACK_ALLOCATED,
+ .ptr = rvalue_stack_buffer,
+ .capa = RVALUE_STACK_INITIAL_CAPA,
+ };
+ json->stack = &stack;
+
%% write init;
p = json->source;
pe = p + json->len;
%% write exec;
+ if (json->stack_handle) {
+ rvalue_stack_eagerly_release(json->stack_handle);
+ }
+
if (cs >= JSON_first_final && p == pe) {
return result;
} else {
@@ -875,18 +1237,43 @@ static VALUE cParser_parse(VALUE self)
}
}
-#ifndef HAVE_RB_GC_MARK_LOCATIONS
-// For TruffleRuby
-void rb_gc_mark_locations(const VALUE *start, const VALUE *end)
+static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE opts)
{
- VALUE *value = start;
+ char *p, *pe;
+ int cs = EVIL;
+ VALUE result = Qnil;
- while (value < end) {
- rb_gc_mark(*value);
- value++;
+ JSON_Parser _parser = {0};
+ JSON_Parser *json = &_parser;
+ parser_init(json, source, opts);
+
+ char stack_buffer[FBUFFER_STACK_SIZE];
+ fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE);
+
+ VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA];
+ rvalue_stack stack = {
+ .type = RVALUE_STACK_STACK_ALLOCATED,
+ .ptr = rvalue_stack_buffer,
+ .capa = RVALUE_STACK_INITIAL_CAPA,
+ };
+ json->stack = &stack;
+
+ %% write init;
+ p = json->source;
+ pe = p + json->len;
+ %% write exec;
+
+ if (json->stack_handle) {
+ rvalue_stack_eagerly_release(json->stack_handle);
+ }
+
+ if (cs >= JSON_first_final && p == pe) {
+ return result;
+ } else {
+ raise_parse_error("unexpected token at '%s'", p);
+ return Qnil;
}
}
-#endif
static void JSON_mark(void *ptr)
{
@@ -897,6 +1284,8 @@ static void JSON_mark(void *ptr)
rb_gc_mark(json->array_class);
rb_gc_mark(json->decimal_class);
rb_gc_mark(json->match_string);
+ rb_gc_mark(json->stack_handle);
+
const VALUE *name_cache_entries = &json->name_cache.entries[0];
rb_gc_mark_locations(name_cache_entries, name_cache_entries + json->name_cache.length);
}
@@ -959,6 +1348,8 @@ void Init_parser(void)
rb_define_method(cParser, "parse", cParser_parse, 0);
rb_define_method(cParser, "source", cParser_source, 0);
+ rb_define_singleton_method(cParser, "parse", cParser_m_parse, 2);
+
CNaN = rb_const_get(mJSON, rb_intern("NaN"));
rb_gc_register_mark_object(CNaN);
@@ -971,31 +1362,35 @@ void Init_parser(void)
rb_global_variable(&Encoding_UTF_8);
Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8"));
+ sym_max_nesting = ID2SYM(rb_intern("max_nesting"));
+ sym_allow_nan = ID2SYM(rb_intern("allow_nan"));
+ sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma"));
+ sym_symbolize_names = ID2SYM(rb_intern("symbolize_names"));
+ sym_freeze = ID2SYM(rb_intern("freeze"));
+ sym_create_additions = ID2SYM(rb_intern("create_additions"));
+ sym_create_id = ID2SYM(rb_intern("create_id"));
+ sym_object_class = ID2SYM(rb_intern("object_class"));
+ sym_array_class = ID2SYM(rb_intern("array_class"));
+ sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
+ sym_match_string = ID2SYM(rb_intern("match_string"));
+
+ i_create_id = rb_intern("create_id");
i_json_creatable_p = rb_intern("json_creatable?");
i_json_create = rb_intern("json_create");
- i_create_id = rb_intern("create_id");
- i_create_additions = rb_intern("create_additions");
i_chr = rb_intern("chr");
- i_max_nesting = rb_intern("max_nesting");
- i_allow_nan = rb_intern("allow_nan");
- i_symbolize_names = rb_intern("symbolize_names");
- i_object_class = rb_intern("object_class");
- i_array_class = rb_intern("array_class");
- i_decimal_class = rb_intern("decimal_class");
i_match = rb_intern("match");
- i_match_string = rb_intern("match_string");
i_deep_const_get = rb_intern("deep_const_get");
i_aset = rb_intern("[]=");
i_aref = rb_intern("[]");
i_leftshift = rb_intern("<<");
i_new = rb_intern("new");
i_try_convert = rb_intern("try_convert");
- i_freeze = rb_intern("freeze");
i_uminus = rb_intern("-@");
i_encode = rb_intern("encode");
binary_encindex = rb_ascii8bit_encindex();
utf8_encindex = rb_utf8_encindex();
+ enc_utf8 = rb_utf8_encoding();
}
/*
diff --git a/test/json/fixtures/fail4.json b/test/json/fixtures/fail4.json
deleted file mode 100644
index 9de168bf34..0000000000
--- a/test/json/fixtures/fail4.json
+++ /dev/null
@@ -1 +0,0 @@
-["extra comma",] \ No newline at end of file
diff --git a/test/json/fixtures/fail9.json b/test/json/fixtures/fail9.json
deleted file mode 100644
index 5815574f36..0000000000
--- a/test/json/fixtures/fail9.json
+++ /dev/null
@@ -1 +0,0 @@
-{"Extra comma": true,} \ No newline at end of file
diff --git a/test/json/json_common_interface_test.rb b/test/json/json_common_interface_test.rb
index e552412bfd..6165cc0411 100644
--- a/test/json/json_common_interface_test.rb
+++ b/test/json/json_common_interface_test.rb
@@ -52,11 +52,11 @@ class JSONCommonInterfaceTest < Test::Unit::TestCase
end
def test_generator
- assert_match(/::Generator\z/, JSON.generator.name)
+ assert_match(/::(TruffleRuby)?Generator\z/, JSON.generator.name)
end
def test_state
- assert_match(/::Generator::State\z/, JSON.state.name)
+ assert_match(/::(TruffleRuby)?Generator::State\z/, JSON.state.name)
end
def test_create_id
diff --git a/test/json/json_ext_parser_test.rb b/test/json/json_ext_parser_test.rb
index 9db8ae772f..da61504989 100644
--- a/test/json/json_ext_parser_test.rb
+++ b/test/json/json_ext_parser_test.rb
@@ -2,53 +2,51 @@
require_relative 'test_helper'
class JSONExtParserTest < Test::Unit::TestCase
- if defined?(JSON::Ext::Parser)
- include JSON
-
- def test_allocate
- parser = JSON::Ext::Parser.new("{}")
- assert_raise(TypeError, '[ruby-core:35079]') do
- parser.__send__(:initialize, "{}")
- end
- parser = JSON::Ext::Parser.allocate
- assert_raise(TypeError, '[ruby-core:35079]') { parser.source }
- end
+ include JSON
- def test_error_messages
- ex = assert_raise(ParserError) { parse('Infinity') }
- assert_equal "unexpected token at 'Infinity'", ex.message
+ def test_allocate
+ parser = JSON::Ext::Parser.new("{}")
+ assert_raise(TypeError, '[ruby-core:35079]') do
+ parser.__send__(:initialize, "{}")
+ end
+ parser = JSON::Ext::Parser.allocate
+ assert_raise(TypeError, '[ruby-core:35079]') { parser.source }
+ end
- unless RUBY_PLATFORM =~ /java/
- ex = assert_raise(ParserError) { parse('-Infinity') }
- assert_equal "unexpected token at '-Infinity'", ex.message
- end
+ def test_error_messages
+ ex = assert_raise(ParserError) { parse('Infinity') }
+ assert_equal "unexpected token at 'Infinity'", ex.message
- ex = assert_raise(ParserError) { parse('NaN') }
- assert_equal "unexpected token at 'NaN'", ex.message
+ unless RUBY_PLATFORM =~ /java/
+ ex = assert_raise(ParserError) { parse('-Infinity') }
+ assert_equal "unexpected token at '-Infinity'", ex.message
end
- if GC.respond_to?(:stress=)
- def test_gc_stress_parser_new
- payload = JSON.dump([{ foo: 1, bar: 2, baz: 3, egg: { spam: 4 } }] * 10)
-
- previous_stress = GC.stress
- JSON::Parser.new(payload).parse
- ensure
- GC.stress = previous_stress
- end
+ ex = assert_raise(ParserError) { parse('NaN') }
+ assert_equal "unexpected token at 'NaN'", ex.message
+ end
- def test_gc_stress
- payload = JSON.dump([{ foo: 1, bar: 2, baz: 3, egg: { spam: 4 } }] * 10)
+ if GC.respond_to?(:stress=)
+ def test_gc_stress_parser_new
+ payload = JSON.dump([{ foo: 1, bar: 2, baz: 3, egg: { spam: 4 } }] * 10)
- previous_stress = GC.stress
- JSON.parse(payload)
- ensure
- GC.stress = previous_stress
- end
+ previous_stress = GC.stress
+ JSON::Parser.new(payload).parse
+ ensure
+ GC.stress = previous_stress
end
- def parse(json)
- JSON::Ext::Parser.new(json).parse
+ def test_gc_stress
+ payload = JSON.dump([{ foo: 1, bar: 2, baz: 3, egg: { spam: 4 } }] * 10)
+
+ previous_stress = GC.stress
+ JSON.parse(payload)
+ ensure
+ GC.stress = previous_stress
end
end
+
+ def parse(json)
+ JSON::Ext::Parser.new(json).parse
+ end
end
diff --git a/test/json/json_generator_test.rb b/test/json/json_generator_test.rb
index 112c03b220..700220a152 100755
--- a/test/json/json_generator_test.rb
+++ b/test/json/json_generator_test.rb
@@ -343,27 +343,25 @@ class JSONGeneratorTest < Test::Unit::TestCase
assert_equal '2', state.indent
end
- if defined?(JSON::Ext::Generator)
- def test_broken_bignum # [ruby-core:38867]
- pid = fork do
- x = 1 << 64
- x.class.class_eval do
- def to_s
- end
- end
- begin
- JSON::Ext::Generator::State.new.generate(x)
- exit 1
- rescue TypeError
- exit 0
+ def test_broken_bignum # [ruby-core:38867]
+ pid = fork do
+ x = 1 << 64
+ x.class.class_eval do
+ def to_s
end
end
- _, status = Process.waitpid2(pid)
- assert status.success?
- rescue NotImplementedError
- # forking to avoid modifying core class of a parent process and
- # introducing race conditions of tests are run in parallel
+ begin
+ JSON::Ext::Generator::State.new.generate(x)
+ exit 1
+ rescue TypeError
+ exit 0
+ end
end
+ _, status = Process.waitpid2(pid)
+ assert status.success?
+ rescue NotImplementedError
+ # forking to avoid modifying core class of a parent process and
+ # introducing race conditions if tests are run in parallel
end
def test_hash_likeness_set_symbol
@@ -477,12 +475,20 @@ class JSONGeneratorTest < Test::Unit::TestCase
end
assert_includes error.message, "source sequence is illegal/malformed utf-8"
- assert_raise(Encoding::UndefinedConversionError) do
+ assert_raise(JSON::GeneratorError) do
+ JSON.dump("\x82\xAC\xEF".b)
+ end
+
+ assert_raise(JSON::GeneratorError) do
"\x82\xAC\xEF".b.to_json
end
- assert_raise(Encoding::UndefinedConversionError) do
- JSON.dump("\x82\xAC\xEF".b)
+ assert_raise(JSON::GeneratorError) do
+ ["\x82\xAC\xEF".b].to_json
+ end
+
+ assert_raise(JSON::GeneratorError) do
+ { foo: "\x82\xAC\xEF".b }.to_json
end
end
diff --git a/test/json/json_parser_test.rb b/test/json/json_parser_test.rb
index adff91674d..8759ccd262 100644
--- a/test/json/json_parser_test.rb
+++ b/test/json/json_parser_test.rb
@@ -40,7 +40,7 @@ class JSONParserTest < Test::Unit::TestCase
}
assert_equal(Encoding::UTF_8, e.message.encoding, bug10705)
assert_include(e.message, json, bug10705)
- end if defined?(JSON::Ext::Parser)
+ end
def test_parsing
parser = JSON::Parser.new('"test"')
@@ -180,7 +180,93 @@ class JSONParserTest < Test::Unit::TestCase
assert parse('NaN', :allow_nan => true).nan?
assert parse('Infinity', :allow_nan => true).infinite?
assert parse('-Infinity', :allow_nan => true).infinite?
- assert_raise(JSON::ParserError) { parse('[ 1, ]') }
+ end
+
+ def test_parse_arrays_with_allow_trailing_comma
+ assert_equal([], parse('[]', allow_trailing_comma: true))
+ assert_equal([], parse('[]', allow_trailing_comma: false))
+ assert_raise(JSON::ParserError) { parse('[,]', allow_trailing_comma: true) }
+ assert_raise(JSON::ParserError) { parse('[,]', allow_trailing_comma: false) }
+
+ assert_equal([1], parse('[1]', allow_trailing_comma: true))
+ assert_equal([1], parse('[1]', allow_trailing_comma: false))
+ assert_equal([1], parse('[1,]', allow_trailing_comma: true))
+ assert_raise(JSON::ParserError) { parse('[1,]', allow_trailing_comma: false) }
+
+ assert_equal([1, 2, 3], parse('[1,2,3]', allow_trailing_comma: true))
+ assert_equal([1, 2, 3], parse('[1,2,3]', allow_trailing_comma: false))
+ assert_equal([1, 2, 3], parse('[1,2,3,]', allow_trailing_comma: true))
+ assert_raise(JSON::ParserError) { parse('[1,2,3,]', allow_trailing_comma: false) }
+
+ assert_equal([1, 2, 3], parse('[ 1 , 2 , 3 ]', allow_trailing_comma: true))
+ assert_equal([1, 2, 3], parse('[ 1 , 2 , 3 ]', allow_trailing_comma: false))
+ assert_equal([1, 2, 3], parse('[ 1 , 2 , 3 , ]', allow_trailing_comma: true))
+ assert_raise(JSON::ParserError) { parse('[ 1 , 2 , 3 , ]', allow_trailing_comma: false) }
+
+ assert_equal({'foo' => [1, 2, 3]}, parse('{ "foo": [1,2,3] }', allow_trailing_comma: true))
+ assert_equal({'foo' => [1, 2, 3]}, parse('{ "foo": [1,2,3] }', allow_trailing_comma: false))
+ assert_equal({'foo' => [1, 2, 3]}, parse('{ "foo": [1,2,3,] }', allow_trailing_comma: true))
+ assert_raise(JSON::ParserError) { parse('{ "foo": [1,2,3,] }', allow_trailing_comma: false) }
+ end
+
+ def test_parse_object_with_allow_trailing_comma
+ assert_equal({}, parse('{}', allow_trailing_comma: true))
+ assert_equal({}, parse('{}', allow_trailing_comma: false))
+ assert_raise(JSON::ParserError) { parse('{,}', allow_trailing_comma: true) }
+ assert_raise(JSON::ParserError) { parse('{,}', allow_trailing_comma: false) }
+
+ assert_equal({'foo'=>'bar'}, parse('{"foo":"bar"}', allow_trailing_comma: true))
+ assert_equal({'foo'=>'bar'}, parse('{"foo":"bar"}', allow_trailing_comma: false))
+ assert_equal({'foo'=>'bar'}, parse('{"foo":"bar",}', allow_trailing_comma: true))
+ assert_raise(JSON::ParserError) { parse('{"foo":"bar",}', allow_trailing_comma: false) }
+
+ assert_equal(
+ {'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'},
+ parse('{"foo":"bar","baz":"qux","quux":"garply"}', allow_trailing_comma: true)
+ )
+ assert_equal(
+ {'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'},
+ parse('{"foo":"bar","baz":"qux","quux":"garply"}', allow_trailing_comma: false)
+ )
+ assert_equal(
+ {'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'},
+ parse('{"foo":"bar","baz":"qux","quux":"garply",}', allow_trailing_comma: true)
+ )
+ assert_raise(JSON::ParserError) {
+ parse('{"foo":"bar","baz":"qux","quux":"garply",}', allow_trailing_comma: false)
+ }
+
+ assert_equal(
+ {'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'},
+ parse('{ "foo":"bar" , "baz":"qux" , "quux":"garply" }', allow_trailing_comma: true)
+ )
+ assert_equal(
+ {'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'},
+ parse('{ "foo":"bar" , "baz":"qux" , "quux":"garply" }', allow_trailing_comma: false)
+ )
+ assert_equal(
+ {'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'},
+ parse('{ "foo":"bar" , "baz":"qux" , "quux":"garply" , }', allow_trailing_comma: true)
+ )
+ assert_raise(JSON::ParserError) {
+ parse('{ "foo":"bar" , "baz":"qux" , "quux":"garply" , }', allow_trailing_comma: false)
+ }
+
+ assert_equal(
+ [{'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}],
+ parse('[{"foo":"bar","baz":"qux","quux":"garply"}]', allow_trailing_comma: true)
+ )
+ assert_equal(
+ [{'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}],
+ parse('[{"foo":"bar","baz":"qux","quux":"garply"}]', allow_trailing_comma: false)
+ )
+ assert_equal(
+ [{'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}],
+ parse('[{"foo":"bar","baz":"qux","quux":"garply",}]', allow_trailing_comma: true)
+ )
+ assert_raise(JSON::ParserError) {
+ parse('[{"foo":"bar","baz":"qux","quux":"garply",}]', allow_trailing_comma: false)
+ }
end
def test_parse_some_strings
@@ -533,7 +619,7 @@ class JSONParserTest < Test::Unit::TestCase
error = assert_raise(JSON::ParserError) do
JSON.parse('{"input":{"firstName":"Bob","lastName":"Mob","email":"[email protected]"}')
end
- if RUBY_ENGINE == "ruby" && defined?(JSON::Ext)
+ if RUBY_ENGINE == "ruby"
assert_equal %(unexpected token at '{"input":{"firstName":"Bob","las'), error.message
end
end
diff --git a/test/json/test_helper.rb b/test/json/test_helper.rb
index 6fcb76edf8..11bb8ba8c2 100644
--- a/test/json/test_helper.rb
+++ b/test/json/test_helper.rb
@@ -1,30 +1,14 @@
-case ENV['JSON']
-when 'pure'
- $LOAD_PATH.unshift(File.expand_path('../../../lib', __FILE__))
- $stderr.puts("Testing JSON::Pure")
- require 'json/pure'
-when 'ext'
- $stderr.puts("Testing JSON::Ext")
- $LOAD_PATH.unshift(File.expand_path('../../../ext', __FILE__), File.expand_path('../../../lib', __FILE__))
- require 'json/ext'
-else
- $LOAD_PATH.unshift(File.expand_path('../../../ext', __FILE__), File.expand_path('../../../lib', __FILE__))
- $stderr.puts("Testing JSON")
- require 'json'
-end
+$LOAD_PATH.unshift(File.expand_path('../../../ext', __FILE__), File.expand_path('../../../lib', __FILE__))
+require 'json'
require 'test/unit'
-begin
- require 'byebug'
-rescue LoadError
-end
if GC.respond_to?(:verify_compaction_references)
# This method was added in Ruby 3.0.0. Calling it this way asks the GC to
# move objects around, helping to find object movement bugs.
begin
- GC.verify_compaction_references(double_heap: true, toward: :empty)
- rescue NotImplementedError
+ GC.verify_compaction_references(expand_heap: true, toward: :empty)
+ rescue NotImplementedError, ArgumentError
# Some platforms don't support compaction
end
end