diff options
author | Jean Boussier <[email protected]> | 2024-11-05 16:35:58 +0100 |
---|---|---|
committer | Jean Boussier <[email protected]> | 2024-11-05 18:00:36 +0100 |
commit | ca8f21ace86e9c7ab184b3f0087ff458c06b178c (patch) | |
tree | e5a4c05ba4bff8d9dc19963936d2c81248200881 | |
parent | e8522f06b51f86214d28259118c736ac8951d8cd (diff) |
[ruby/json] Resync
Notes
Notes:
Merged: https://github.com/ruby/ruby/pull/12003
-rw-r--r-- | ext/json/fbuffer/fbuffer.h | 51 | ||||
-rw-r--r-- | ext/json/generator/generator.c | 29 | ||||
-rw-r--r-- | ext/json/generator/generator.h | 91 | ||||
-rw-r--r-- | ext/json/lib/json.rb | 7 | ||||
-rw-r--r-- | ext/json/lib/json/common.rb | 23 | ||||
-rw-r--r-- | ext/json/lib/json/ext.rb | 6 | ||||
-rw-r--r-- | ext/json/lib/json/ext/generator/state.rb (renamed from lib/json/ext/generator/state.rb) | 0 | ||||
-rw-r--r-- | ext/json/parser/extconf.rb | 5 | ||||
-rw-r--r-- | ext/json/parser/parser.c | 1752 | ||||
-rw-r--r-- | ext/json/parser/parser.h | 78 | ||||
-rw-r--r-- | ext/json/parser/parser.rl | 747 | ||||
-rw-r--r-- | test/json/fixtures/fail4.json | 1 | ||||
-rw-r--r-- | test/json/fixtures/fail9.json | 1 | ||||
-rw-r--r-- | test/json/json_common_interface_test.rb | 4 | ||||
-rw-r--r-- | test/json/json_ext_parser_test.rb | 74 | ||||
-rwxr-xr-x | test/json/json_generator_test.rb | 48 | ||||
-rw-r--r-- | test/json/json_parser_test.rb | 92 | ||||
-rw-r--r-- | test/json/test_helper.rb | 24 |
18 files changed, 2167 insertions, 866 deletions
diff --git a/ext/json/fbuffer/fbuffer.h b/ext/json/fbuffer/fbuffer.h index 367ebd89ff..3e154a5fa8 100644 --- a/ext/json/fbuffer/fbuffer.h +++ b/ext/json/fbuffer/fbuffer.h @@ -4,9 +4,40 @@ #include "ruby.h" #include "ruby/encoding.h" +/* shims */ +/* This is the fallback definition from Ruby 3.4 */ + +#ifndef RBIMPL_STDBOOL_H +#if defined(__cplusplus) +# if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L) +# include <cstdbool> +# endif +#elif defined(HAVE_STDBOOL_H) +# include <stdbool.h> +#elif !defined(HAVE__BOOL) +typedef unsigned char _Bool; +# define bool _Bool +# define true ((_Bool)+1) +# define false ((_Bool)+0) +# define __bool_true_false_are_defined +#endif +#endif + +#ifndef RB_UNLIKELY +#define RB_UNLIKELY(expr) expr +#endif + +#ifndef RB_LIKELY +#define RB_LIKELY(expr) expr +#endif + +#ifndef MAYBE_UNUSED +# define MAYBE_UNUSED(x) x +#endif + enum fbuffer_type { - HEAP = 0, - STACK = 1, + FBUFFER_HEAP_ALLOCATED = 0, + FBUFFER_STACK_ALLOCATED = 1, }; typedef struct FBufferStruct { @@ -38,19 +69,11 @@ static inline void fbuffer_append_char(FBuffer *fb, char newchr); static VALUE fbuffer_to_s(FBuffer *fb); #endif -#ifndef RB_UNLIKELY -#define RB_UNLIKELY(expr) expr -#endif - -#ifndef RB_LIKELY -#define RB_LIKELY(expr) expr -#endif - static void fbuffer_stack_init(FBuffer *fb, unsigned long initial_length, char *stack_buffer, long stack_buffer_size) { fb->initial_length = (initial_length > 0) ? initial_length : FBUFFER_INITIAL_LENGTH_DEFAULT; if (stack_buffer) { - fb->type = STACK; + fb->type = FBUFFER_STACK_ALLOCATED; fb->ptr = stack_buffer; fb->capa = stack_buffer_size; } @@ -58,7 +81,7 @@ static void fbuffer_stack_init(FBuffer *fb, unsigned long initial_length, char * static void fbuffer_free(FBuffer *fb) { - if (fb->ptr && fb->type == HEAP) { + if (fb->ptr && fb->type == FBUFFER_HEAP_ALLOCATED) { ruby_xfree(fb->ptr); } } @@ -82,10 +105,10 @@ static void fbuffer_do_inc_capa(FBuffer *fb, unsigned long requested) for (required = fb->capa; requested > required - fb->len; required <<= 1); if (required > fb->capa) { - if (fb->type == STACK) { + if (fb->type == FBUFFER_STACK_ALLOCATED) { const char *old_buffer = fb->ptr; fb->ptr = ALLOC_N(char, required); - fb->type = HEAP; + fb->type = FBUFFER_HEAP_ALLOCATED; MEMCPY(fb->ptr, old_buffer, char, fb->len); } else { REALLOC_N(fb->ptr, char, required); diff --git a/ext/json/generator/generator.c b/ext/json/generator/generator.c index c32b5ce093..80539af6c8 100644 --- a/ext/json/generator/generator.c +++ b/ext/json/generator/generator.c @@ -1,5 +1,27 @@ +#include "ruby.h" #include "../fbuffer/fbuffer.h" -#include "generator.h" + +#include <math.h> +#include <ctype.h> + +/* ruby api and some helpers */ + +typedef struct JSON_Generator_StateStruct { + VALUE indent; + VALUE space; + VALUE space_before; + VALUE object_nl; + VALUE array_nl; + + long max_nesting; + long depth; + long buffer_initial_length; + + bool allow_nan; + bool ascii_only; + bool script_safe; + bool strict; +} JSON_Generator_State; #ifndef RB_UNLIKELY #define RB_UNLIKELY(cond) (cond) @@ -31,6 +53,7 @@ struct generate_json_data { generator_func func; }; +static VALUE cState_from_state_s(VALUE self, VALUE opts); static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func); static void generate_json(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); @@ -1013,6 +1036,10 @@ static VALUE generate_json_rescue(VALUE d, VALUE exc) struct generate_json_data *data = (struct generate_json_data *)d; fbuffer_free(data->buffer); + if (RBASIC_CLASS(exc) == rb_path2class("Encoding::UndefinedConversionError")) { + exc = rb_exc_new_str(eGeneratorError, rb_funcall(exc, rb_intern("message"), 0)); + } + rb_exc_raise(exc); return Qundef; diff --git a/ext/json/generator/generator.h b/ext/json/generator/generator.h deleted file mode 100644 index 749a627a10..0000000000 --- a/ext/json/generator/generator.h +++ /dev/null @@ -1,91 +0,0 @@ -#ifndef _GENERATOR_H_ -#define _GENERATOR_H_ - -#include <math.h> -#include <ctype.h> - -#include "ruby.h" - -/* This is the fallback definition from Ruby 3.4 */ -#ifndef RBIMPL_STDBOOL_H -#if defined(__cplusplus) -# if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L) -# include <cstdbool> -# endif -#elif defined(HAVE_STDBOOL_H) -# include <stdbool.h> -#elif !defined(HAVE__BOOL) -typedef unsigned char _Bool; -# define bool _Bool -# define true ((_Bool)+1) -# define false ((_Bool)+0) -# define __bool_true_false_are_defined -#endif -#endif - -/* ruby api and some helpers */ - -typedef struct JSON_Generator_StateStruct { - VALUE indent; - VALUE space; - VALUE space_before; - VALUE object_nl; - VALUE array_nl; - - long max_nesting; - long depth; - long buffer_initial_length; - - bool allow_nan; - bool ascii_only; - bool script_safe; - bool strict; -} JSON_Generator_State; - -static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self); -static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self); -#ifdef RUBY_INTEGER_UNIFICATION -static VALUE mInteger_to_json(int argc, VALUE *argv, VALUE self); -#else -static VALUE mFixnum_to_json(int argc, VALUE *argv, VALUE self); -static VALUE mBignum_to_json(int argc, VALUE *argv, VALUE self); -#endif -static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self); -static VALUE mString_included_s(VALUE self, VALUE modul); -static VALUE mString_to_json(int argc, VALUE *argv, VALUE self); -static VALUE mString_to_json_raw_object(VALUE self); -static VALUE mString_to_json_raw(int argc, VALUE *argv, VALUE self); -static VALUE mString_Extend_json_create(VALUE self, VALUE o); -static VALUE mTrueClass_to_json(int argc, VALUE *argv, VALUE self); -static VALUE mFalseClass_to_json(int argc, VALUE *argv, VALUE self); -static VALUE mNilClass_to_json(int argc, VALUE *argv, VALUE self); -static VALUE mObject_to_json(int argc, VALUE *argv, VALUE self); -static void State_free(void *state); -static VALUE cState_s_allocate(VALUE klass); - -static VALUE cState_generate(VALUE self, VALUE obj); -static VALUE cState_from_state_s(VALUE self, VALUE opts); -static VALUE cState_indent(VALUE self); -static VALUE cState_indent_set(VALUE self, VALUE indent); -static VALUE cState_space(VALUE self); -static VALUE cState_space_set(VALUE self, VALUE space); -static VALUE cState_space_before(VALUE self); -static VALUE cState_space_before_set(VALUE self, VALUE space_before); -static VALUE cState_object_nl(VALUE self); -static VALUE cState_object_nl_set(VALUE self, VALUE object_nl); -static VALUE cState_array_nl(VALUE self); -static VALUE cState_array_nl_set(VALUE self, VALUE array_nl); -static VALUE cState_max_nesting(VALUE self); -static VALUE cState_max_nesting_set(VALUE self, VALUE depth); -static VALUE cState_allow_nan_p(VALUE self); -static VALUE cState_ascii_only_p(VALUE self); -static VALUE cState_depth(VALUE self); -static VALUE cState_depth_set(VALUE self, VALUE depth); -static VALUE cState_script_safe(VALUE self); -static VALUE cState_script_safe_set(VALUE self, VALUE depth); -static VALUE cState_strict(VALUE self); -static VALUE cState_strict_set(VALUE self, VALUE strict); - -static const rb_data_type_t JSON_Generator_State_type; - -#endif diff --git a/ext/json/lib/json.rb b/ext/json/lib/json.rb index c28e853e13..dfd9b7dfc2 100644 --- a/ext/json/lib/json.rb +++ b/ext/json/lib/json.rb @@ -583,10 +583,5 @@ require 'json/common' # module JSON require 'json/version' - - begin - require 'json/ext' - rescue LoadError - require 'json/pure' - end + require 'json/ext' end diff --git a/ext/json/lib/json/common.rb b/ext/json/lib/json/common.rb index 546b6ec801..2269896ba8 100644 --- a/ext/json/lib/json/common.rb +++ b/ext/json/lib/json/common.rb @@ -32,9 +32,7 @@ module JSON JSON.generate(object, opts) end - # Returns the JSON parser class that is used by JSON. This is either - # JSON::Ext::Parser or JSON::Pure::Parser: - # JSON.parser # => JSON::Ext::Parser + # Returns the JSON parser class that is used by JSON. attr_reader :parser # Set the JSON parser class _parser_ to be used by JSON. @@ -97,14 +95,10 @@ module JSON ) end - # Returns the JSON generator module that is used by JSON. This is - # either JSON::Ext::Generator or JSON::Pure::Generator: - # JSON.generator # => JSON::Ext::Generator + # Returns the JSON generator module that is used by JSON. attr_reader :generator - # Sets or Returns the JSON generator state class that is used by JSON. This is - # either JSON::Ext::Generator::State or JSON::Pure::Generator::State: - # JSON.state # => JSON::Ext::Generator::State + # Sets or Returns the JSON generator state class that is used by JSON. attr_accessor :state end @@ -207,16 +201,7 @@ module JSON # JSON.parse('') # def parse(source, opts = nil) - if opts.nil? - Parser.new(source).parse - else - # NB: The ** shouldn't be required, but we have to deal with - # different versions of the `json` and `json_pure` gems being - # loaded concurrently. - # Prior to 2.7.3, `JSON::Ext::Parser` would only take kwargs. - # Ref: https://github.com/ruby/json/issues/650 - Parser.new(source, **opts).parse - end + Parser.parse(source, opts) end # :call-seq: diff --git a/ext/json/lib/json/ext.rb b/ext/json/lib/json/ext.rb index 92ef61eaec..2082cae68f 100644 --- a/ext/json/lib/json/ext.rb +++ b/ext/json/lib/json/ext.rb @@ -8,14 +8,12 @@ module JSON module Ext if RUBY_ENGINE == 'truffleruby' require 'json/ext/parser' - require 'json/pure' - $DEBUG and warn "Using Ext extension for JSON parser and Pure library for JSON generator." + require 'json/truffle_ruby/generator' JSON.parser = Parser - JSON.generator = JSON::Pure::Generator + JSON.generator = ::JSON::TruffleRuby::Generator else require 'json/ext/parser' require 'json/ext/generator' - $DEBUG and warn "Using Ext extension for JSON." JSON.parser = Parser JSON.generator = Generator end diff --git a/lib/json/ext/generator/state.rb b/ext/json/lib/json/ext/generator/state.rb index 6cd9496e67..6cd9496e67 100644 --- a/lib/json/ext/generator/state.rb +++ b/ext/json/lib/json/ext/generator/state.rb diff --git a/ext/json/parser/extconf.rb b/ext/json/parser/extconf.rb index c3c23d2cb4..f9104de12d 100644 --- a/ext/json/parser/extconf.rb +++ b/ext/json/parser/extconf.rb @@ -2,7 +2,10 @@ require 'mkmf' have_func("rb_enc_interned_str", "ruby.h") # RUBY_VERSION >= 3.0 -have_func("rb_gc_mark_locations") # Missing on TruffleRuby +have_func("rb_hash_new_capa", "ruby.h") # RUBY_VERSION >= 3.2 +have_func("rb_gc_mark_locations", "ruby.h") # Missing on TruffleRuby +have_func("rb_hash_bulk_insert", "ruby.h") # Missing on TruffleRuby + append_cflags("-std=c99") create_makefile 'json/ext/parser' diff --git a/ext/json/parser/parser.c b/ext/json/parser/parser.c index 758dba4694..382e21e12d 100644 --- a/ext/json/parser/parser.c +++ b/ext/json/parser/parser.c @@ -1,7 +1,310 @@ /* This file is automatically generated from parser.rl by using ragel */ #line 1 "parser.rl" +#include "ruby.h" #include "../fbuffer/fbuffer.h" -#include "parser.h" + +static VALUE mJSON, mExt, cParser, eNestingError, Encoding_UTF_8; +static VALUE CNaN, CInfinity, CMinusInfinity; + +static ID i_json_creatable_p, i_json_create, i_create_id, + i_chr, i_deep_const_get, i_match, i_aset, i_aref, + i_leftshift, i_new, i_try_convert, i_uminus, i_encode; + +static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze, + sym_create_additions, sym_create_id, sym_object_class, sym_array_class, + sym_decimal_class, sym_match_string; + +static int binary_encindex; +static int utf8_encindex; + +#ifndef HAVE_RB_GC_MARK_LOCATIONS +// For TruffleRuby +void rb_gc_mark_locations(const VALUE *start, const VALUE *end) +{ + VALUE *value = start; + + while (value < end) { + rb_gc_mark(*value); + value++; + } +} +#endif + +#ifndef HAVE_RB_HASH_BULK_INSERT +// For TruffleRuby +void rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash) +{ + long index = 0; + while (index < count) { + VALUE name = pairs[index++]; + VALUE value = pairs[index++]; + rb_hash_aset(hash, name, value); + } + RB_GC_GUARD(hash); +} +#endif + +/* name cache */ + +#include <string.h> +#include <ctype.h> + +// Object names are likely to be repeated, and are frozen. +// As such we can re-use them if we keep a cache of the ones we've seen so far, +// and save much more expensive lookups into the global fstring table. +// This cache implementation is deliberately simple, as we're optimizing for compactness, +// to be able to fit safely on the stack. +// As such, binary search into a sorted array gives a good tradeoff between compactness and +// performance. +#define JSON_RVALUE_CACHE_CAPA 63 +typedef struct rvalue_cache_struct { + int length; + VALUE entries[JSON_RVALUE_CACHE_CAPA]; +} rvalue_cache; + +static rb_encoding *enc_utf8; + +#define JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH 55 + +static inline VALUE build_interned_string(const char *str, const long length) +{ +# ifdef HAVE_RB_ENC_INTERNED_STR + return rb_enc_interned_str(str, length, enc_utf8); +# else + VALUE rstring = rb_utf8_str_new(str, length); + return rb_funcall(rb_str_freeze(rstring), i_uminus, 0); +# endif +} + +static inline VALUE build_symbol(const char *str, const long length) +{ + return rb_str_intern(build_interned_string(str, length)); +} + +static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring) +{ + MEMMOVE(&cache->entries[index + 1], &cache->entries[index], VALUE, cache->length - index); + cache->length++; + cache->entries[index] = rstring; +} + +static inline int rstring_cache_cmp(const char *str, const long length, VALUE rstring) +{ + long rstring_length = RSTRING_LEN(rstring); + if (length == rstring_length) { + return memcmp(str, RSTRING_PTR(rstring), length); + } else { + return (int)(length - rstring_length); + } +} + +static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length) +{ + if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) { + // Common names aren't likely to be very long. So we just don't + // cache names above an arbitrary threshold. + return Qfalse; + } + + if (RB_UNLIKELY(!isalpha(str[0]))) { + // Simple heuristic, if the first character isn't a letter, + // we're much less likely to see this string again. + // We mostly want to cache strings that are likely to be repeated. + return Qfalse; + } + + int low = 0; + int high = cache->length - 1; + int mid = 0; + int last_cmp = 0; + + while (low <= high) { + mid = (high + low) >> 1; + VALUE entry = cache->entries[mid]; + last_cmp = rstring_cache_cmp(str, length, entry); + + if (last_cmp == 0) { + return entry; + } else if (last_cmp > 0) { + low = mid + 1; + } else { + high = mid - 1; + } + } + + if (RB_UNLIKELY(memchr(str, '\\', length))) { + // We assume the overwhelming majority of names don't need to be escaped. + // But if they do, we have to fallback to the slow path. + return Qfalse; + } + + VALUE rstring = build_interned_string(str, length); + + if (cache->length < JSON_RVALUE_CACHE_CAPA) { + if (last_cmp > 0) { + mid += 1; + } + + rvalue_cache_insert_at(cache, mid, rstring); + } + return rstring; +} + +static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length) +{ + if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) { + // Common names aren't likely to be very long. So we just don't + // cache names above an arbitrary threshold. + return Qfalse; + } + + if (RB_UNLIKELY(!isalpha(str[0]))) { + // Simple heuristic, if the first character isn't a letter, + // we're much less likely to see this string again. + // We mostly want to cache strings that are likely to be repeated. + return Qfalse; + } + + int low = 0; + int high = cache->length - 1; + int mid = 0; + int last_cmp = 0; + + while (low <= high) { + mid = (high + low) >> 1; + VALUE entry = cache->entries[mid]; + last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry)); + + if (last_cmp == 0) { + return entry; + } else if (last_cmp > 0) { + low = mid + 1; + } else { + high = mid - 1; + } + } + + if (RB_UNLIKELY(memchr(str, '\\', length))) { + // We assume the overwhelming majority of names don't need to be escaped. + // But if they do, we have to fallback to the slow path. + return Qfalse; + } + + VALUE rsymbol = build_symbol(str, length); + + if (cache->length < JSON_RVALUE_CACHE_CAPA) { + if (last_cmp > 0) { + mid += 1; + } + + rvalue_cache_insert_at(cache, mid, rsymbol); + } + return rsymbol; +} + +/* rvalue stack */ + +#define RVALUE_STACK_INITIAL_CAPA 128 + +enum rvalue_stack_type { + RVALUE_STACK_HEAP_ALLOCATED = 0, + RVALUE_STACK_STACK_ALLOCATED = 1, +}; + +typedef struct rvalue_stack_struct { + enum rvalue_stack_type type; + long capa; + long head; + VALUE *ptr; +} rvalue_stack; + +static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref); + +static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalue_stack **stack_ref) +{ + long required = stack->capa * 2; + + if (stack->type == RVALUE_STACK_STACK_ALLOCATED) { + stack = rvalue_stack_spill(stack, handle, stack_ref); + } else { + REALLOC_N(stack->ptr, VALUE, required); + stack->capa = required; + } + return stack; +} + +static void rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref) +{ + if (RB_UNLIKELY(stack->head >= stack->capa)) { + stack = rvalue_stack_grow(stack, handle, stack_ref); + } + stack->ptr[stack->head] = value; + stack->head++; +} + +static inline VALUE *rvalue_stack_peek(rvalue_stack *stack, long count) +{ + return stack->ptr + (stack->head - count); +} + +static inline void rvalue_stack_pop(rvalue_stack *stack, long count) +{ + stack->head -= count; +} + +static void rvalue_stack_mark(void *ptr) +{ + rvalue_stack *stack = (rvalue_stack *)ptr; + rb_gc_mark_locations(stack->ptr, stack->ptr + stack->head); +} + +static void rvalue_stack_free(void *ptr) +{ + rvalue_stack *stack = (rvalue_stack *)ptr; + if (stack) { + ruby_xfree(stack->ptr); + ruby_xfree(stack); + } +} + +static size_t rvalue_stack_memsize(const void *ptr) +{ + const rvalue_stack *stack = (const rvalue_stack *)ptr; + return sizeof(rvalue_stack) + sizeof(VALUE) * stack->capa; +} + +static const rb_data_type_t JSON_Parser_rvalue_stack_type = { + "JSON::Ext::Parser/rvalue_stack", + { + .dmark = rvalue_stack_mark, + .dfree = rvalue_stack_free, + .dsize = rvalue_stack_memsize, + }, + 0, 0, + RUBY_TYPED_FREE_IMMEDIATELY, +}; + +static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref) +{ + rvalue_stack *stack; + *handle = TypedData_Make_Struct(0, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack); + *stack_ref = stack; + MEMCPY(stack, old_stack, rvalue_stack, 1); + + stack->capa = old_stack->capa << 1; + stack->ptr = ALLOC_N(VALUE, stack->capa); + stack->type = RVALUE_STACK_HEAP_ALLOCATED; + MEMCPY(stack->ptr, old_stack->ptr, VALUE, old_stack->head); + return stack; +} + +static void rvalue_stack_eagerly_release(VALUE handle) +{ + rvalue_stack *stack; + TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack); + RTYPEDDATA_DATA(handle) = NULL; + rvalue_stack_free(stack); +} /* unicode */ @@ -69,6 +372,50 @@ static int convert_UTF32_to_UTF8(char *buf, uint32_t ch) return len; } +typedef struct JSON_ParserStruct { + VALUE Vsource; + char *source; + long len; + char *memo; + VALUE create_id; + VALUE object_class; + VALUE array_class; + VALUE decimal_class; + VALUE match_string; + FBuffer fbuffer; + int max_nesting; + bool allow_nan; + bool allow_trailing_comma; + bool parsing_name; + bool symbolize_names; + bool freeze; + bool create_additions; + bool deprecated_create_additions; + rvalue_cache name_cache; + rvalue_stack *stack; + VALUE stack_handle; +} JSON_Parser; + +#define GET_PARSER \ + GET_PARSER_INIT; \ + if (!json->Vsource) rb_raise(rb_eTypeError, "uninitialized instance") + +#define GET_PARSER_INIT \ + JSON_Parser *json; \ + TypedData_Get_Struct(self, JSON_Parser, &JSON_Parser_type, json) + +#define MinusInfinity "-Infinity" +#define EVIL 0x666 + +static const rb_data_type_t JSON_Parser_type; +static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); +static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); +static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); + + #define PARSE_ERROR_FRAGMENT_LEN 32 #ifdef RBIMPL_ATTR_NORETURN RBIMPL_ATTR_NORETURN() @@ -86,60 +433,49 @@ static void raise_parse_error(const char *format, const char *start) ptr = buffer; } - rb_enc_raise(rb_utf8_encoding(), rb_path2class("JSON::ParserError"), format, ptr); + rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr); } -static VALUE mJSON, mExt, cParser, eNestingError, Encoding_UTF_8; -static VALUE CNaN, CInfinity, CMinusInfinity; - -static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions, - i_chr, i_max_nesting, i_allow_nan, i_symbolize_names, - i_object_class, i_array_class, i_decimal_class, - i_deep_const_get, i_match, i_match_string, i_aset, i_aref, - i_leftshift, i_new, i_try_convert, i_freeze, i_uminus, i_encode; -static int binary_encindex; -static int utf8_encindex; +#line 464 "parser.rl" -#line 129 "parser.rl" - - -#line 111 "parser.c" +#line 446 "parser.c" enum {JSON_object_start = 1}; -enum {JSON_object_first_final = 27}; +enum {JSON_object_first_final = 32}; enum {JSON_object_error = 0}; enum {JSON_object_en_main = 1}; -#line 171 "parser.rl" +#line 504 "parser.rl" + +#define PUSH(result) rvalue_stack_push(json->stack, result, &json->stack_handle, &json->stack) static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) { int cs = EVIL; - VALUE last_name = Qnil; - VALUE object_class = json->object_class; if (json->max_nesting && current_nesting > json->max_nesting) { rb_raise(eNestingError, "nesting of %d is too deep", current_nesting); } - *result = NIL_P(object_class) ? rb_hash_new() : rb_class_new_instance(0, 0, object_class); + long stack_head = json->stack->head; -#line 135 "parser.c" +#line 470 "parser.c" { cs = JSON_object_start; } -#line 186 "parser.rl" +#line 519 "parser.rl" -#line 142 "parser.c" +#line 477 "parser.c" { + short _widec; if ( p == pe ) goto _test_eof; switch ( cs ) @@ -159,27 +495,30 @@ case 2: case 13: goto st2; case 32: goto st2; case 34: goto tr2; - case 47: goto st23; + case 47: goto st28; case 125: goto tr4; } if ( 9 <= (*p) && (*p) <= 10 ) goto st2; goto st0; tr2: -#line 153 "parser.rl" +#line 483 "parser.rl" { char *np; - json->parsing_name = 1; - np = JSON_parse_string(json, p, pe, &last_name); - json->parsing_name = 0; - if (np == NULL) { p--; {p++; cs = 3; goto _out;} } else {p = (( np))-1;} + json->parsing_name = true; + np = JSON_parse_string(json, p, pe, result); + json->parsing_name = false; + if (np == NULL) { p--; {p++; cs = 3; goto _out;} } else { + PUSH(*result); + {p = (( np))-1;} + } } goto st3; st3: if ( ++p == pe ) goto _test_eof3; case 3: -#line 183 "parser.c" +#line 522 "parser.c" switch( (*p) ) { case 13: goto st3; case 32: goto st3; @@ -230,7 +569,7 @@ case 8: case 32: goto st8; case 34: goto tr11; case 45: goto tr11; - case 47: goto st19; + case 47: goto st24; case 73: goto tr11; case 78: goto tr11; case 91: goto tr11; @@ -246,19 +585,12 @@ case 8: goto st8; goto st0; tr11: -#line 137 "parser.rl" +#line 472 "parser.rl" { - VALUE v = Qnil; - char *np = JSON_parse_value(json, p, pe, &v, current_nesting); + char *np = JSON_parse_value(json, p, pe, result, current_nesting); if (np == NULL) { p--; {p++; cs = 9; goto _out;} } else { - if (NIL_P(json->object_class)) { - OBJ_FREEZE(last_name); - rb_hash_aset(*result, last_name, v); - } else { - rb_funcall(*result, i_aset, 2, last_name, v); - } {p = (( np))-1;} } } @@ -267,16 +599,75 @@ st9: if ( ++p == pe ) goto _test_eof9; case 9: -#line 271 "parser.c" - switch( (*p) ) { - case 13: goto st9; - case 32: goto st9; - case 44: goto st10; - case 47: goto st15; +#line 603 "parser.c" + _widec = (*p); + if ( (*p) < 13 ) { + if ( (*p) > 9 ) { + if ( 10 <= (*p) && (*p) <= 10 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 481 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) >= 9 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 481 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) > 13 ) { + if ( (*p) < 44 ) { + if ( 32 <= (*p) && (*p) <= 32 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 481 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) > 44 ) { + if ( 47 <= (*p) && (*p) <= 47 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 481 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 481 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 481 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { case 125: goto tr4; - } - if ( 9 <= (*p) && (*p) <= 10 ) - goto st9; + case 269: goto st10; + case 288: goto st10; + case 300: goto st11; + case 303: goto st16; + case 525: goto st9; + case 544: goto st9; + case 556: goto st2; + case 559: goto st20; + } + if ( _widec > 266 ) { + if ( 521 <= _widec && _widec <= 522 ) + goto st9; + } else if ( _widec >= 265 ) + goto st10; + goto st0; +tr4: +#line 494 "parser.rl" + { p--; {p++; cs = 32; goto _out;} } + goto st32; +st32: + if ( ++p == pe ) + goto _test_eof32; +case 32: +#line 671 "parser.c" goto st0; st10: if ( ++p == pe ) @@ -285,8 +676,9 @@ case 10: switch( (*p) ) { case 13: goto st10; case 32: goto st10; - case 34: goto tr2; - case 47: goto st11; + case 44: goto st11; + case 47: goto st16; + case 125: goto tr4; } if ( 9 <= (*p) && (*p) <= 10 ) goto st10; @@ -296,139 +688,288 @@ st11: goto _test_eof11; case 11: switch( (*p) ) { - case 42: goto st12; - case 47: goto st14; + case 13: goto st11; + case 32: goto st11; + case 34: goto tr2; + case 47: goto st12; } + if ( 9 <= (*p) && (*p) <= 10 ) + goto st11; goto st0; st12: if ( ++p == pe ) goto _test_eof12; case 12: - if ( (*p) == 42 ) - goto st13; - goto st12; + switch( (*p) ) { + case 42: goto st13; + case 47: goto st15; + } + goto st0; st13: if ( ++p == pe ) goto _test_eof13; case 13: - switch( (*p) ) { - case 42: goto st13; - case 47: goto st10; - } - goto st12; + if ( (*p) == 42 ) + goto st14; + goto st13; st14: if ( ++p == pe ) goto _test_eof14; case 14: - if ( (*p) == 10 ) - goto st10; - goto st14; + switch( (*p) ) { + case 42: goto st14; + case 47: goto st11; + } + goto st13; st15: if ( ++p == pe ) goto _test_eof15; case 15: - switch( (*p) ) { - case 42: goto st16; - case 47: goto st18; - } - goto st0; + if ( (*p) == 10 ) + goto st11; + goto st15; st16: if ( ++p == pe ) goto _test_eof16; case 16: - if ( (*p) == 42 ) - goto st17; - goto st16; + switch( (*p) ) { + case 42: goto st17; + case 47: goto st19; + } + goto st0; st17: if ( ++p == pe ) goto _test_eof17; case 17: - switch( (*p) ) { - case 42: goto st17; - case 47: goto st9; - } - goto st16; + if ( (*p) == 42 ) + goto st18; + goto st17; st18: if ( ++p == pe ) goto _test_eof18; case 18: - if ( (*p) == 10 ) - goto st9; - goto st18; -tr4: -#line 161 "parser.rl" - { p--; {p++; cs = 27; goto _out;} } - goto st27; -st27: - if ( ++p == pe ) - goto _test_eof27; -case 27: -#line 367 "parser.c" - goto st0; + switch( (*p) ) { + case 42: goto st18; + case 47: goto st10; + } + goto st17; st19: if ( ++p == pe ) goto _test_eof19; case 19: - switch( (*p) ) { - case 42: goto st20; - case 47: goto st22; - } - goto st0; + if ( (*p) == 10 ) + goto st10; + goto st19; st20: if ( ++p == pe ) goto _test_eof20; case 20: - if ( (*p) == 42 ) - goto st21; - goto st20; + _widec = (*p); + if ( (*p) > 42 ) { + if ( 47 <= (*p) && (*p) <= 47 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 481 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) >= 42 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 481 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { + case 298: goto st17; + case 303: goto st19; + case 554: goto st21; + case 559: goto st23; + } + goto st0; st21: if ( ++p == pe ) goto _test_eof21; case 21: - switch( (*p) ) { - case 42: goto st21; - case 47: goto st8; - } - goto st20; + _widec = (*p); + if ( (*p) < 42 ) { + if ( (*p) <= 41 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 481 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) > 42 ) { + if ( 43 <= (*p) ) + { _widec = (short)(128 + ((*p) - -128)); + if ( +#line 481 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 481 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { + case 298: goto st18; + case 554: goto st22; + } + if ( _widec > 383 ) { + if ( 384 <= _widec && _widec <= 639 ) + goto st21; + } else if ( _widec >= 128 ) + goto st17; + goto st0; st22: if ( ++p == pe ) goto _test_eof22; case 22: - if ( (*p) == 10 ) - goto st8; - goto st22; + _widec = (*p); + if ( (*p) < 43 ) { + if ( (*p) > 41 ) { + if ( 42 <= (*p) && (*p) <= 42 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 481 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 481 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) > 46 ) { + if ( (*p) > 47 ) { + if ( 48 <= (*p) ) + { _widec = (short)(128 + ((*p) - -128)); + if ( +#line 481 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) >= 47 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 481 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 481 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { + case 298: goto st18; + case 303: goto st10; + case 554: goto st22; + case 559: goto st9; + } + if ( _widec > 383 ) { + if ( 384 <= _widec && _widec <= 639 ) + goto st21; + } else if ( _widec >= 128 ) + goto st17; + goto st0; st23: if ( ++p == pe ) goto _test_eof23; case 23: - switch( (*p) ) { - case 42: goto st24; - case 47: goto st26; - } + _widec = (*p); + if ( (*p) < 10 ) { + if ( (*p) <= 9 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 481 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) > 10 ) { + if ( 11 <= (*p) ) + { _widec = (short)(128 + ((*p) - -128)); + if ( +#line 481 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 481 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { + case 266: goto st10; + case 522: goto st9; + } + if ( _widec > 383 ) { + if ( 384 <= _widec && _widec <= 639 ) + goto st23; + } else if ( _widec >= 128 ) + goto st19; goto st0; st24: if ( ++p == pe ) goto _test_eof24; case 24: - if ( (*p) == 42 ) - goto st25; - goto st24; + switch( (*p) ) { + case 42: goto st25; + case 47: goto st27; + } + goto st0; st25: if ( ++p == pe ) goto _test_eof25; case 25: - switch( (*p) ) { - case 42: goto st25; - case 47: goto st2; - } - goto st24; + if ( (*p) == 42 ) + goto st26; + goto st25; st26: if ( ++p == pe ) goto _test_eof26; case 26: + switch( (*p) ) { + case 42: goto st26; + case 47: goto st8; + } + goto st25; +st27: + if ( ++p == pe ) + goto _test_eof27; +case 27: + if ( (*p) == 10 ) + goto st8; + goto st27; +st28: + if ( ++p == pe ) + goto _test_eof28; +case 28: + switch( (*p) ) { + case 42: goto st29; + case 47: goto st31; + } + goto st0; +st29: + if ( ++p == pe ) + goto _test_eof29; +case 29: + if ( (*p) == 42 ) + goto st30; + goto st29; +st30: + if ( ++p == pe ) + goto _test_eof30; +case 30: + switch( (*p) ) { + case 42: goto st30; + case 47: goto st2; + } + goto st29; +st31: + if ( ++p == pe ) + goto _test_eof31; +case 31: if ( (*p) == 10 ) goto st2; - goto st26; + goto st31; } _test_eof2: cs = 2; goto _test_eof; _test_eof3: cs = 3; goto _test_eof; @@ -438,6 +979,7 @@ case 26: _test_eof7: cs = 7; goto _test_eof; _test_eof8: cs = 8; goto _test_eof; _test_eof9: cs = 9; goto _test_eof; + _test_eof32: cs = 32; goto _test_eof; _test_eof10: cs = 10; goto _test_eof; _test_eof11: cs = 11; goto _test_eof; _test_eof12: cs = 12; goto _test_eof; @@ -447,7 +989,6 @@ case 26: _test_eof16: cs = 16; goto _test_eof; _test_eof17: cs = 17; goto _test_eof; _test_eof18: cs = 18; goto _test_eof; - _test_eof27: cs = 27; goto _test_eof; _test_eof19: cs = 19; goto _test_eof; _test_eof20: cs = 20; goto _test_eof; _test_eof21: cs = 21; goto _test_eof; @@ -456,20 +997,49 @@ case 26: _test_eof24: cs = 24; goto _test_eof; _test_eof25: cs = 25; goto _test_eof; _test_eof26: cs = 26; goto _test_eof; + _test_eof27: cs = 27; goto _test_eof; + _test_eof28: cs = 28; goto _test_eof; + _test_eof29: cs = 29; goto _test_eof; + _test_eof30: cs = 30; goto _test_eof; + _test_eof31: cs = 31; goto _test_eof; _test_eof: {} _out: {} } -#line 187 "parser.rl" +#line 520 "parser.rl" if (cs >= JSON_object_first_final) { - if (json->create_additions) { + long count = json->stack->head - stack_head; + + if (RB_UNLIKELY(json->object_class)) { + VALUE object = rb_class_new_instance(0, 0, json->object_class); + long index = 0; + VALUE *items = rvalue_stack_peek(json->stack, count); + while (index < count) { + VALUE name = items[index++]; + VALUE value = items[index++]; + rb_funcall(object, i_aset, 2, name, value); + } + *result = object; + } else { + VALUE hash; +#ifdef HAVE_RB_HASH_NEW_CAPA + hash = rb_hash_new_capa(count >> 1); +#else + hash = rb_hash_new(); +#endif + rb_hash_bulk_insert(count, rvalue_stack_peek(json->stack, count), hash); + *result = hash; + } + rvalue_stack_pop(json->stack, count); + + if (RB_UNLIKELY(json->create_additions)) { VALUE klassname; - if (NIL_P(json->object_class)) { - klassname = rb_hash_aref(*result, json->create_id); + if (json->object_class) { + klassname = rb_funcall(*result, i_aref, 1, json->create_id); } else { - klassname = rb_funcall(*result, i_aref, 1, json->create_id); + klassname = rb_hash_aref(*result, json->create_id); } if (!NIL_P(klassname)) { VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname); @@ -488,8 +1058,7 @@ case 26: } - -#line 493 "parser.c" +#line 1062 "parser.c" enum {JSON_value_start = 1}; enum {JSON_value_first_final = 29}; enum {JSON_value_error = 0}; @@ -497,7 +1066,7 @@ enum {JSON_value_error = 0}; enum {JSON_value_en_main = 1}; -#line 290 "parser.rl" +#line 655 "parser.rl" static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) @@ -505,14 +1074,14 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul int cs = EVIL; -#line 509 "parser.c" +#line 1078 "parser.c" { cs = JSON_value_start; } -#line 297 "parser.rl" +#line 662 "parser.rl" -#line 516 "parser.c" +#line 1085 "parser.c" { if ( p == pe ) goto _test_eof; @@ -546,14 +1115,19 @@ st0: cs = 0; goto _out; tr2: -#line 242 "parser.rl" +#line 598 "parser.rl" { char *np = JSON_parse_string(json, p, pe, result); - if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;} + if (np == NULL) { + p--; + {p++; cs = 29; goto _out;} + } else { + {p = (( np))-1;} + } } goto st29; tr3: -#line 247 "parser.rl" +#line 608 "parser.rl" { char *np; if(pe > p + 8 && !strncmp(MinusInfinity, p, 9)) { @@ -566,14 +1140,18 @@ tr3: } } np = JSON_parse_float(json, p, pe, result); - if (np != NULL) {p = (( np))-1;} + if (np != NULL) { + {p = (( np))-1;} + } np = JSON_parse_integer(json, p, pe, result); - if (np != NULL) {p = (( np))-1;} + if (np != NULL) { + {p = (( np))-1;} + } p--; {p++; cs = 29; goto _out;} } goto st29; tr7: -#line 265 "parser.rl" +#line 630 "parser.rl" { char *np; np = JSON_parse_array(json, p, pe, result, current_nesting + 1); @@ -581,7 +1159,7 @@ tr7: } goto st29; tr11: -#line 271 "parser.rl" +#line 636 "parser.rl" { char *np; np = JSON_parse_object(json, p, pe, result, current_nesting + 1); @@ -589,7 +1167,7 @@ tr11: } goto st29; tr25: -#line 235 "parser.rl" +#line 591 "parser.rl" { if (json->allow_nan) { *result = CInfinity; @@ -599,7 +1177,7 @@ tr25: } goto st29; tr27: -#line 228 "parser.rl" +#line 584 "parser.rl" { if (json->allow_nan) { *result = CNaN; @@ -609,19 +1187,19 @@ tr27: } goto st29; tr31: -#line 222 "parser.rl" +#line 578 "parser.rl" { *result = Qfalse; } goto st29; tr34: -#line 219 "parser.rl" +#line 575 "parser.rl" { *result = Qnil; } goto st29; tr37: -#line 225 "parser.rl" +#line 581 "parser.rl" { *result = Qtrue; } @@ -630,9 +1208,9 @@ st29: if ( ++p == pe ) goto _test_eof29; case 29: -#line 277 "parser.rl" +#line 642 "parser.rl" { p--; {p++; cs = 29; goto _out;} } -#line 636 "parser.c" +#line 1214 "parser.c" switch( (*p) ) { case 13: goto st29; case 32: goto st29; @@ -873,13 +1451,14 @@ case 28: _out: {} } -#line 298 "parser.rl" +#line 663 "parser.rl" if (json->freeze) { OBJ_FREEZE(*result); } if (cs >= JSON_value_first_final) { + PUSH(*result); return p; } else { return NULL; @@ -887,7 +1466,7 @@ case 28: } -#line 891 "parser.c" +#line 1470 "parser.c" enum {JSON_integer_start = 1}; enum {JSON_integer_first_final = 3}; enum {JSON_integer_error = 0}; @@ -895,7 +1474,7 @@ enum {JSON_integer_error = 0}; enum {JSON_integer_en_main = 1}; -#line 318 "parser.rl" +#line 684 "parser.rl" static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result) @@ -903,15 +1482,15 @@ static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *res int cs = EVIL; -#line 907 "parser.c" +#line 1486 "parser.c" { cs = JSON_integer_start; } -#line 325 "parser.rl" +#line 691 "parser.rl" json->memo = p; -#line 915 "parser.c" +#line 1494 "parser.c" { if ( p == pe ) goto _test_eof; @@ -945,14 +1524,14 @@ case 3: goto st0; goto tr4; tr4: -#line 315 "parser.rl" +#line 681 "parser.rl" { p--; {p++; cs = 4; goto _out;} } goto st4; st4: if ( ++p == pe ) goto _test_eof4; case 4: -#line 956 "parser.c" +#line 1535 "parser.c" goto st0; st5: if ( ++p == pe ) @@ -971,7 +1550,7 @@ case 5: _out: {} } -#line 327 "parser.rl" +#line 693 "parser.rl" if (cs >= JSON_integer_first_final) { long len = p - json->memo; @@ -986,7 +1565,7 @@ case 5: } -#line 990 "parser.c" +#line 1569 "parser.c" enum {JSON_float_start = 1}; enum {JSON_float_first_final = 8}; enum {JSON_float_error = 0}; @@ -994,7 +1573,7 @@ enum {JSON_float_error = 0}; enum {JSON_float_en_main = 1}; -#line 352 "parser.rl" +#line 718 "parser.rl" static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result) @@ -1002,15 +1581,15 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul int cs = EVIL; -#line 1006 "parser.c" +#line 1585 "parser.c" { cs = JSON_float_start; } -#line 359 "parser.rl" +#line 725 "parser.rl" json->memo = p; -#line 1014 "parser.c" +#line 1593 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1068,14 +1647,14 @@ case 8: goto st0; goto tr9; tr9: -#line 346 "parser.rl" +#line 712 "parser.rl" { p--; {p++; cs = 9; goto _out;} } goto st9; st9: if ( ++p == pe ) goto _test_eof9; case 9: -#line 1079 "parser.c" +#line 1658 "parser.c" goto st0; st5: if ( ++p == pe ) @@ -1136,12 +1715,12 @@ case 7: _out: {} } -#line 361 "parser.rl" +#line 727 "parser.rl" if (cs >= JSON_float_first_final) { VALUE mod = Qnil; ID method_id = 0; - if (!NIL_P(json->decimal_class)) { + if (json->decimal_class) { if (rb_respond_to(json->decimal_class, i_try_convert)) { mod = json->decimal_class; method_id = i_try_convert; @@ -1189,37 +1768,37 @@ case 7: -#line 1193 "parser.c" +#line 1772 "parser.c" enum {JSON_array_start = 1}; -enum {JSON_array_first_final = 17}; +enum {JSON_array_first_final = 22}; enum {JSON_array_error = 0}; enum {JSON_array_en_main = 1}; -#line 441 "parser.rl" +#line 804 "parser.rl" static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) { int cs = EVIL; - VALUE array_class = json->array_class; if (json->max_nesting && current_nesting > json->max_nesting) { rb_raise(eNestingError, "nesting of %d is too deep", current_nesting); } - *result = NIL_P(array_class) ? rb_ary_new() : rb_class_new_instance(0, 0, array_class); + long stack_head = json->stack->head; -#line 1215 "parser.c" +#line 1793 "parser.c" { cs = JSON_array_start; } -#line 454 "parser.rl" +#line 816 "parser.rl" -#line 1222 "parser.c" +#line 1800 "parser.c" { + short _widec; if ( p == pe ) goto _test_eof; switch ( cs ) @@ -1240,7 +1819,7 @@ case 2: case 32: goto st2; case 34: goto tr2; case 45: goto tr2; - case 47: goto st13; + case 47: goto st18; case 73: goto tr2; case 78: goto tr2; case 91: goto tr2; @@ -1257,18 +1836,13 @@ case 2: goto st2; goto st0; tr2: -#line 418 "parser.rl" +#line 784 "parser.rl" { VALUE v = Qnil; char *np = JSON_parse_value(json, p, pe, &v, current_nesting); if (np == NULL) { p--; {p++; cs = 3; goto _out;} } else { - if (NIL_P(json->array_class)) { - rb_ary_push(*result, v); - } else { - rb_funcall(*result, i_leftshift, 1, v); - } {p = (( np))-1;} } } @@ -1277,15 +1851,23 @@ st3: if ( ++p == pe ) goto _test_eof3; case 3: -#line 1281 "parser.c" - switch( (*p) ) { +#line 1855 "parser.c" + _widec = (*p); + if ( 44 <= (*p) && (*p) <= 44 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 794 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { case 13: goto st3; case 32: goto st3; - case 44: goto st4; - case 47: goto st9; + case 47: goto st4; case 93: goto tr4; + case 300: goto st8; + case 556: goto st13; } - if ( 9 <= (*p) && (*p) <= 10 ) + if ( 9 <= _widec && _widec <= 10 ) goto st3; goto st0; st4: @@ -1293,57 +1875,67 @@ st4: goto _test_eof4; case 4: switch( (*p) ) { - case 13: goto st4; - case 32: goto st4; - case 34: goto tr2; - case 45: goto tr2; - case 47: goto st5; - case 73: goto tr2; - case 78: goto tr2; - case 91: goto tr2; - case 102: goto tr2; - case 110: goto tr2; - case 116: goto tr2; - case 123: goto tr2; + case 42: goto st5; + case 47: goto st7; } - if ( (*p) > 10 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto tr2; - } else if ( (*p) >= 9 ) - goto st4; goto st0; st5: if ( ++p == pe ) goto _test_eof5; case 5: - switch( (*p) ) { - case 42: goto st6; - case 47: goto st8; - } - goto st0; + if ( (*p) == 42 ) + goto st6; + goto st5; st6: if ( ++p == pe ) goto _test_eof6; case 6: - if ( (*p) == 42 ) - goto st7; - goto st6; + switch( (*p) ) { + case 42: goto st6; + case 47: goto st3; + } + goto st5; st7: if ( ++p == pe ) goto _test_eof7; case 7: - switch( (*p) ) { - case 42: goto st7; - case 47: goto st4; - } - goto st6; + if ( (*p) == 10 ) + goto st3; + goto st7; +tr4: +#line 796 "parser.rl" + { p--; {p++; cs = 22; goto _out;} } + goto st22; +st22: + if ( ++p == pe ) + goto _test_eof22; +case 22: +#line 1914 "parser.c" + goto st0; st8: if ( ++p == pe ) goto _test_eof8; case 8: - if ( (*p) == 10 ) - goto st4; - goto st8; + switch( (*p) ) { + case 13: goto st8; + case 32: goto st8; + case 34: goto tr2; + case 45: goto tr2; + case 47: goto st9; + case 73: goto tr2; + case 78: goto tr2; + case 91: goto tr2; + case 102: goto tr2; + case 110: goto tr2; + case 116: goto tr2; + case 123: goto tr2; + } + if ( (*p) > 10 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr2; + } else if ( (*p) >= 9 ) + goto st8; + goto st0; st9: if ( ++p == pe ) goto _test_eof9; @@ -1366,7 +1958,7 @@ st11: case 11: switch( (*p) ) { case 42: goto st11; - case 47: goto st3; + case 47: goto st8; } goto st10; st12: @@ -1374,50 +1966,252 @@ st12: goto _test_eof12; case 12: if ( (*p) == 10 ) - goto st3; + goto st8; goto st12; -tr4: -#line 433 "parser.rl" - { p--; {p++; cs = 17; goto _out;} } - goto st17; -st17: - if ( ++p == pe ) - goto _test_eof17; -case 17: -#line 1388 "parser.c" - goto st0; st13: if ( ++p == pe ) goto _test_eof13; case 13: - switch( (*p) ) { - case 42: goto st14; - case 47: goto st16; - } + _widec = (*p); + if ( (*p) < 13 ) { + if ( (*p) > 9 ) { + if ( 10 <= (*p) && (*p) <= 10 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 794 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) >= 9 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 794 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) > 13 ) { + if ( (*p) > 32 ) { + if ( 47 <= (*p) && (*p) <= 47 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 794 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) >= 32 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 794 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 794 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { + case 34: goto tr2; + case 45: goto tr2; + case 73: goto tr2; + case 78: goto tr2; + case 91: goto tr2; + case 93: goto tr4; + case 102: goto tr2; + case 110: goto tr2; + case 116: goto tr2; + case 123: goto tr2; + case 269: goto st8; + case 288: goto st8; + case 303: goto st9; + case 525: goto st13; + case 544: goto st13; + case 559: goto st14; + } + if ( _widec < 265 ) { + if ( 48 <= _widec && _widec <= 57 ) + goto tr2; + } else if ( _widec > 266 ) { + if ( 521 <= _widec && _widec <= 522 ) + goto st13; + } else + goto st8; goto st0; st14: if ( ++p == pe ) goto _test_eof14; case 14: - if ( (*p) == 42 ) - goto st15; - goto st14; + _widec = (*p); + if ( (*p) > 42 ) { + if ( 47 <= (*p) && (*p) <= 47 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 794 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) >= 42 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 794 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { + case 298: goto st10; + case 303: goto st12; + case 554: goto st15; + case 559: goto st17; + } + goto st0; st15: if ( ++p == pe ) goto _test_eof15; case 15: - switch( (*p) ) { - case 42: goto st15; - case 47: goto st2; - } - goto st14; + _widec = (*p); + if ( (*p) < 42 ) { + if ( (*p) <= 41 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 794 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) > 42 ) { + if ( 43 <= (*p) ) + { _widec = (short)(128 + ((*p) - -128)); + if ( +#line 794 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 794 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { + case 298: goto st11; + case 554: goto st16; + } + if ( _widec > 383 ) { + if ( 384 <= _widec && _widec <= 639 ) + goto st15; + } else if ( _widec >= 128 ) + goto st10; + goto st0; st16: if ( ++p == pe ) goto _test_eof16; case 16: + _widec = (*p); + if ( (*p) < 43 ) { + if ( (*p) > 41 ) { + if ( 42 <= (*p) && (*p) <= 42 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 794 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 794 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) > 46 ) { + if ( (*p) > 47 ) { + if ( 48 <= (*p) ) + { _widec = (short)(128 + ((*p) - -128)); + if ( +#line 794 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) >= 47 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 794 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 794 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { + case 298: goto st11; + case 303: goto st8; + case 554: goto st16; + case 559: goto st13; + } + if ( _widec > 383 ) { + if ( 384 <= _widec && _widec <= 639 ) + goto st15; + } else if ( _widec >= 128 ) + goto st10; + goto st0; +st17: + if ( ++p == pe ) + goto _test_eof17; +case 17: + _widec = (*p); + if ( (*p) < 10 ) { + if ( (*p) <= 9 ) { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 794 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else if ( (*p) > 10 ) { + if ( 11 <= (*p) ) + { _widec = (short)(128 + ((*p) - -128)); + if ( +#line 794 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + } else { + _widec = (short)(128 + ((*p) - -128)); + if ( +#line 794 "parser.rl" + json->allow_trailing_comma ) _widec += 256; + } + switch( _widec ) { + case 266: goto st8; + case 522: goto st13; + } + if ( _widec > 383 ) { + if ( 384 <= _widec && _widec <= 639 ) + goto st17; + } else if ( _widec >= 128 ) + goto st12; + goto st0; +st18: + if ( ++p == pe ) + goto _test_eof18; +case 18: + switch( (*p) ) { + case 42: goto st19; + case 47: goto st21; + } + goto st0; +st19: + if ( ++p == pe ) + goto _test_eof19; +case 19: + if ( (*p) == 42 ) + goto st20; + goto st19; +st20: + if ( ++p == pe ) + goto _test_eof20; +case 20: + switch( (*p) ) { + case 42: goto st20; + case 47: goto st2; + } + goto st19; +st21: + if ( ++p == pe ) + goto _test_eof21; +case 21: if ( (*p) == 10 ) goto st2; - goto st16; + goto st21; } _test_eof2: cs = 2; goto _test_eof; _test_eof3: cs = 3; goto _test_eof; @@ -1425,24 +2219,45 @@ case 16: _test_eof5: cs = 5; goto _test_eof; _test_eof6: cs = 6; goto _test_eof; _test_eof7: cs = 7; goto _test_eof; + _test_eof22: cs = 22; goto _test_eof; _test_eof8: cs = 8; goto _test_eof; _test_eof9: cs = 9; goto _test_eof; _test_eof10: cs = 10; goto _test_eof; _test_eof11: cs = 11; goto _test_eof; _test_eof12: cs = 12; goto _test_eof; - _test_eof17: cs = 17; goto _test_eof; _test_eof13: cs = 13; goto _test_eof; _test_eof14: cs = 14; goto _test_eof; _test_eof15: cs = 15; goto _test_eof; _test_eof16: cs = 16; goto _test_eof; + _test_eof17: cs = 17; goto _test_eof; + _test_eof18: cs = 18; goto _test_eof; + _test_eof19: cs = 19; goto _test_eof; + _test_eof20: cs = 20; goto _test_eof; + _test_eof21: cs = 21; goto _test_eof; _test_eof: {} _out: {} } -#line 455 "parser.rl" +#line 817 "parser.rl" if(cs >= JSON_array_first_final) { + long count = json->stack->head - stack_head; + + if (RB_UNLIKELY(json->array_class)) { + VALUE array = rb_class_new_instance(0, 0, json->array_class); + VALUE *items = rvalue_stack_peek(json->stack, count); + long index; + for (index = 0; index < count; index++) { + rb_funcall(array, i_leftshift, 1, items[index]); + } + *result = array; + } else { + VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(json->stack, count)); + *result = array; + } + rvalue_stack_pop(json->stack, count); + return p + 1; } else { raise_parse_error("unexpected token at '%s'", p); @@ -1458,7 +2273,7 @@ static inline VALUE build_string(const char *start, const char *end, bool intern VALUE result; # ifdef HAVE_RB_ENC_INTERNED_STR if (intern) { - result = rb_enc_interned_str(start, (long)(end - start), rb_utf8_encoding()); + result = rb_enc_interned_str(start, (long)(end - start), enc_utf8); } else { result = rb_utf8_str_new(start, (long)(end - start)); } @@ -1476,13 +2291,26 @@ static inline VALUE build_string(const char *start, const char *end, bool intern return result; } -static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bool symbolize) +static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize) { size_t bufferSize = stringEnd - string; char *p = string, *pe = string, *unescape, *bufferStart, *buffer; int unescape_len; char buf[4]; + if (is_name) { + VALUE cached_key; + if (RB_UNLIKELY(symbolize)) { + cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize); + } else { + cached_key = rstring_cache_fetch(&json->name_cache, string, bufferSize); + } + + if (RB_LIKELY(cached_key)) { + return cached_key; + } + } + pe = memchr(p, '\\', bufferSize); if (RB_LIKELY(pe == NULL)) { return build_string(string, stringEnd, intern, symbolize); @@ -1585,7 +2413,7 @@ static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bo } -#line 1589 "parser.c" +#line 2417 "parser.c" enum {JSON_string_start = 1}; enum {JSON_string_first_final = 8}; enum {JSON_string_error = 0}; @@ -1593,7 +2421,7 @@ enum {JSON_string_error = 0}; enum {JSON_string_en_main = 1}; -#line 617 "parser.rl" +#line 1008 "parser.rl" static int @@ -1614,15 +2442,15 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu VALUE match_string; -#line 1618 "parser.c" +#line 2446 "parser.c" { cs = JSON_string_start; } -#line 637 "parser.rl" +#line 1028 "parser.rl" json->memo = p; -#line 1626 "parser.c" +#line 2454 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1647,9 +2475,9 @@ case 2: goto st0; goto st2; tr2: -#line 604 "parser.rl" +#line 995 "parser.rl" { - *result = json_string_unescape(json->memo + 1, p, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); + *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); if (NIL_P(*result)) { p--; {p++; cs = 8; goto _out;} @@ -1657,14 +2485,14 @@ tr2: {p = (( p + 1))-1;} } } -#line 614 "parser.rl" +#line 1005 "parser.rl" { p--; {p++; cs = 8; goto _out;} } goto st8; st8: if ( ++p == pe ) goto _test_eof8; case 8: -#line 1668 "parser.c" +#line 2496 "parser.c" goto st0; st3: if ( ++p == pe ) @@ -1740,7 +2568,7 @@ case 7: _out: {} } -#line 639 "parser.rl" +#line 1030 "parser.rl" if (json->create_additions && RTEST(match_string = json->match_string)) { VALUE klass; @@ -1776,7 +2604,7 @@ static VALUE convert_encoding(VALUE source) { int encindex = RB_ENCODING_GET(source); - if (encindex == utf8_encindex) { + if (RB_LIKELY(encindex == utf8_encindex)) { return source; } @@ -1788,6 +2616,68 @@ static VALUE convert_encoding(VALUE source) return rb_funcall(source, i_encode, 1, Encoding_UTF_8); } +static int configure_parser_i(VALUE key, VALUE val, VALUE data) +{ + JSON_Parser *json = (JSON_Parser *)data; + + if (key == sym_max_nesting) { json->max_nesting = RTEST(val) ? FIX2INT(val) : 0; } + else if (key == sym_allow_nan) { json->allow_nan = RTEST(val); } + else if (key == sym_allow_trailing_comma) { json->allow_trailing_comma = RTEST(val); } + else if (key == sym_symbolize_names) { json->symbolize_names = RTEST(val); } + else if (key == sym_freeze) { json->freeze = RTEST(val); } + else if (key == sym_create_id) { json->create_id = RTEST(val) ? val : Qfalse; } + else if (key == sym_object_class) { json->object_class = RTEST(val) ? val : Qfalse; } + else if (key == sym_array_class) { json->array_class = RTEST(val) ? val : Qfalse; } + else if (key == sym_decimal_class) { json->decimal_class = RTEST(val) ? val : Qfalse; } + else if (key == sym_match_string) { json->match_string = RTEST(val) ? val : Qfalse; } + else if (key == sym_create_additions) { + if (NIL_P(val)) { + json->create_additions = true; + json->deprecated_create_additions = true; + } else { + json->create_additions = RTEST(val); + json->deprecated_create_additions = false; + } + } + + return ST_CONTINUE; +} + +static void parser_init(JSON_Parser *json, VALUE source, VALUE opts) +{ + if (json->Vsource) { + rb_raise(rb_eTypeError, "already initialized instance"); + } + + json->fbuffer.initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT; + json->max_nesting = 100; + + if (!NIL_P(opts)) { + Check_Type(opts, T_HASH); + if (RHASH_SIZE(opts) > 0) { + // We assume in most cases few keys are set so it's faster to go over + // the provided keys than to check all possible keys. + rb_hash_foreach(opts, configure_parser_i, (VALUE)json); + + if (json->symbolize_names && json->create_additions) { + rb_raise(rb_eArgError, + "options :symbolize_names and :create_additions cannot be " + " used in conjunction"); + } + + if (json->create_additions && !json->create_id) { + json->create_id = rb_funcall(mJSON, i_create_id, 0); + } + } + + } + source = convert_encoding(StringValue(source)); + StringValue(source); + json->len = RSTRING_LEN(source); + json->source = RSTRING_PTR(source); + json->Vsource = source; +} + /* * call-seq: new(source, opts => {}) * @@ -1822,122 +2712,16 @@ static VALUE convert_encoding(VALUE source) */ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) { - VALUE source, opts; GET_PARSER_INIT; - if (json->Vsource) { - rb_raise(rb_eTypeError, "already initialized instance"); - } - rb_check_arity(argc, 1, 2); - source = argv[0]; - opts = Qnil; - if (argc == 2) { - opts = argv[1]; - Check_Type(argv[1], T_HASH); - if (RHASH_SIZE(argv[1]) > 0) { - opts = argv[1]; - } - } - if (!NIL_P(opts)) { - VALUE tmp = ID2SYM(i_max_nesting); - if (option_given_p(opts, tmp)) { - VALUE max_nesting = rb_hash_aref(opts, tmp); - if (RTEST(max_nesting)) { - Check_Type(max_nesting, T_FIXNUM); - json->max_nesting = FIX2INT(max_nesting); - } else { - json->max_nesting = 0; - } - } else { - json->max_nesting = 100; - } - tmp = ID2SYM(i_allow_nan); - if (option_given_p(opts, tmp)) { - json->allow_nan = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0; - } else { - json->allow_nan = 0; - } - tmp = ID2SYM(i_symbolize_names); - if (option_given_p(opts, tmp)) { - json->symbolize_names = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0; - } else { - json->symbolize_names = 0; - } - tmp = ID2SYM(i_freeze); - if (option_given_p(opts, tmp)) { - json->freeze = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0; - } else { - json->freeze = 0; - } - tmp = ID2SYM(i_create_additions); - if (option_given_p(opts, tmp)) { - tmp = rb_hash_aref(opts, tmp); - if (NIL_P(tmp)) { - json->create_additions = 1; - json->deprecated_create_additions = 1; - } else { - json->create_additions = RTEST(tmp); - json->deprecated_create_additions = 0; - } - } - - if (json->symbolize_names && json->create_additions) { - rb_raise(rb_eArgError, - "options :symbolize_names and :create_additions cannot be " - " used in conjunction"); - } - tmp = ID2SYM(i_create_id); - if (option_given_p(opts, tmp)) { - json->create_id = rb_hash_aref(opts, tmp); - } else { - json->create_id = rb_funcall(mJSON, i_create_id, 0); - } - tmp = ID2SYM(i_object_class); - if (option_given_p(opts, tmp)) { - json->object_class = rb_hash_aref(opts, tmp); - } else { - json->object_class = Qnil; - } - tmp = ID2SYM(i_array_class); - if (option_given_p(opts, tmp)) { - json->array_class = rb_hash_aref(opts, tmp); - } else { - json->array_class = Qnil; - } - tmp = ID2SYM(i_decimal_class); - if (option_given_p(opts, tmp)) { - json->decimal_class = rb_hash_aref(opts, tmp); - } else { - json->decimal_class = Qnil; - } - tmp = ID2SYM(i_match_string); - if (option_given_p(opts, tmp)) { - VALUE match_string = rb_hash_aref(opts, tmp); - json->match_string = RTEST(match_string) ? match_string : Qnil; - } else { - json->match_string = Qnil; - } - } else { - json->max_nesting = 100; - json->allow_nan = 0; - json->create_additions = 0; - json->create_id = Qnil; - json->object_class = Qnil; - json->array_class = Qnil; - json->decimal_class = Qnil; - } - source = convert_encoding(StringValue(source)); - StringValue(source); - json->len = RSTRING_LEN(source); - json->source = RSTRING_PTR(source); - json->Vsource = source; + parser_init(json, argv[0], argc == 2 ? argv[1] : Qnil); return self; } -#line 1941 "parser.c" +#line 2725 "parser.c" enum {JSON_start = 1}; enum {JSON_first_final = 10}; enum {JSON_error = 0}; @@ -1945,7 +2729,7 @@ enum {JSON_error = 0}; enum {JSON_en_main = 1}; -#line 849 "parser.rl" +#line 1196 "parser.rl" /* @@ -1962,17 +2746,28 @@ static VALUE cParser_parse(VALUE self) VALUE result = Qnil; GET_PARSER; + char stack_buffer[FBUFFER_STACK_SIZE]; + fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE); + + VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA]; + rvalue_stack stack = { + .type = RVALUE_STACK_STACK_ALLOCATED, + .ptr = rvalue_stack_buffer, + .capa = RVALUE_STACK_INITIAL_CAPA, + }; + json->stack = &stack; -#line 1967 "parser.c" + +#line 2762 "parser.c" { cs = JSON_start; } -#line 866 "parser.rl" +#line 1224 "parser.rl" p = json->source; pe = p + json->len; -#line 1976 "parser.c" +#line 2771 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2006,7 +2801,7 @@ st0: cs = 0; goto _out; tr2: -#line 841 "parser.rl" +#line 1188 "parser.rl" { char *np = JSON_parse_value(json, p, pe, &result, 0); if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} @@ -2016,7 +2811,7 @@ st10: if ( ++p == pe ) goto _test_eof10; case 10: -#line 2020 "parser.c" +#line 2815 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -2105,7 +2900,11 @@ case 9: _out: {} } -#line 869 "parser.rl" +#line 1227 "parser.rl" + + if (json->stack_handle) { + rvalue_stack_eagerly_release(json->stack_handle); + } if (cs >= JSON_first_final && p == pe) { return result; @@ -2115,18 +2914,183 @@ case 9: } } -#ifndef HAVE_RB_GC_MARK_LOCATIONS -// For TruffleRuby -void rb_gc_mark_locations(const VALUE *start, const VALUE *end) +static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE opts) { - VALUE *value = start; + char *p, *pe; + int cs = EVIL; + VALUE result = Qnil; - while (value < end) { - rb_gc_mark(*value); - value++; + JSON_Parser _parser = {0}; + JSON_Parser *json = &_parser; + parser_init(json, source, opts); + + char stack_buffer[FBUFFER_STACK_SIZE]; + fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE); + + VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA]; + rvalue_stack stack = { + .type = RVALUE_STACK_STACK_ALLOCATED, + .ptr = rvalue_stack_buffer, + .capa = RVALUE_STACK_INITIAL_CAPA, + }; + json->stack = &stack; + + +#line 2940 "parser.c" + { + cs = JSON_start; + } + +#line 1262 "parser.rl" + p = json->source; + pe = p + json->len; + +#line 2949 "parser.c" + { + if ( p == pe ) + goto _test_eof; + switch ( cs ) + { +st1: + if ( ++p == pe ) + goto _test_eof1; +case 1: + switch( (*p) ) { + case 13: goto st1; + case 32: goto st1; + case 34: goto tr2; + case 45: goto tr2; + case 47: goto st6; + case 73: goto tr2; + case 78: goto tr2; + case 91: goto tr2; + case 102: goto tr2; + case 110: goto tr2; + case 116: goto tr2; + case 123: goto tr2; + } + if ( (*p) > 10 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto tr2; + } else if ( (*p) >= 9 ) + goto st1; + goto st0; +st0: +cs = 0; + goto _out; +tr2: +#line 1188 "parser.rl" + { + char *np = JSON_parse_value(json, p, pe, &result, 0); + if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} + } + goto st10; +st10: + if ( ++p == pe ) + goto _test_eof10; +case 10: +#line 2993 "parser.c" + switch( (*p) ) { + case 13: goto st10; + case 32: goto st10; + case 47: goto st2; + } + if ( 9 <= (*p) && (*p) <= 10 ) + goto st10; + goto st0; +st2: + if ( ++p == pe ) + goto _test_eof2; +case 2: + switch( (*p) ) { + case 42: goto st3; + case 47: goto st5; + } + goto st0; +st3: + if ( ++p == pe ) + goto _test_eof3; +case 3: + if ( (*p) == 42 ) + goto st4; + goto st3; +st4: + if ( ++p == pe ) + goto _test_eof4; +case 4: + switch( (*p) ) { + case 42: goto st4; + case 47: goto st10; + } + goto st3; +st5: + if ( ++p == pe ) + goto _test_eof5; +case 5: + if ( (*p) == 10 ) + goto st10; + goto st5; +st6: + if ( ++p == pe ) + goto _test_eof6; +case 6: + switch( (*p) ) { + case 42: goto st7; + case 47: goto st9; + } + goto st0; +st7: + if ( ++p == pe ) + goto _test_eof7; +case 7: + if ( (*p) == 42 ) + goto st8; + goto st7; +st8: + if ( ++p == pe ) + goto _test_eof8; +case 8: + switch( (*p) ) { + case 42: goto st8; + case 47: goto st1; + } + goto st7; +st9: + if ( ++p == pe ) + goto _test_eof9; +case 9: + if ( (*p) == 10 ) + goto st1; + goto st9; + } + _test_eof1: cs = 1; goto _test_eof; + _test_eof10: cs = 10; goto _test_eof; + _test_eof2: cs = 2; goto _test_eof; + _test_eof3: cs = 3; goto _test_eof; + _test_eof4: cs = 4; goto _test_eof; + _test_eof5: cs = 5; goto _test_eof; + _test_eof6: cs = 6; goto _test_eof; + _test_eof7: cs = 7; goto _test_eof; + _test_eof8: cs = 8; goto _test_eof; + _test_eof9: cs = 9; goto _test_eof; + + _test_eof: {} + _out: {} + } + +#line 1265 "parser.rl" + + if (json->stack_handle) { + rvalue_stack_eagerly_release(json->stack_handle); + } + + if (cs >= JSON_first_final && p == pe) { + return result; + } else { + raise_parse_error("unexpected token at '%s'", p); + return Qnil; } } -#endif static void JSON_mark(void *ptr) { @@ -2137,6 +3101,8 @@ static void JSON_mark(void *ptr) rb_gc_mark(json->array_class); rb_gc_mark(json->decimal_class); rb_gc_mark(json->match_string); + rb_gc_mark(json->stack_handle); + const VALUE *name_cache_entries = &json->name_cache.entries[0]; rb_gc_mark_locations(name_cache_entries, name_cache_entries + json->name_cache.length); } @@ -2199,6 +3165,8 @@ void Init_parser(void) rb_define_method(cParser, "parse", cParser_parse, 0); rb_define_method(cParser, "source", cParser_source, 0); + rb_define_singleton_method(cParser, "parse", cParser_m_parse, 2); + CNaN = rb_const_get(mJSON, rb_intern("NaN")); rb_gc_register_mark_object(CNaN); @@ -2211,31 +3179,35 @@ void Init_parser(void) rb_global_variable(&Encoding_UTF_8); Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8")); + sym_max_nesting = ID2SYM(rb_intern("max_nesting")); + sym_allow_nan = ID2SYM(rb_intern("allow_nan")); + sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma")); + sym_symbolize_names = ID2SYM(rb_intern("symbolize_names")); + sym_freeze = ID2SYM(rb_intern("freeze")); + sym_create_additions = ID2SYM(rb_intern("create_additions")); + sym_create_id = ID2SYM(rb_intern("create_id")); + sym_object_class = ID2SYM(rb_intern("object_class")); + sym_array_class = ID2SYM(rb_intern("array_class")); + sym_decimal_class = ID2SYM(rb_intern("decimal_class")); + sym_match_string = ID2SYM(rb_intern("match_string")); + + i_create_id = rb_intern("create_id"); i_json_creatable_p = rb_intern("json_creatable?"); i_json_create = rb_intern("json_create"); - i_create_id = rb_intern("create_id"); - i_create_additions = rb_intern("create_additions"); i_chr = rb_intern("chr"); - i_max_nesting = rb_intern("max_nesting"); - i_allow_nan = rb_intern("allow_nan"); - i_symbolize_names = rb_intern("symbolize_names"); - i_object_class = rb_intern("object_class"); - i_array_class = rb_intern("array_class"); - i_decimal_class = rb_intern("decimal_class"); i_match = rb_intern("match"); - i_match_string = rb_intern("match_string"); i_deep_const_get = rb_intern("deep_const_get"); i_aset = rb_intern("[]="); i_aref = rb_intern("[]"); i_leftshift = rb_intern("<<"); i_new = rb_intern("new"); i_try_convert = rb_intern("try_convert"); - i_freeze = rb_intern("freeze"); i_uminus = rb_intern("-@"); i_encode = rb_intern("encode"); binary_encindex = rb_ascii8bit_encindex(); utf8_encindex = rb_utf8_encindex(); + enc_utf8 = rb_utf8_encoding(); } /* diff --git a/ext/json/parser/parser.h b/ext/json/parser/parser.h deleted file mode 100644 index d1863a2b9a..0000000000 --- a/ext/json/parser/parser.h +++ /dev/null @@ -1,78 +0,0 @@ -#ifndef _PARSER_H_ -#define _PARSER_H_ - -#include "ruby.h" - -/* This is the fallback definition from Ruby 3.4 */ -#ifndef RBIMPL_STDBOOL_H -#if defined(__cplusplus) -# if defined(HAVE_STDBOOL_H) && (__cplusplus >= 201103L) -# include <cstdbool> -# endif -#elif defined(HAVE_STDBOOL_H) -# include <stdbool.h> -#elif !defined(HAVE__BOOL) -typedef unsigned char _Bool; -# define bool _Bool -# define true ((_Bool)+1) -# define false ((_Bool)+0) -# define __bool_true_false_are_defined -#endif -#endif - -#ifndef MAYBE_UNUSED -# define MAYBE_UNUSED(x) x -#endif - -#define option_given_p(opts, key) (rb_hash_lookup2(opts, key, Qundef) != Qundef) - -typedef struct JSON_ParserStruct { - VALUE Vsource; - char *source; - long len; - char *memo; - VALUE create_id; - VALUE object_class; - VALUE array_class; - VALUE decimal_class; - VALUE match_string; - FBuffer fbuffer; - int max_nesting; - char allow_nan; - char parsing_name; - char symbolize_names; - char freeze; - char create_additions; - char deprecated_create_additions; -} JSON_Parser; - -#define GET_PARSER \ - GET_PARSER_INIT; \ - if (!json->Vsource) rb_raise(rb_eTypeError, "uninitialized instance") -#define GET_PARSER_INIT \ - JSON_Parser *json; \ - TypedData_Get_Struct(self, JSON_Parser, &JSON_Parser_type, json) - -#define MinusInfinity "-Infinity" -#define EVIL 0x666 - -static uint32_t unescape_unicode(const unsigned char *p); -static int convert_UTF32_to_UTF8(char *buf, uint32_t ch); -static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); -static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); -static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); -static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bool symbolize); -static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result); -static VALUE convert_encoding(VALUE source); -static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self); -static VALUE cParser_parse(VALUE self); -static void JSON_mark(void *json); -static void JSON_free(void *json); -static VALUE cJSON_parser_s_allocate(VALUE klass); -static VALUE cParser_source(VALUE self); - -static const rb_data_type_t JSON_Parser_type; - -#endif diff --git a/ext/json/parser/parser.rl b/ext/json/parser/parser.rl index 15ec2b6843..6d4cc7a5b0 100644 --- a/ext/json/parser/parser.rl +++ b/ext/json/parser/parser.rl @@ -1,5 +1,308 @@ +#include "ruby.h" #include "../fbuffer/fbuffer.h" -#include "parser.h" + +static VALUE mJSON, mExt, cParser, eNestingError, Encoding_UTF_8; +static VALUE CNaN, CInfinity, CMinusInfinity; + +static ID i_json_creatable_p, i_json_create, i_create_id, + i_chr, i_deep_const_get, i_match, i_aset, i_aref, + i_leftshift, i_new, i_try_convert, i_uminus, i_encode; + +static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze, + sym_create_additions, sym_create_id, sym_object_class, sym_array_class, + sym_decimal_class, sym_match_string; + +static int binary_encindex; +static int utf8_encindex; + +#ifndef HAVE_RB_GC_MARK_LOCATIONS +// For TruffleRuby +void rb_gc_mark_locations(const VALUE *start, const VALUE *end) +{ + VALUE *value = start; + + while (value < end) { + rb_gc_mark(*value); + value++; + } +} +#endif + +#ifndef HAVE_RB_HASH_BULK_INSERT +// For TruffleRuby +void rb_hash_bulk_insert(long count, const VALUE *pairs, VALUE hash) +{ + long index = 0; + while (index < count) { + VALUE name = pairs[index++]; + VALUE value = pairs[index++]; + rb_hash_aset(hash, name, value); + } + RB_GC_GUARD(hash); +} +#endif + +/* name cache */ + +#include <string.h> +#include <ctype.h> + +// Object names are likely to be repeated, and are frozen. +// As such we can re-use them if we keep a cache of the ones we've seen so far, +// and save much more expensive lookups into the global fstring table. +// This cache implementation is deliberately simple, as we're optimizing for compactness, +// to be able to fit safely on the stack. +// As such, binary search into a sorted array gives a good tradeoff between compactness and +// performance. +#define JSON_RVALUE_CACHE_CAPA 63 +typedef struct rvalue_cache_struct { + int length; + VALUE entries[JSON_RVALUE_CACHE_CAPA]; +} rvalue_cache; + +static rb_encoding *enc_utf8; + +#define JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH 55 + +static inline VALUE build_interned_string(const char *str, const long length) +{ +# ifdef HAVE_RB_ENC_INTERNED_STR + return rb_enc_interned_str(str, length, enc_utf8); +# else + VALUE rstring = rb_utf8_str_new(str, length); + return rb_funcall(rb_str_freeze(rstring), i_uminus, 0); +# endif +} + +static inline VALUE build_symbol(const char *str, const long length) +{ + return rb_str_intern(build_interned_string(str, length)); +} + +static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring) +{ + MEMMOVE(&cache->entries[index + 1], &cache->entries[index], VALUE, cache->length - index); + cache->length++; + cache->entries[index] = rstring; +} + +static inline int rstring_cache_cmp(const char *str, const long length, VALUE rstring) +{ + long rstring_length = RSTRING_LEN(rstring); + if (length == rstring_length) { + return memcmp(str, RSTRING_PTR(rstring), length); + } else { + return (int)(length - rstring_length); + } +} + +static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length) +{ + if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) { + // Common names aren't likely to be very long. So we just don't + // cache names above an arbitrary threshold. + return Qfalse; + } + + if (RB_UNLIKELY(!isalpha(str[0]))) { + // Simple heuristic, if the first character isn't a letter, + // we're much less likely to see this string again. + // We mostly want to cache strings that are likely to be repeated. + return Qfalse; + } + + int low = 0; + int high = cache->length - 1; + int mid = 0; + int last_cmp = 0; + + while (low <= high) { + mid = (high + low) >> 1; + VALUE entry = cache->entries[mid]; + last_cmp = rstring_cache_cmp(str, length, entry); + + if (last_cmp == 0) { + return entry; + } else if (last_cmp > 0) { + low = mid + 1; + } else { + high = mid - 1; + } + } + + if (RB_UNLIKELY(memchr(str, '\\', length))) { + // We assume the overwhelming majority of names don't need to be escaped. + // But if they do, we have to fallback to the slow path. + return Qfalse; + } + + VALUE rstring = build_interned_string(str, length); + + if (cache->length < JSON_RVALUE_CACHE_CAPA) { + if (last_cmp > 0) { + mid += 1; + } + + rvalue_cache_insert_at(cache, mid, rstring); + } + return rstring; +} + +static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const long length) +{ + if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) { + // Common names aren't likely to be very long. So we just don't + // cache names above an arbitrary threshold. + return Qfalse; + } + + if (RB_UNLIKELY(!isalpha(str[0]))) { + // Simple heuristic, if the first character isn't a letter, + // we're much less likely to see this string again. + // We mostly want to cache strings that are likely to be repeated. + return Qfalse; + } + + int low = 0; + int high = cache->length - 1; + int mid = 0; + int last_cmp = 0; + + while (low <= high) { + mid = (high + low) >> 1; + VALUE entry = cache->entries[mid]; + last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry)); + + if (last_cmp == 0) { + return entry; + } else if (last_cmp > 0) { + low = mid + 1; + } else { + high = mid - 1; + } + } + + if (RB_UNLIKELY(memchr(str, '\\', length))) { + // We assume the overwhelming majority of names don't need to be escaped. + // But if they do, we have to fallback to the slow path. + return Qfalse; + } + + VALUE rsymbol = build_symbol(str, length); + + if (cache->length < JSON_RVALUE_CACHE_CAPA) { + if (last_cmp > 0) { + mid += 1; + } + + rvalue_cache_insert_at(cache, mid, rsymbol); + } + return rsymbol; +} + +/* rvalue stack */ + +#define RVALUE_STACK_INITIAL_CAPA 128 + +enum rvalue_stack_type { + RVALUE_STACK_HEAP_ALLOCATED = 0, + RVALUE_STACK_STACK_ALLOCATED = 1, +}; + +typedef struct rvalue_stack_struct { + enum rvalue_stack_type type; + long capa; + long head; + VALUE *ptr; +} rvalue_stack; + +static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref); + +static rvalue_stack *rvalue_stack_grow(rvalue_stack *stack, VALUE *handle, rvalue_stack **stack_ref) +{ + long required = stack->capa * 2; + + if (stack->type == RVALUE_STACK_STACK_ALLOCATED) { + stack = rvalue_stack_spill(stack, handle, stack_ref); + } else { + REALLOC_N(stack->ptr, VALUE, required); + stack->capa = required; + } + return stack; +} + +static void rvalue_stack_push(rvalue_stack *stack, VALUE value, VALUE *handle, rvalue_stack **stack_ref) +{ + if (RB_UNLIKELY(stack->head >= stack->capa)) { + stack = rvalue_stack_grow(stack, handle, stack_ref); + } + stack->ptr[stack->head] = value; + stack->head++; +} + +static inline VALUE *rvalue_stack_peek(rvalue_stack *stack, long count) +{ + return stack->ptr + (stack->head - count); +} + +static inline void rvalue_stack_pop(rvalue_stack *stack, long count) +{ + stack->head -= count; +} + +static void rvalue_stack_mark(void *ptr) +{ + rvalue_stack *stack = (rvalue_stack *)ptr; + rb_gc_mark_locations(stack->ptr, stack->ptr + stack->head); +} + +static void rvalue_stack_free(void *ptr) +{ + rvalue_stack *stack = (rvalue_stack *)ptr; + if (stack) { + ruby_xfree(stack->ptr); + ruby_xfree(stack); + } +} + +static size_t rvalue_stack_memsize(const void *ptr) +{ + const rvalue_stack *stack = (const rvalue_stack *)ptr; + return sizeof(rvalue_stack) + sizeof(VALUE) * stack->capa; +} + +static const rb_data_type_t JSON_Parser_rvalue_stack_type = { + "JSON::Ext::Parser/rvalue_stack", + { + .dmark = rvalue_stack_mark, + .dfree = rvalue_stack_free, + .dsize = rvalue_stack_memsize, + }, + 0, 0, + RUBY_TYPED_FREE_IMMEDIATELY, +}; + +static rvalue_stack *rvalue_stack_spill(rvalue_stack *old_stack, VALUE *handle, rvalue_stack **stack_ref) +{ + rvalue_stack *stack; + *handle = TypedData_Make_Struct(0, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack); + *stack_ref = stack; + MEMCPY(stack, old_stack, rvalue_stack, 1); + + stack->capa = old_stack->capa << 1; + stack->ptr = ALLOC_N(VALUE, stack->capa); + stack->type = RVALUE_STACK_HEAP_ALLOCATED; + MEMCPY(stack->ptr, old_stack->ptr, VALUE, old_stack->head); + return stack; +} + +static void rvalue_stack_eagerly_release(VALUE handle) +{ + rvalue_stack *stack; + TypedData_Get_Struct(handle, rvalue_stack, &JSON_Parser_rvalue_stack_type, stack); + RTYPEDDATA_DATA(handle) = NULL; + rvalue_stack_free(stack); +} /* unicode */ @@ -67,6 +370,50 @@ static int convert_UTF32_to_UTF8(char *buf, uint32_t ch) return len; } +typedef struct JSON_ParserStruct { + VALUE Vsource; + char *source; + long len; + char *memo; + VALUE create_id; + VALUE object_class; + VALUE array_class; + VALUE decimal_class; + VALUE match_string; + FBuffer fbuffer; + int max_nesting; + bool allow_nan; + bool allow_trailing_comma; + bool parsing_name; + bool symbolize_names; + bool freeze; + bool create_additions; + bool deprecated_create_additions; + rvalue_cache name_cache; + rvalue_stack *stack; + VALUE stack_handle; +} JSON_Parser; + +#define GET_PARSER \ + GET_PARSER_INIT; \ + if (!json->Vsource) rb_raise(rb_eTypeError, "uninitialized instance") + +#define GET_PARSER_INIT \ + JSON_Parser *json; \ + TypedData_Get_Struct(self, JSON_Parser, &JSON_Parser_type, json) + +#define MinusInfinity "-Infinity" +#define EVIL 0x666 + +static const rb_data_type_t JSON_Parser_type; +static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); +static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); +static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); + + #define PARSE_ERROR_FRAGMENT_LEN 32 #ifdef RBIMPL_ATTR_NORETURN RBIMPL_ATTR_NORETURN() @@ -84,21 +431,9 @@ static void raise_parse_error(const char *format, const char *start) ptr = buffer; } - rb_enc_raise(rb_utf8_encoding(), rb_path2class("JSON::ParserError"), format, ptr); + rb_enc_raise(enc_utf8, rb_path2class("JSON::ParserError"), format, ptr); } -static VALUE mJSON, mExt, cParser, eNestingError, Encoding_UTF_8; -static VALUE CNaN, CInfinity, CMinusInfinity; - -static ID i_json_creatable_p, i_json_create, i_create_id, i_create_additions, - i_chr, i_max_nesting, i_allow_nan, i_symbolize_names, - i_object_class, i_array_class, i_decimal_class, - i_deep_const_get, i_match, i_match_string, i_aset, i_aref, - i_leftshift, i_new, i_try_convert, i_freeze, i_uminus, i_encode; - -static int binary_encindex; -static int utf8_encindex; - %%{ machine JSON_common; @@ -135,27 +470,25 @@ static int utf8_encindex; write data; action parse_value { - VALUE v = Qnil; - char *np = JSON_parse_value(json, fpc, pe, &v, current_nesting); + char *np = JSON_parse_value(json, fpc, pe, result, current_nesting); if (np == NULL) { fhold; fbreak; } else { - if (NIL_P(json->object_class)) { - OBJ_FREEZE(last_name); - rb_hash_aset(*result, last_name, v); - } else { - rb_funcall(*result, i_aset, 2, last_name, v); - } fexec np; } } + action allow_trailing_comma { json->allow_trailing_comma } + action parse_name { char *np; - json->parsing_name = 1; - np = JSON_parse_string(json, fpc, pe, &last_name); - json->parsing_name = 0; - if (np == NULL) { fhold; fbreak; } else fexec np; + json->parsing_name = true; + np = JSON_parse_string(json, fpc, pe, result); + json->parsing_name = false; + if (np == NULL) { fhold; fbreak; } else { + PUSH(*result); + fexec np; + } } action exit { fhold; fbreak; } @@ -165,33 +498,57 @@ static int utf8_encindex; main := ( begin_object - (pair (next_pair)*)? ignore* + (pair (next_pair)*((ignore* value_separator) when allow_trailing_comma)?)? ignore* end_object ) @exit; }%% +#define PUSH(result) rvalue_stack_push(json->stack, result, &json->stack_handle, &json->stack) + static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) { int cs = EVIL; - VALUE last_name = Qnil; - VALUE object_class = json->object_class; if (json->max_nesting && current_nesting > json->max_nesting) { rb_raise(eNestingError, "nesting of %d is too deep", current_nesting); } - *result = NIL_P(object_class) ? rb_hash_new() : rb_class_new_instance(0, 0, object_class); + long stack_head = json->stack->head; %% write init; %% write exec; if (cs >= JSON_object_first_final) { - if (json->create_additions) { + long count = json->stack->head - stack_head; + + if (RB_UNLIKELY(json->object_class)) { + VALUE object = rb_class_new_instance(0, 0, json->object_class); + long index = 0; + VALUE *items = rvalue_stack_peek(json->stack, count); + while (index < count) { + VALUE name = items[index++]; + VALUE value = items[index++]; + rb_funcall(object, i_aset, 2, name, value); + } + *result = object; + } else { + VALUE hash; +#ifdef HAVE_RB_HASH_NEW_CAPA + hash = rb_hash_new_capa(count >> 1); +#else + hash = rb_hash_new(); +#endif + rb_hash_bulk_insert(count, rvalue_stack_peek(json->stack, count), hash); + *result = hash; + } + rvalue_stack_pop(json->stack, count); + + if (RB_UNLIKELY(json->create_additions)) { VALUE klassname; - if (NIL_P(json->object_class)) { - klassname = rb_hash_aref(*result, json->create_id); + if (json->object_class) { + klassname = rb_funcall(*result, i_aref, 1, json->create_id); } else { - klassname = rb_funcall(*result, i_aref, 1, json->create_id); + klassname = rb_hash_aref(*result, json->create_id); } if (!NIL_P(klassname)) { VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname); @@ -209,7 +566,6 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu } } - %%{ machine JSON_value; include JSON_common; @@ -241,7 +597,12 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu } action parse_string { char *np = JSON_parse_string(json, fpc, pe, result); - if (np == NULL) { fhold; fbreak; } else fexec np; + if (np == NULL) { + fhold; + fbreak; + } else { + fexec np; + } } action parse_number { @@ -256,9 +617,13 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu } } np = JSON_parse_float(json, fpc, pe, result); - if (np != NULL) fexec np; + if (np != NULL) { + fexec np; + } np = JSON_parse_integer(json, fpc, pe, result); - if (np != NULL) fexec np; + if (np != NULL) { + fexec np; + } fhold; fbreak; } @@ -301,6 +666,7 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul } if (cs >= JSON_value_first_final) { + PUSH(*result); return p; } else { return NULL; @@ -362,7 +728,7 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul if (cs >= JSON_float_first_final) { VALUE mod = Qnil; ID method_id = 0; - if (!NIL_P(json->decimal_class)) { + if (json->decimal_class) { if (rb_respond_to(json->decimal_class, i_try_convert)) { mod = json->decimal_class; method_id = i_try_convert; @@ -421,39 +787,51 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul if (np == NULL) { fhold; fbreak; } else { - if (NIL_P(json->array_class)) { - rb_ary_push(*result, v); - } else { - rb_funcall(*result, i_leftshift, 1, v); - } fexec np; } } + action allow_trailing_comma { json->allow_trailing_comma } + action exit { fhold; fbreak; } next_element = value_separator ignore* begin_value >parse_value; main := begin_array ignore* ((begin_value >parse_value ignore*) - (ignore* next_element ignore*)*)? + (ignore* next_element ignore*)*((value_separator ignore*) when allow_trailing_comma)?)? end_array @exit; }%% static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) { int cs = EVIL; - VALUE array_class = json->array_class; if (json->max_nesting && current_nesting > json->max_nesting) { rb_raise(eNestingError, "nesting of %d is too deep", current_nesting); } - *result = NIL_P(array_class) ? rb_ary_new() : rb_class_new_instance(0, 0, array_class); + long stack_head = json->stack->head; %% write init; %% write exec; if(cs >= JSON_array_first_final) { + long count = json->stack->head - stack_head; + + if (RB_UNLIKELY(json->array_class)) { + VALUE array = rb_class_new_instance(0, 0, json->array_class); + VALUE *items = rvalue_stack_peek(json->stack, count); + long index; + for (index = 0; index < count; index++) { + rb_funcall(array, i_leftshift, 1, items[index]); + } + *result = array; + } else { + VALUE array = rb_ary_new_from_values(count, rvalue_stack_peek(json->stack, count)); + *result = array; + } + rvalue_stack_pop(json->stack, count); + return p + 1; } else { raise_parse_error("unexpected token at '%s'", p); @@ -469,7 +847,7 @@ static inline VALUE build_string(const char *start, const char *end, bool intern VALUE result; # ifdef HAVE_RB_ENC_INTERNED_STR if (intern) { - result = rb_enc_interned_str(start, (long)(end - start), rb_utf8_encoding()); + result = rb_enc_interned_str(start, (long)(end - start), enc_utf8); } else { result = rb_utf8_str_new(start, (long)(end - start)); } @@ -487,13 +865,26 @@ static inline VALUE build_string(const char *start, const char *end, bool intern return result; } -static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bool symbolize) +static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize) { size_t bufferSize = stringEnd - string; char *p = string, *pe = string, *unescape, *bufferStart, *buffer; int unescape_len; char buf[4]; + if (is_name) { + VALUE cached_key; + if (RB_UNLIKELY(symbolize)) { + cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize); + } else { + cached_key = rstring_cache_fetch(&json->name_cache, string, bufferSize); + } + + if (RB_LIKELY(cached_key)) { + return cached_key; + } + } + pe = memchr(p, '\\', bufferSize); if (RB_LIKELY(pe == NULL)) { return build_string(string, stringEnd, intern, symbolize); @@ -602,7 +993,7 @@ static VALUE json_string_unescape(char *string, char *stringEnd, bool intern, bo write data; action parse_string { - *result = json_string_unescape(json->memo + 1, p, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); + *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); if (NIL_P(*result)) { fhold; fbreak; @@ -671,7 +1062,7 @@ static VALUE convert_encoding(VALUE source) { int encindex = RB_ENCODING_GET(source); - if (encindex == utf8_encindex) { + if (RB_LIKELY(encindex == utf8_encindex)) { return source; } @@ -683,6 +1074,68 @@ static VALUE convert_encoding(VALUE source) return rb_funcall(source, i_encode, 1, Encoding_UTF_8); } +static int configure_parser_i(VALUE key, VALUE val, VALUE data) +{ + JSON_Parser *json = (JSON_Parser *)data; + + if (key == sym_max_nesting) { json->max_nesting = RTEST(val) ? FIX2INT(val) : 0; } + else if (key == sym_allow_nan) { json->allow_nan = RTEST(val); } + else if (key == sym_allow_trailing_comma) { json->allow_trailing_comma = RTEST(val); } + else if (key == sym_symbolize_names) { json->symbolize_names = RTEST(val); } + else if (key == sym_freeze) { json->freeze = RTEST(val); } + else if (key == sym_create_id) { json->create_id = RTEST(val) ? val : Qfalse; } + else if (key == sym_object_class) { json->object_class = RTEST(val) ? val : Qfalse; } + else if (key == sym_array_class) { json->array_class = RTEST(val) ? val : Qfalse; } + else if (key == sym_decimal_class) { json->decimal_class = RTEST(val) ? val : Qfalse; } + else if (key == sym_match_string) { json->match_string = RTEST(val) ? val : Qfalse; } + else if (key == sym_create_additions) { + if (NIL_P(val)) { + json->create_additions = true; + json->deprecated_create_additions = true; + } else { + json->create_additions = RTEST(val); + json->deprecated_create_additions = false; + } + } + + return ST_CONTINUE; +} + +static void parser_init(JSON_Parser *json, VALUE source, VALUE opts) +{ + if (json->Vsource) { + rb_raise(rb_eTypeError, "already initialized instance"); + } + + json->fbuffer.initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT; + json->max_nesting = 100; + + if (!NIL_P(opts)) { + Check_Type(opts, T_HASH); + if (RHASH_SIZE(opts) > 0) { + // We assume in most cases few keys are set so it's faster to go over + // the provided keys than to check all possible keys. + rb_hash_foreach(opts, configure_parser_i, (VALUE)json); + + if (json->symbolize_names && json->create_additions) { + rb_raise(rb_eArgError, + "options :symbolize_names and :create_additions cannot be " + " used in conjunction"); + } + + if (json->create_additions && !json->create_id) { + json->create_id = rb_funcall(mJSON, i_create_id, 0); + } + } + + } + source = convert_encoding(StringValue(source)); + StringValue(source); + json->len = RSTRING_LEN(source); + json->source = RSTRING_PTR(source); + json->Vsource = source; +} + /* * call-seq: new(source, opts => {}) * @@ -717,117 +1170,11 @@ static VALUE convert_encoding(VALUE source) */ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) { - VALUE source, opts; GET_PARSER_INIT; - if (json->Vsource) { - rb_raise(rb_eTypeError, "already initialized instance"); - } - rb_check_arity(argc, 1, 2); - source = argv[0]; - opts = Qnil; - if (argc == 2) { - opts = argv[1]; - Check_Type(argv[1], T_HASH); - if (RHASH_SIZE(argv[1]) > 0) { - opts = argv[1]; - } - } - - if (!NIL_P(opts)) { - VALUE tmp = ID2SYM(i_max_nesting); - if (option_given_p(opts, tmp)) { - VALUE max_nesting = rb_hash_aref(opts, tmp); - if (RTEST(max_nesting)) { - Check_Type(max_nesting, T_FIXNUM); - json->max_nesting = FIX2INT(max_nesting); - } else { - json->max_nesting = 0; - } - } else { - json->max_nesting = 100; - } - tmp = ID2SYM(i_allow_nan); - if (option_given_p(opts, tmp)) { - json->allow_nan = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0; - } else { - json->allow_nan = 0; - } - tmp = ID2SYM(i_symbolize_names); - if (option_given_p(opts, tmp)) { - json->symbolize_names = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0; - } else { - json->symbolize_names = 0; - } - tmp = ID2SYM(i_freeze); - if (option_given_p(opts, tmp)) { - json->freeze = RTEST(rb_hash_aref(opts, tmp)) ? 1 : 0; - } else { - json->freeze = 0; - } - tmp = ID2SYM(i_create_additions); - if (option_given_p(opts, tmp)) { - tmp = rb_hash_aref(opts, tmp); - if (NIL_P(tmp)) { - json->create_additions = 1; - json->deprecated_create_additions = 1; - } else { - json->create_additions = RTEST(tmp); - json->deprecated_create_additions = 0; - } - } - if (json->symbolize_names && json->create_additions) { - rb_raise(rb_eArgError, - "options :symbolize_names and :create_additions cannot be " - " used in conjunction"); - } - tmp = ID2SYM(i_create_id); - if (option_given_p(opts, tmp)) { - json->create_id = rb_hash_aref(opts, tmp); - } else { - json->create_id = rb_funcall(mJSON, i_create_id, 0); - } - tmp = ID2SYM(i_object_class); - if (option_given_p(opts, tmp)) { - json->object_class = rb_hash_aref(opts, tmp); - } else { - json->object_class = Qnil; - } - tmp = ID2SYM(i_array_class); - if (option_given_p(opts, tmp)) { - json->array_class = rb_hash_aref(opts, tmp); - } else { - json->array_class = Qnil; - } - tmp = ID2SYM(i_decimal_class); - if (option_given_p(opts, tmp)) { - json->decimal_class = rb_hash_aref(opts, tmp); - } else { - json->decimal_class = Qnil; - } - tmp = ID2SYM(i_match_string); - if (option_given_p(opts, tmp)) { - VALUE match_string = rb_hash_aref(opts, tmp); - json->match_string = RTEST(match_string) ? match_string : Qnil; - } else { - json->match_string = Qnil; - } - } else { - json->max_nesting = 100; - json->allow_nan = 0; - json->create_additions = 0; - json->create_id = Qnil; - json->object_class = Qnil; - json->array_class = Qnil; - json->decimal_class = Qnil; - } - source = convert_encoding(StringValue(source)); - StringValue(source); - json->len = RSTRING_LEN(source); - json->source = RSTRING_PTR(source); - json->Vsource = source; + parser_init(json, argv[0], argc == 2 ? argv[1] : Qnil); return self; } @@ -862,11 +1209,26 @@ static VALUE cParser_parse(VALUE self) VALUE result = Qnil; GET_PARSER; + char stack_buffer[FBUFFER_STACK_SIZE]; + fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE); + + VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA]; + rvalue_stack stack = { + .type = RVALUE_STACK_STACK_ALLOCATED, + .ptr = rvalue_stack_buffer, + .capa = RVALUE_STACK_INITIAL_CAPA, + }; + json->stack = &stack; + %% write init; p = json->source; pe = p + json->len; %% write exec; + if (json->stack_handle) { + rvalue_stack_eagerly_release(json->stack_handle); + } + if (cs >= JSON_first_final && p == pe) { return result; } else { @@ -875,18 +1237,43 @@ static VALUE cParser_parse(VALUE self) } } -#ifndef HAVE_RB_GC_MARK_LOCATIONS -// For TruffleRuby -void rb_gc_mark_locations(const VALUE *start, const VALUE *end) +static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE opts) { - VALUE *value = start; + char *p, *pe; + int cs = EVIL; + VALUE result = Qnil; - while (value < end) { - rb_gc_mark(*value); - value++; + JSON_Parser _parser = {0}; + JSON_Parser *json = &_parser; + parser_init(json, source, opts); + + char stack_buffer[FBUFFER_STACK_SIZE]; + fbuffer_stack_init(&json->fbuffer, FBUFFER_INITIAL_LENGTH_DEFAULT, stack_buffer, FBUFFER_STACK_SIZE); + + VALUE rvalue_stack_buffer[RVALUE_STACK_INITIAL_CAPA]; + rvalue_stack stack = { + .type = RVALUE_STACK_STACK_ALLOCATED, + .ptr = rvalue_stack_buffer, + .capa = RVALUE_STACK_INITIAL_CAPA, + }; + json->stack = &stack; + + %% write init; + p = json->source; + pe = p + json->len; + %% write exec; + + if (json->stack_handle) { + rvalue_stack_eagerly_release(json->stack_handle); + } + + if (cs >= JSON_first_final && p == pe) { + return result; + } else { + raise_parse_error("unexpected token at '%s'", p); + return Qnil; } } -#endif static void JSON_mark(void *ptr) { @@ -897,6 +1284,8 @@ static void JSON_mark(void *ptr) rb_gc_mark(json->array_class); rb_gc_mark(json->decimal_class); rb_gc_mark(json->match_string); + rb_gc_mark(json->stack_handle); + const VALUE *name_cache_entries = &json->name_cache.entries[0]; rb_gc_mark_locations(name_cache_entries, name_cache_entries + json->name_cache.length); } @@ -959,6 +1348,8 @@ void Init_parser(void) rb_define_method(cParser, "parse", cParser_parse, 0); rb_define_method(cParser, "source", cParser_source, 0); + rb_define_singleton_method(cParser, "parse", cParser_m_parse, 2); + CNaN = rb_const_get(mJSON, rb_intern("NaN")); rb_gc_register_mark_object(CNaN); @@ -971,31 +1362,35 @@ void Init_parser(void) rb_global_variable(&Encoding_UTF_8); Encoding_UTF_8 = rb_const_get(rb_path2class("Encoding"), rb_intern("UTF_8")); + sym_max_nesting = ID2SYM(rb_intern("max_nesting")); + sym_allow_nan = ID2SYM(rb_intern("allow_nan")); + sym_allow_trailing_comma = ID2SYM(rb_intern("allow_trailing_comma")); + sym_symbolize_names = ID2SYM(rb_intern("symbolize_names")); + sym_freeze = ID2SYM(rb_intern("freeze")); + sym_create_additions = ID2SYM(rb_intern("create_additions")); + sym_create_id = ID2SYM(rb_intern("create_id")); + sym_object_class = ID2SYM(rb_intern("object_class")); + sym_array_class = ID2SYM(rb_intern("array_class")); + sym_decimal_class = ID2SYM(rb_intern("decimal_class")); + sym_match_string = ID2SYM(rb_intern("match_string")); + + i_create_id = rb_intern("create_id"); i_json_creatable_p = rb_intern("json_creatable?"); i_json_create = rb_intern("json_create"); - i_create_id = rb_intern("create_id"); - i_create_additions = rb_intern("create_additions"); i_chr = rb_intern("chr"); - i_max_nesting = rb_intern("max_nesting"); - i_allow_nan = rb_intern("allow_nan"); - i_symbolize_names = rb_intern("symbolize_names"); - i_object_class = rb_intern("object_class"); - i_array_class = rb_intern("array_class"); - i_decimal_class = rb_intern("decimal_class"); i_match = rb_intern("match"); - i_match_string = rb_intern("match_string"); i_deep_const_get = rb_intern("deep_const_get"); i_aset = rb_intern("[]="); i_aref = rb_intern("[]"); i_leftshift = rb_intern("<<"); i_new = rb_intern("new"); i_try_convert = rb_intern("try_convert"); - i_freeze = rb_intern("freeze"); i_uminus = rb_intern("-@"); i_encode = rb_intern("encode"); binary_encindex = rb_ascii8bit_encindex(); utf8_encindex = rb_utf8_encindex(); + enc_utf8 = rb_utf8_encoding(); } /* diff --git a/test/json/fixtures/fail4.json b/test/json/fixtures/fail4.json deleted file mode 100644 index 9de168bf34..0000000000 --- a/test/json/fixtures/fail4.json +++ /dev/null @@ -1 +0,0 @@ -["extra comma",]
\ No newline at end of file diff --git a/test/json/fixtures/fail9.json b/test/json/fixtures/fail9.json deleted file mode 100644 index 5815574f36..0000000000 --- a/test/json/fixtures/fail9.json +++ /dev/null @@ -1 +0,0 @@ -{"Extra comma": true,}
\ No newline at end of file diff --git a/test/json/json_common_interface_test.rb b/test/json/json_common_interface_test.rb index e552412bfd..6165cc0411 100644 --- a/test/json/json_common_interface_test.rb +++ b/test/json/json_common_interface_test.rb @@ -52,11 +52,11 @@ class JSONCommonInterfaceTest < Test::Unit::TestCase end def test_generator - assert_match(/::Generator\z/, JSON.generator.name) + assert_match(/::(TruffleRuby)?Generator\z/, JSON.generator.name) end def test_state - assert_match(/::Generator::State\z/, JSON.state.name) + assert_match(/::(TruffleRuby)?Generator::State\z/, JSON.state.name) end def test_create_id diff --git a/test/json/json_ext_parser_test.rb b/test/json/json_ext_parser_test.rb index 9db8ae772f..da61504989 100644 --- a/test/json/json_ext_parser_test.rb +++ b/test/json/json_ext_parser_test.rb @@ -2,53 +2,51 @@ require_relative 'test_helper' class JSONExtParserTest < Test::Unit::TestCase - if defined?(JSON::Ext::Parser) - include JSON - - def test_allocate - parser = JSON::Ext::Parser.new("{}") - assert_raise(TypeError, '[ruby-core:35079]') do - parser.__send__(:initialize, "{}") - end - parser = JSON::Ext::Parser.allocate - assert_raise(TypeError, '[ruby-core:35079]') { parser.source } - end + include JSON - def test_error_messages - ex = assert_raise(ParserError) { parse('Infinity') } - assert_equal "unexpected token at 'Infinity'", ex.message + def test_allocate + parser = JSON::Ext::Parser.new("{}") + assert_raise(TypeError, '[ruby-core:35079]') do + parser.__send__(:initialize, "{}") + end + parser = JSON::Ext::Parser.allocate + assert_raise(TypeError, '[ruby-core:35079]') { parser.source } + end - unless RUBY_PLATFORM =~ /java/ - ex = assert_raise(ParserError) { parse('-Infinity') } - assert_equal "unexpected token at '-Infinity'", ex.message - end + def test_error_messages + ex = assert_raise(ParserError) { parse('Infinity') } + assert_equal "unexpected token at 'Infinity'", ex.message - ex = assert_raise(ParserError) { parse('NaN') } - assert_equal "unexpected token at 'NaN'", ex.message + unless RUBY_PLATFORM =~ /java/ + ex = assert_raise(ParserError) { parse('-Infinity') } + assert_equal "unexpected token at '-Infinity'", ex.message end - if GC.respond_to?(:stress=) - def test_gc_stress_parser_new - payload = JSON.dump([{ foo: 1, bar: 2, baz: 3, egg: { spam: 4 } }] * 10) - - previous_stress = GC.stress - JSON::Parser.new(payload).parse - ensure - GC.stress = previous_stress - end + ex = assert_raise(ParserError) { parse('NaN') } + assert_equal "unexpected token at 'NaN'", ex.message + end - def test_gc_stress - payload = JSON.dump([{ foo: 1, bar: 2, baz: 3, egg: { spam: 4 } }] * 10) + if GC.respond_to?(:stress=) + def test_gc_stress_parser_new + payload = JSON.dump([{ foo: 1, bar: 2, baz: 3, egg: { spam: 4 } }] * 10) - previous_stress = GC.stress - JSON.parse(payload) - ensure - GC.stress = previous_stress - end + previous_stress = GC.stress + JSON::Parser.new(payload).parse + ensure + GC.stress = previous_stress end - def parse(json) - JSON::Ext::Parser.new(json).parse + def test_gc_stress + payload = JSON.dump([{ foo: 1, bar: 2, baz: 3, egg: { spam: 4 } }] * 10) + + previous_stress = GC.stress + JSON.parse(payload) + ensure + GC.stress = previous_stress end end + + def parse(json) + JSON::Ext::Parser.new(json).parse + end end diff --git a/test/json/json_generator_test.rb b/test/json/json_generator_test.rb index 112c03b220..700220a152 100755 --- a/test/json/json_generator_test.rb +++ b/test/json/json_generator_test.rb @@ -343,27 +343,25 @@ class JSONGeneratorTest < Test::Unit::TestCase assert_equal '2', state.indent end - if defined?(JSON::Ext::Generator) - def test_broken_bignum # [ruby-core:38867] - pid = fork do - x = 1 << 64 - x.class.class_eval do - def to_s - end - end - begin - JSON::Ext::Generator::State.new.generate(x) - exit 1 - rescue TypeError - exit 0 + def test_broken_bignum # [ruby-core:38867] + pid = fork do + x = 1 << 64 + x.class.class_eval do + def to_s end end - _, status = Process.waitpid2(pid) - assert status.success? - rescue NotImplementedError - # forking to avoid modifying core class of a parent process and - # introducing race conditions of tests are run in parallel + begin + JSON::Ext::Generator::State.new.generate(x) + exit 1 + rescue TypeError + exit 0 + end end + _, status = Process.waitpid2(pid) + assert status.success? + rescue NotImplementedError + # forking to avoid modifying core class of a parent process and + # introducing race conditions of tests are run in parallel end def test_hash_likeness_set_symbol @@ -477,12 +475,20 @@ class JSONGeneratorTest < Test::Unit::TestCase end assert_includes error.message, "source sequence is illegal/malformed utf-8" - assert_raise(Encoding::UndefinedConversionError) do + assert_raise(JSON::GeneratorError) do + JSON.dump("\x82\xAC\xEF".b) + end + + assert_raise(JSON::GeneratorError) do "\x82\xAC\xEF".b.to_json end - assert_raise(Encoding::UndefinedConversionError) do - JSON.dump("\x82\xAC\xEF".b) + assert_raise(JSON::GeneratorError) do + ["\x82\xAC\xEF".b].to_json + end + + assert_raise(JSON::GeneratorError) do + { foo: "\x82\xAC\xEF".b }.to_json end end diff --git a/test/json/json_parser_test.rb b/test/json/json_parser_test.rb index adff91674d..8759ccd262 100644 --- a/test/json/json_parser_test.rb +++ b/test/json/json_parser_test.rb @@ -40,7 +40,7 @@ class JSONParserTest < Test::Unit::TestCase } assert_equal(Encoding::UTF_8, e.message.encoding, bug10705) assert_include(e.message, json, bug10705) - end if defined?(JSON::Ext::Parser) + end def test_parsing parser = JSON::Parser.new('"test"') @@ -180,7 +180,93 @@ class JSONParserTest < Test::Unit::TestCase assert parse('NaN', :allow_nan => true).nan? assert parse('Infinity', :allow_nan => true).infinite? assert parse('-Infinity', :allow_nan => true).infinite? - assert_raise(JSON::ParserError) { parse('[ 1, ]') } + end + + def test_parse_arrays_with_allow_trailing_comma + assert_equal([], parse('[]', allow_trailing_comma: true)) + assert_equal([], parse('[]', allow_trailing_comma: false)) + assert_raise(JSON::ParserError) { parse('[,]', allow_trailing_comma: true) } + assert_raise(JSON::ParserError) { parse('[,]', allow_trailing_comma: false) } + + assert_equal([1], parse('[1]', allow_trailing_comma: true)) + assert_equal([1], parse('[1]', allow_trailing_comma: false)) + assert_equal([1], parse('[1,]', allow_trailing_comma: true)) + assert_raise(JSON::ParserError) { parse('[1,]', allow_trailing_comma: false) } + + assert_equal([1, 2, 3], parse('[1,2,3]', allow_trailing_comma: true)) + assert_equal([1, 2, 3], parse('[1,2,3]', allow_trailing_comma: false)) + assert_equal([1, 2, 3], parse('[1,2,3,]', allow_trailing_comma: true)) + assert_raise(JSON::ParserError) { parse('[1,2,3,]', allow_trailing_comma: false) } + + assert_equal([1, 2, 3], parse('[ 1 , 2 , 3 ]', allow_trailing_comma: true)) + assert_equal([1, 2, 3], parse('[ 1 , 2 , 3 ]', allow_trailing_comma: false)) + assert_equal([1, 2, 3], parse('[ 1 , 2 , 3 , ]', allow_trailing_comma: true)) + assert_raise(JSON::ParserError) { parse('[ 1 , 2 , 3 , ]', allow_trailing_comma: false) } + + assert_equal({'foo' => [1, 2, 3]}, parse('{ "foo": [1,2,3] }', allow_trailing_comma: true)) + assert_equal({'foo' => [1, 2, 3]}, parse('{ "foo": [1,2,3] }', allow_trailing_comma: false)) + assert_equal({'foo' => [1, 2, 3]}, parse('{ "foo": [1,2,3,] }', allow_trailing_comma: true)) + assert_raise(JSON::ParserError) { parse('{ "foo": [1,2,3,] }', allow_trailing_comma: false) } + end + + def test_parse_object_with_allow_trailing_comma + assert_equal({}, parse('{}', allow_trailing_comma: true)) + assert_equal({}, parse('{}', allow_trailing_comma: false)) + assert_raise(JSON::ParserError) { parse('{,}', allow_trailing_comma: true) } + assert_raise(JSON::ParserError) { parse('{,}', allow_trailing_comma: false) } + + assert_equal({'foo'=>'bar'}, parse('{"foo":"bar"}', allow_trailing_comma: true)) + assert_equal({'foo'=>'bar'}, parse('{"foo":"bar"}', allow_trailing_comma: false)) + assert_equal({'foo'=>'bar'}, parse('{"foo":"bar",}', allow_trailing_comma: true)) + assert_raise(JSON::ParserError) { parse('{"foo":"bar",}', allow_trailing_comma: false) } + + assert_equal( + {'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}, + parse('{"foo":"bar","baz":"qux","quux":"garply"}', allow_trailing_comma: true) + ) + assert_equal( + {'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}, + parse('{"foo":"bar","baz":"qux","quux":"garply"}', allow_trailing_comma: false) + ) + assert_equal( + {'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}, + parse('{"foo":"bar","baz":"qux","quux":"garply",}', allow_trailing_comma: true) + ) + assert_raise(JSON::ParserError) { + parse('{"foo":"bar","baz":"qux","quux":"garply",}', allow_trailing_comma: false) + } + + assert_equal( + {'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}, + parse('{ "foo":"bar" , "baz":"qux" , "quux":"garply" }', allow_trailing_comma: true) + ) + assert_equal( + {'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}, + parse('{ "foo":"bar" , "baz":"qux" , "quux":"garply" }', allow_trailing_comma: false) + ) + assert_equal( + {'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}, + parse('{ "foo":"bar" , "baz":"qux" , "quux":"garply" , }', allow_trailing_comma: true) + ) + assert_raise(JSON::ParserError) { + parse('{ "foo":"bar" , "baz":"qux" , "quux":"garply" , }', allow_trailing_comma: false) + } + + assert_equal( + [{'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}], + parse('[{"foo":"bar","baz":"qux","quux":"garply"}]', allow_trailing_comma: true) + ) + assert_equal( + [{'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}], + parse('[{"foo":"bar","baz":"qux","quux":"garply"}]', allow_trailing_comma: false) + ) + assert_equal( + [{'foo'=>'bar', 'baz'=>'qux', 'quux'=>'garply'}], + parse('[{"foo":"bar","baz":"qux","quux":"garply",}]', allow_trailing_comma: true) + ) + assert_raise(JSON::ParserError) { + parse('[{"foo":"bar","baz":"qux","quux":"garply",}]', allow_trailing_comma: false) + } end def test_parse_some_strings @@ -533,7 +619,7 @@ class JSONParserTest < Test::Unit::TestCase error = assert_raise(JSON::ParserError) do JSON.parse('{"input":{"firstName":"Bob","lastName":"Mob","email":"[email protected]"}') end - if RUBY_ENGINE == "ruby" && defined?(JSON::Ext) + if RUBY_ENGINE == "ruby" assert_equal %(unexpected token at '{"input":{"firstName":"Bob","las'), error.message end end diff --git a/test/json/test_helper.rb b/test/json/test_helper.rb index 6fcb76edf8..11bb8ba8c2 100644 --- a/test/json/test_helper.rb +++ b/test/json/test_helper.rb @@ -1,30 +1,14 @@ -case ENV['JSON'] -when 'pure' - $LOAD_PATH.unshift(File.expand_path('../../../lib', __FILE__)) - $stderr.puts("Testing JSON::Pure") - require 'json/pure' -when 'ext' - $stderr.puts("Testing JSON::Ext") - $LOAD_PATH.unshift(File.expand_path('../../../ext', __FILE__), File.expand_path('../../../lib', __FILE__)) - require 'json/ext' -else - $LOAD_PATH.unshift(File.expand_path('../../../ext', __FILE__), File.expand_path('../../../lib', __FILE__)) - $stderr.puts("Testing JSON") - require 'json' -end +$LOAD_PATH.unshift(File.expand_path('../../../ext', __FILE__), File.expand_path('../../../lib', __FILE__)) +require 'json' require 'test/unit' -begin - require 'byebug' -rescue LoadError -end if GC.respond_to?(:verify_compaction_references) # This method was added in Ruby 3.0.0. Calling it this way asks the GC to # move objects around, helping to find object movement bugs. begin - GC.verify_compaction_references(double_heap: true, toward: :empty) - rescue NotImplementedError + GC.verify_compaction_references(expand_heap: true, toward: :empty) + rescue NotImplementedError, ArgumentError # Some platforms don't support compaction end end |