summaryrefslogtreecommitdiff
path: root/ext/json/generator/generator.c
diff options
context:
space:
mode:
authorJean Boussier <[email protected]>2024-10-29 18:14:12 +0100
committerHiroshi SHIBATA <[email protected]>2024-11-01 13:04:24 +0900
commitcc2e67a138d258290f727f5797bdc14fbc5a6e52 (patch)
tree2c85e628286a5e117705a98c1e100aa2430a2289 /ext/json/generator/generator.c
parent88b411464d85b735c833ea0029c37411deb2480f (diff)
Elide Generator::State allocation until a `to_json` method has to be called
Fix: https://github.com/ruby/json/issues/655 For very small documents, the biggest performance gap with alternatives is that the API requires us to allocate the `State` object. In a real-world app this doesn't make much of a difference, but when running in a micro-benchmark this doubles the allocations, causing twice the amount of GC runs, making us look bad. However, unless we have to call a `to_json` method, the `State` object isn't visible, so with some refactoring, we can elide that allocation entirely. Instead we allocate the State internal struct on the stack, and if we need to call a `to_json` method, we allocate the `State` and spill the struct onto the heap. As a result, `JSON.generate` is now as fast as re-using a `State` instance, as long as only primitives are generated. Before: ``` == Encoding small mixed (34 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- json (reuse) 598.654k i/100ms json 400.542k i/100ms oj 533.353k i/100ms Calculating ------------------------------------- json (reuse) 6.371M (± 8.6%) i/s (156.96 ns/i) - 31.729M in 5.059195s json 4.120M (± 6.6%) i/s (242.72 ns/i) - 20.828M in 5.090549s oj 5.622M (± 6.4%) i/s (177.86 ns/i) - 28.268M in 5.061473s Comparison: json (reuse): 6371126.6 i/s oj: 5622452.0 i/s - same-ish: difference falls within error json: 4119991.1 i/s - 1.55x slower == Encoding small nested array (121 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- json (reuse) 248.125k i/100ms json 215.255k i/100ms oj 217.531k i/100ms Calculating ------------------------------------- json (reuse) 2.628M (± 6.1%) i/s (380.55 ns/i) - 13.151M in 5.030281s json 2.185M (± 6.7%) i/s (457.74 ns/i) - 10.978M in 5.057655s oj 2.217M (± 6.7%) i/s (451.10 ns/i) - 11.094M in 5.044844s Comparison: json (reuse): 2627799.4 i/s oj: 2216824.8 i/s - 1.19x slower json: 2184669.5 
i/s - 1.20x slower == Encoding small hash (65 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- json (reuse) 641.334k i/100ms json 322.745k i/100ms oj 642.450k i/100ms Calculating ------------------------------------- json (reuse) 7.133M (± 6.5%) i/s (140.19 ns/i) - 35.915M in 5.068201s json 4.615M (± 7.0%) i/s (216.70 ns/i) - 22.915M in 5.003718s oj 6.912M (± 6.4%) i/s (144.68 ns/i) - 34.692M in 5.047690s Comparison: json (reuse): 7133123.3 i/s oj: 6911977.1 i/s - same-ish: difference falls within error json: 4614696.6 i/s - 1.55x slower ``` After: ``` == Encoding small mixed (34 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- json (reuse) 572.751k i/100ms json 457.741k i/100ms oj 512.247k i/100ms Calculating ------------------------------------- json (reuse) 6.324M (± 6.9%) i/s (158.12 ns/i) - 31.501M in 5.023093s json 6.263M (± 6.9%) i/s (159.66 ns/i) - 31.126M in 5.017086s oj 5.569M (± 6.6%) i/s (179.56 ns/i) - 27.661M in 5.003739s Comparison: json (reuse): 6324183.5 i/s json: 6263204.9 i/s - same-ish: difference falls within error oj: 5569049.2 i/s - same-ish: difference falls within error == Encoding small nested array (121 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up -------------------------------------- json (reuse) 258.505k i/100ms json 242.335k i/100ms oj 220.678k i/100ms Calculating ------------------------------------- json (reuse) 2.589M (± 9.6%) i/s (386.17 ns/i) - 12.925M in 5.071853s json 2.594M (± 6.6%) i/s (385.46 ns/i) - 13.086M in 5.083035s oj 2.250M (± 2.3%) i/s (444.43 ns/i) - 11.255M in 5.004707s Comparison: json (reuse): 2589499.6 i/s json: 2594321.0 i/s - same-ish: difference falls within error oj: 2250064.0 i/s - 1.15x slower == Encoding small hash (65 bytes) ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] Warming up 
-------------------------------------- json (reuse) 656.373k i/100ms json 644.135k i/100ms oj 650.283k i/100ms Calculating ------------------------------------- json (reuse) 7.202M (± 7.1%) i/s (138.84 ns/i) - 36.101M in 5.051438s json 7.278M (± 1.7%) i/s (137.40 ns/i) - 36.716M in 5.046300s oj 7.036M (± 1.7%) i/s (142.12 ns/i) - 35.766M in 5.084729s Comparison: json (reuse): 7202447.9 i/s json: 7277883.0 i/s - same-ish: difference falls within error oj: 7036115.2 i/s - same-ish: difference falls within error ```
Diffstat (limited to 'ext/json/generator/generator.c')
-rw-r--r--ext/json/generator/generator.c173
1 files changed, 122 insertions, 51 deletions
diff --git a/ext/json/generator/generator.c b/ext/json/generator/generator.c
index aa9f1c9dec..8118e1392b 100644
--- a/ext/json/generator/generator.c
+++ b/ext/json/generator/generator.c
@@ -11,6 +11,41 @@ static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend, i_e
static ID sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan,
sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict;
+
+#define GET_STATE_TO(self, state) \
+ TypedData_Get_Struct(self, JSON_Generator_State, &JSON_Generator_State_type, state)
+
+#define GET_STATE(self) \
+ JSON_Generator_State *state; \
+ GET_STATE_TO(self, state)
+
+struct generate_json_data;
+
+typedef void (*generator_func)(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
+
+struct generate_json_data {
+ FBuffer *buffer;
+ VALUE vstate;
+ JSON_Generator_State *state;
+ VALUE obj;
+ generator_func func;
+};
+
+static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func);
+static void generate_json(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
+static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
+static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
+static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
+static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
+static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
+static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
+#ifdef RUBY_INTEGER_UNIFICATION
+static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
+#endif
+static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
+static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
+static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj);
+
/* Converts in_string to a JSON string (without the wrapping '"'
* characters) in FBuffer out_buffer.
*
@@ -629,19 +664,39 @@ static const rb_data_type_t JSON_Generator_State_type = {
RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE,
};
+static void state_init(JSON_Generator_State *state)
+{
+ state->max_nesting = 100;
+ state->buffer_initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT;
+}
+
static VALUE cState_s_allocate(VALUE klass)
{
JSON_Generator_State *state;
VALUE obj = TypedData_Make_Struct(klass, JSON_Generator_State, &JSON_Generator_State_type, state);
- state->max_nesting = 100;
- state->buffer_initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT;
+ state_init(state);
return obj;
}
+static void vstate_spill(struct generate_json_data *data)
+{
+ VALUE vstate = cState_s_allocate(cState);
+ GET_STATE(vstate);
+ MEMCPY(state, data->state, JSON_Generator_State, 1);
+ data->state = state;
+ data->vstate = vstate;
+}
+
+static inline VALUE vstate_get(struct generate_json_data *data)
+{
+ if (RB_UNLIKELY(!data->vstate)) {
+ vstate_spill(data);
+ }
+ return data->vstate;
+}
+
struct hash_foreach_arg {
- FBuffer *buffer;
- JSON_Generator_State *state;
- VALUE Vstate;
+ struct generate_json_data *data;
int iter;
};
@@ -649,9 +704,10 @@ static int
json_object_i(VALUE key, VALUE val, VALUE _arg)
{
struct hash_foreach_arg *arg = (struct hash_foreach_arg *)_arg;
- FBuffer *buffer = arg->buffer;
- JSON_Generator_State *state = arg->state;
- VALUE Vstate = arg->Vstate;
+ struct generate_json_data *data = arg->data;
+
+ FBuffer *buffer = data->buffer;
+ JSON_Generator_State *state = data->state;
long depth = state->depth;
int j;
@@ -679,22 +735,21 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
break;
}
- generate_json_string(buffer, Vstate, state, key_to_s);
+ generate_json_string(buffer, data, state, key_to_s);
if (RB_UNLIKELY(state->space_before)) fbuffer_append_str(buffer, state->space_before);
fbuffer_append_char(buffer, ':');
if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, state->space);
- generate_json(buffer, Vstate, state, val);
+ generate_json(buffer, data, state, val);
arg->iter++;
return ST_CONTINUE;
}
-static void generate_json_object(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
+static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
{
long max_nesting = state->max_nesting;
long depth = ++state->depth;
int j;
- struct hash_foreach_arg arg;
if (max_nesting != 0 && depth > max_nesting) {
rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth);
@@ -708,10 +763,10 @@ static void generate_json_object(FBuffer *buffer, VALUE Vstate, JSON_Generator_S
fbuffer_append_char(buffer, '{');
- arg.buffer = buffer;
- arg.state = state;
- arg.Vstate = Vstate;
- arg.iter = 0;
+ struct hash_foreach_arg arg = {
+ .data = data,
+ .iter = 0,
+ };
rb_hash_foreach(obj, json_object_i, (VALUE)&arg);
depth = --state->depth;
@@ -726,7 +781,7 @@ static void generate_json_object(FBuffer *buffer, VALUE Vstate, JSON_Generator_S
fbuffer_append_char(buffer, '}');
}
-static void generate_json_array(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
+static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
{
long max_nesting = state->max_nesting;
long depth = ++state->depth;
@@ -753,7 +808,7 @@ static void generate_json_array(FBuffer *buffer, VALUE Vstate, JSON_Generator_St
fbuffer_append_str(buffer, state->indent);
}
}
- generate_json(buffer, Vstate, state, RARRAY_AREF(obj, i));
+ generate_json(buffer, data, state, RARRAY_AREF(obj, i));
}
state->depth = --depth;
if (RB_UNLIKELY(state->array_nl)) {
@@ -799,7 +854,7 @@ static inline VALUE ensure_valid_encoding(VALUE str)
return str;
}
-static void generate_json_string(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
+static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
{
obj = ensure_valid_encoding(obj);
@@ -823,43 +878,43 @@ static void generate_json_string(FBuffer *buffer, VALUE Vstate, JSON_Generator_S
fbuffer_append_char(buffer, '"');
}
-static void generate_json_null(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
+static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
{
fbuffer_append(buffer, "null", 4);
}
-static void generate_json_false(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
+static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
{
fbuffer_append(buffer, "false", 5);
}
-static void generate_json_true(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
+static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
{
fbuffer_append(buffer, "true", 4);
}
-static void generate_json_fixnum(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
+static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
{
fbuffer_append_long(buffer, FIX2LONG(obj));
}
-static void generate_json_bignum(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
+static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
{
VALUE tmp = rb_funcall(obj, i_to_s, 0);
fbuffer_append_str(buffer, tmp);
}
#ifdef RUBY_INTEGER_UNIFICATION
-static void generate_json_integer(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
+static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
{
if (FIXNUM_P(obj))
- generate_json_fixnum(buffer, Vstate, state, obj);
+ generate_json_fixnum(buffer, data, state, obj);
else
- generate_json_bignum(buffer, Vstate, state, obj);
+ generate_json_bignum(buffer, data, state, obj);
}
#endif
-static void generate_json_float(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
+static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
{
double value = RFLOAT_VALUE(obj);
char allow_nan = state->allow_nan;
@@ -874,20 +929,20 @@ static void generate_json_float(FBuffer *buffer, VALUE Vstate, JSON_Generator_St
fbuffer_append_str(buffer, tmp);
}
-static void generate_json(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)
+static void generate_json(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj)
{
VALUE tmp;
if (obj == Qnil) {
- generate_json_null(buffer, Vstate, state, obj);
+ generate_json_null(buffer, data, state, obj);
} else if (obj == Qfalse) {
- generate_json_false(buffer, Vstate, state, obj);
+ generate_json_false(buffer, data, state, obj);
} else if (obj == Qtrue) {
- generate_json_true(buffer, Vstate, state, obj);
+ generate_json_true(buffer, data, state, obj);
} else if (RB_SPECIAL_CONST_P(obj)) {
if (RB_FIXNUM_P(obj)) {
- generate_json_fixnum(buffer, Vstate, state, obj);
+ generate_json_fixnum(buffer, data, state, obj);
} else if (RB_FLONUM_P(obj)) {
- generate_json_float(buffer, Vstate, state, obj);
+ generate_json_float(buffer, data, state, obj);
} else {
goto general;
}
@@ -895,54 +950,46 @@ static void generate_json(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *s
VALUE klass = RBASIC_CLASS(obj);
switch (RB_BUILTIN_TYPE(obj)) {
case T_BIGNUM:
- generate_json_bignum(buffer, Vstate, state, obj);
+ generate_json_bignum(buffer, data, state, obj);
break;
case T_HASH:
if (klass != rb_cHash) goto general;
- generate_json_object(buffer, Vstate, state, obj);
+ generate_json_object(buffer, data, state, obj);
break;
case T_ARRAY:
if (klass != rb_cArray) goto general;
- generate_json_array(buffer, Vstate, state, obj);
+ generate_json_array(buffer, data, state, obj);
break;
case T_STRING:
if (klass != rb_cString) goto general;
- generate_json_string(buffer, Vstate, state, obj);
+ generate_json_string(buffer, data, state, obj);
break;
case T_FLOAT:
if (klass != rb_cFloat) goto general;
- generate_json_float(buffer, Vstate, state, obj);
+ generate_json_float(buffer, data, state, obj);
break;
default:
general:
if (state->strict) {
rb_raise(eGeneratorError, "%"PRIsVALUE" not allowed in JSON", CLASS_OF(obj));
} else if (rb_respond_to(obj, i_to_json)) {
- tmp = rb_funcall(obj, i_to_json, 1, Vstate);
+ tmp = rb_funcall(obj, i_to_json, 1, vstate_get(data));
Check_Type(tmp, T_STRING);
fbuffer_append_str(buffer, tmp);
} else {
tmp = rb_funcall(obj, i_to_s, 0);
Check_Type(tmp, T_STRING);
- generate_json_string(buffer, Vstate, state, tmp);
+ generate_json_string(buffer, data, state, tmp);
}
}
}
}
-struct generate_json_data {
- FBuffer *buffer;
- VALUE vstate;
- JSON_Generator_State *state;
- VALUE obj;
- void (*func)(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj);
-};
-
static VALUE generate_json_try(VALUE d)
{
struct generate_json_data *data = (struct generate_json_data *)d;
- data->func(data->buffer, data->vstate, data->state, data->obj);
+ data->func(data->buffer, data, data->state, data->obj);
return Qnil;
}
@@ -957,7 +1004,7 @@ static VALUE generate_json_rescue(VALUE d, VALUE exc)
return Qundef;
}
-static VALUE cState_partial_generate(VALUE self, VALUE obj, void (*func)(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj))
+static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func)
{
GET_STATE(self);
@@ -1414,6 +1461,28 @@ static VALUE cState_configure(VALUE self, VALUE opts)
return self;
}
+static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts)
+{
+ JSON_Generator_State state = {0};
+ state_init(&state);
+ configure_state(&state, opts);
+
+ char stack_buffer[FBUFFER_STACK_SIZE];
+ FBuffer buffer = {0};
+ fbuffer_stack_init(&buffer, state.buffer_initial_length, stack_buffer, FBUFFER_STACK_SIZE);
+
+ struct generate_json_data data = {
+ .buffer = &buffer,
+ .vstate = Qfalse,
+ .state = &state,
+ .obj = obj,
+ .func = generate_json,
+ };
+ rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data);
+
+ return fbuffer_to_s(&buffer);
+}
+
/*
*
*/
@@ -1475,6 +1544,8 @@ void Init_generator(void)
rb_define_method(cState, "buffer_initial_length=", cState_buffer_initial_length_set, 1);
rb_define_method(cState, "generate", cState_generate, 1);
+ rb_define_singleton_method(cState, "generate", cState_m_generate, 2);
+
VALUE mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods");
VALUE mObject = rb_define_module_under(mGeneratorMethods, "Object");