diff options
author | Jean Boussier <[email protected]> | 2024-10-29 18:14:12 +0100 |
---|---|---|
committer | Hiroshi SHIBATA <[email protected]> | 2024-11-01 13:04:24 +0900 |
commit | cc2e67a138d258290f727f5797bdc14fbc5a6e52 (patch) | |
tree | 2c85e628286a5e117705a98c1e100aa2430a2289 /ext/json/generator/generator.c | |
parent | 88b411464d85b735c833ea0029c37411deb2480f (diff) |
Elide Generator::State allocation until a `to_json` method has to be called
Fix: https://github.com/ruby/json/issues/655
For very small documents, the biggest performance gap with alternatives is
that the API imposes that we allocate the `State` object. In a real-world app
this doesn't make much of a difference, but when running in a micro-benchmark
this doubles the allocations, causing twice the amount of GC runs, making us
look bad.
However, unless we have to call a `to_json` method, the `State` object isn't
visible, so with some refactoring, we can elide that allocation entirely.
Instead we allocate the State internal struct on the stack, and if we need
to call a `to_json` method, we allocate the `State` and spill the struct onto
the heap.
As a result, `JSON.generate` is now as fast as re-using a `State` instance,
as long as only primitives are generated.
Before:
```
== Encoding small mixed (34 bytes)
ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23]
Warming up --------------------------------------
json (reuse) 598.654k i/100ms
json 400.542k i/100ms
oj 533.353k i/100ms
Calculating -------------------------------------
json (reuse) 6.371M (± 8.6%) i/s (156.96 ns/i) - 31.729M in 5.059195s
json 4.120M (± 6.6%) i/s (242.72 ns/i) - 20.828M in 5.090549s
oj 5.622M (± 6.4%) i/s (177.86 ns/i) - 28.268M in 5.061473s
Comparison:
json (reuse): 6371126.6 i/s
oj: 5622452.0 i/s - same-ish: difference falls within error
json: 4119991.1 i/s - 1.55x slower
== Encoding small nested array (121 bytes)
ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23]
Warming up --------------------------------------
json (reuse) 248.125k i/100ms
json 215.255k i/100ms
oj 217.531k i/100ms
Calculating -------------------------------------
json (reuse) 2.628M (± 6.1%) i/s (380.55 ns/i) - 13.151M in 5.030281s
json 2.185M (± 6.7%) i/s (457.74 ns/i) - 10.978M in 5.057655s
oj 2.217M (± 6.7%) i/s (451.10 ns/i) - 11.094M in 5.044844s
Comparison:
json (reuse): 2627799.4 i/s
oj: 2216824.8 i/s - 1.19x slower
json: 2184669.5 i/s - 1.20x slower
== Encoding small hash (65 bytes)
ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23]
Warming up --------------------------------------
json (reuse) 641.334k i/100ms
json 322.745k i/100ms
oj 642.450k i/100ms
Calculating -------------------------------------
json (reuse) 7.133M (± 6.5%) i/s (140.19 ns/i) - 35.915M in 5.068201s
json 4.615M (± 7.0%) i/s (216.70 ns/i) - 22.915M in 5.003718s
oj 6.912M (± 6.4%) i/s (144.68 ns/i) - 34.692M in 5.047690s
Comparison:
json (reuse): 7133123.3 i/s
oj: 6911977.1 i/s - same-ish: difference falls within error
json: 4614696.6 i/s - 1.55x slower
```
After:
```
== Encoding small mixed (34 bytes)
ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23]
Warming up --------------------------------------
json (reuse) 572.751k i/100ms
json 457.741k i/100ms
oj 512.247k i/100ms
Calculating -------------------------------------
json (reuse) 6.324M (± 6.9%) i/s (158.12 ns/i) - 31.501M in 5.023093s
json 6.263M (± 6.9%) i/s (159.66 ns/i) - 31.126M in 5.017086s
oj 5.569M (± 6.6%) i/s (179.56 ns/i) - 27.661M in 5.003739s
Comparison:
json (reuse): 6324183.5 i/s
json: 6263204.9 i/s - same-ish: difference falls within error
oj: 5569049.2 i/s - same-ish: difference falls within error
== Encoding small nested array (121 bytes)
ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23]
Warming up --------------------------------------
json (reuse) 258.505k i/100ms
json 242.335k i/100ms
oj 220.678k i/100ms
Calculating -------------------------------------
json (reuse) 2.589M (± 9.6%) i/s (386.17 ns/i) - 12.925M in 5.071853s
json 2.594M (± 6.6%) i/s (385.46 ns/i) - 13.086M in 5.083035s
oj 2.250M (± 2.3%) i/s (444.43 ns/i) - 11.255M in 5.004707s
Comparison:
json (reuse): 2589499.6 i/s
json: 2594321.0 i/s - same-ish: difference falls within error
oj: 2250064.0 i/s - 1.15x slower
== Encoding small hash (65 bytes)
ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23]
Warming up --------------------------------------
json (reuse) 656.373k i/100ms
json 644.135k i/100ms
oj 650.283k i/100ms
Calculating -------------------------------------
json (reuse) 7.202M (± 7.1%) i/s (138.84 ns/i) - 36.101M in 5.051438s
json 7.278M (± 1.7%) i/s (137.40 ns/i) - 36.716M in 5.046300s
oj 7.036M (± 1.7%) i/s (142.12 ns/i) - 35.766M in 5.084729s
Comparison:
json (reuse): 7202447.9 i/s
json: 7277883.0 i/s - same-ish: difference falls within error
oj: 7036115.2 i/s - same-ish: difference falls within error
```
Diffstat (limited to 'ext/json/generator/generator.c')
-rw-r--r-- | ext/json/generator/generator.c | 173 |
1 files changed, 122 insertions, 51 deletions
diff --git a/ext/json/generator/generator.c b/ext/json/generator/generator.c index aa9f1c9dec..8118e1392b 100644 --- a/ext/json/generator/generator.c +++ b/ext/json/generator/generator.c @@ -11,6 +11,41 @@ static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend, i_e static ID sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan, sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict; + +#define GET_STATE_TO(self, state) \ + TypedData_Get_Struct(self, JSON_Generator_State, &JSON_Generator_State_type, state) + +#define GET_STATE(self) \ + JSON_Generator_State *state; \ + GET_STATE_TO(self, state) + +struct generate_json_data; + +typedef void (*generator_func)(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); + +struct generate_json_data { + FBuffer *buffer; + VALUE vstate; + JSON_Generator_State *state; + VALUE obj; + generator_func func; +}; + +static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func); +static void generate_json(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); +static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); +static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); +static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); +static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); +static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); +static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); +#ifdef RUBY_INTEGER_UNIFICATION +static void 
generate_json_integer(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); +#endif +static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); +static void generate_json_bignum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); +static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); + /* Converts in_string to a JSON string (without the wrapping '"' * characters) in FBuffer out_buffer. * @@ -629,19 +664,39 @@ static const rb_data_type_t JSON_Generator_State_type = { RUBY_TYPED_WB_PROTECTED | RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE, }; +static void state_init(JSON_Generator_State *state) +{ + state->max_nesting = 100; + state->buffer_initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT; +} + static VALUE cState_s_allocate(VALUE klass) { JSON_Generator_State *state; VALUE obj = TypedData_Make_Struct(klass, JSON_Generator_State, &JSON_Generator_State_type, state); - state->max_nesting = 100; - state->buffer_initial_length = FBUFFER_INITIAL_LENGTH_DEFAULT; + state_init(state); return obj; } +static void vstate_spill(struct generate_json_data *data) +{ + VALUE vstate = cState_s_allocate(cState); + GET_STATE(vstate); + MEMCPY(state, data->state, JSON_Generator_State, 1); + data->state = state; + data->vstate = vstate; +} + +static inline VALUE vstate_get(struct generate_json_data *data) +{ + if (RB_UNLIKELY(!data->vstate)) { + vstate_spill(data); + } + return data->vstate; +} + struct hash_foreach_arg { - FBuffer *buffer; - JSON_Generator_State *state; - VALUE Vstate; + struct generate_json_data *data; int iter; }; @@ -649,9 +704,10 @@ static int json_object_i(VALUE key, VALUE val, VALUE _arg) { struct hash_foreach_arg *arg = (struct hash_foreach_arg *)_arg; - FBuffer *buffer = arg->buffer; - JSON_Generator_State *state = arg->state; - VALUE Vstate = 
arg->Vstate; + struct generate_json_data *data = arg->data; + + FBuffer *buffer = data->buffer; + JSON_Generator_State *state = data->state; long depth = state->depth; int j; @@ -679,22 +735,21 @@ json_object_i(VALUE key, VALUE val, VALUE _arg) break; } - generate_json_string(buffer, Vstate, state, key_to_s); + generate_json_string(buffer, data, state, key_to_s); if (RB_UNLIKELY(state->space_before)) fbuffer_append_str(buffer, state->space_before); fbuffer_append_char(buffer, ':'); if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, state->space); - generate_json(buffer, Vstate, state, val); + generate_json(buffer, data, state, val); arg->iter++; return ST_CONTINUE; } -static void generate_json_object(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) { long max_nesting = state->max_nesting; long depth = ++state->depth; int j; - struct hash_foreach_arg arg; if (max_nesting != 0 && depth > max_nesting) { rb_raise(eNestingError, "nesting of %ld is too deep", --state->depth); @@ -708,10 +763,10 @@ static void generate_json_object(FBuffer *buffer, VALUE Vstate, JSON_Generator_S fbuffer_append_char(buffer, '{'); - arg.buffer = buffer; - arg.state = state; - arg.Vstate = Vstate; - arg.iter = 0; + struct hash_foreach_arg arg = { + .data = data, + .iter = 0, + }; rb_hash_foreach(obj, json_object_i, (VALUE)&arg); depth = --state->depth; @@ -726,7 +781,7 @@ static void generate_json_object(FBuffer *buffer, VALUE Vstate, JSON_Generator_S fbuffer_append_char(buffer, '}'); } -static void generate_json_array(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) { long max_nesting = state->max_nesting; long depth = ++state->depth; @@ -753,7 +808,7 @@ static void generate_json_array(FBuffer 
*buffer, VALUE Vstate, JSON_Generator_St fbuffer_append_str(buffer, state->indent); } } - generate_json(buffer, Vstate, state, RARRAY_AREF(obj, i)); + generate_json(buffer, data, state, RARRAY_AREF(obj, i)); } state->depth = --depth; if (RB_UNLIKELY(state->array_nl)) { @@ -799,7 +854,7 @@ static inline VALUE ensure_valid_encoding(VALUE str) return str; } -static void generate_json_string(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_string(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) { obj = ensure_valid_encoding(obj); @@ -823,43 +878,43 @@ static void generate_json_string(FBuffer *buffer, VALUE Vstate, JSON_Generator_S fbuffer_append_char(buffer, '"'); } -static void generate_json_null(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_null(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) { fbuffer_append(buffer, "null", 4); } -static void generate_json_false(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_false(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) { fbuffer_append(buffer, "false", 5); } -static void generate_json_true(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_true(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) { fbuffer_append(buffer, "true", 4); } -static void generate_json_fixnum(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_fixnum(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) { fbuffer_append_long(buffer, FIX2LONG(obj)); } -static void generate_json_bignum(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_bignum(FBuffer *buffer, struct 
generate_json_data *data, JSON_Generator_State *state, VALUE obj) { VALUE tmp = rb_funcall(obj, i_to_s, 0); fbuffer_append_str(buffer, tmp); } #ifdef RUBY_INTEGER_UNIFICATION -static void generate_json_integer(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_integer(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) { if (FIXNUM_P(obj)) - generate_json_fixnum(buffer, Vstate, state, obj); + generate_json_fixnum(buffer, data, state, obj); else - generate_json_bignum(buffer, Vstate, state, obj); + generate_json_bignum(buffer, data, state, obj); } #endif -static void generate_json_float(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json_float(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) { double value = RFLOAT_VALUE(obj); char allow_nan = state->allow_nan; @@ -874,20 +929,20 @@ static void generate_json_float(FBuffer *buffer, VALUE Vstate, JSON_Generator_St fbuffer_append_str(buffer, tmp); } -static void generate_json(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj) +static void generate_json(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj) { VALUE tmp; if (obj == Qnil) { - generate_json_null(buffer, Vstate, state, obj); + generate_json_null(buffer, data, state, obj); } else if (obj == Qfalse) { - generate_json_false(buffer, Vstate, state, obj); + generate_json_false(buffer, data, state, obj); } else if (obj == Qtrue) { - generate_json_true(buffer, Vstate, state, obj); + generate_json_true(buffer, data, state, obj); } else if (RB_SPECIAL_CONST_P(obj)) { if (RB_FIXNUM_P(obj)) { - generate_json_fixnum(buffer, Vstate, state, obj); + generate_json_fixnum(buffer, data, state, obj); } else if (RB_FLONUM_P(obj)) { - generate_json_float(buffer, Vstate, state, obj); + generate_json_float(buffer, data, state, obj); } else { goto general; } 
@@ -895,54 +950,46 @@ static void generate_json(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *s VALUE klass = RBASIC_CLASS(obj); switch (RB_BUILTIN_TYPE(obj)) { case T_BIGNUM: - generate_json_bignum(buffer, Vstate, state, obj); + generate_json_bignum(buffer, data, state, obj); break; case T_HASH: if (klass != rb_cHash) goto general; - generate_json_object(buffer, Vstate, state, obj); + generate_json_object(buffer, data, state, obj); break; case T_ARRAY: if (klass != rb_cArray) goto general; - generate_json_array(buffer, Vstate, state, obj); + generate_json_array(buffer, data, state, obj); break; case T_STRING: if (klass != rb_cString) goto general; - generate_json_string(buffer, Vstate, state, obj); + generate_json_string(buffer, data, state, obj); break; case T_FLOAT: if (klass != rb_cFloat) goto general; - generate_json_float(buffer, Vstate, state, obj); + generate_json_float(buffer, data, state, obj); break; default: general: if (state->strict) { rb_raise(eGeneratorError, "%"PRIsVALUE" not allowed in JSON", CLASS_OF(obj)); } else if (rb_respond_to(obj, i_to_json)) { - tmp = rb_funcall(obj, i_to_json, 1, Vstate); + tmp = rb_funcall(obj, i_to_json, 1, vstate_get(data)); Check_Type(tmp, T_STRING); fbuffer_append_str(buffer, tmp); } else { tmp = rb_funcall(obj, i_to_s, 0); Check_Type(tmp, T_STRING); - generate_json_string(buffer, Vstate, state, tmp); + generate_json_string(buffer, data, state, tmp); } } } } -struct generate_json_data { - FBuffer *buffer; - VALUE vstate; - JSON_Generator_State *state; - VALUE obj; - void (*func)(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj); -}; - static VALUE generate_json_try(VALUE d) { struct generate_json_data *data = (struct generate_json_data *)d; - data->func(data->buffer, data->vstate, data->state, data->obj); + data->func(data->buffer, data, data->state, data->obj); return Qnil; } @@ -957,7 +1004,7 @@ static VALUE generate_json_rescue(VALUE d, VALUE exc) return Qundef; } -static VALUE 
cState_partial_generate(VALUE self, VALUE obj, void (*func)(FBuffer *buffer, VALUE Vstate, JSON_Generator_State *state, VALUE obj)) +static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func) { GET_STATE(self); @@ -1414,6 +1461,28 @@ static VALUE cState_configure(VALUE self, VALUE opts) return self; } +static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts) +{ + JSON_Generator_State state = {0}; + state_init(&state); + configure_state(&state, opts); + + char stack_buffer[FBUFFER_STACK_SIZE]; + FBuffer buffer = {0}; + fbuffer_stack_init(&buffer, state.buffer_initial_length, stack_buffer, FBUFFER_STACK_SIZE); + + struct generate_json_data data = { + .buffer = &buffer, + .vstate = Qfalse, + .state = &state, + .obj = obj, + .func = generate_json, + }; + rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data); + + return fbuffer_to_s(&buffer); +} + /* * */ @@ -1475,6 +1544,8 @@ void Init_generator(void) rb_define_method(cState, "buffer_initial_length=", cState_buffer_initial_length_set, 1); rb_define_method(cState, "generate", cState_generate, 1); + rb_define_singleton_method(cState, "generate", cState_m_generate, 2); + VALUE mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods"); VALUE mObject = rb_define_module_under(mGeneratorMethods, "Object"); |