diff options
author | Jean Boussier <[email protected]> | 2024-10-29 12:13:44 +0100 |
---|---|---|
committer | Hiroshi SHIBATA <[email protected]> | 2024-11-01 13:04:24 +0900 |
commit | 4a5e44953a4f42eb7a698c1941cf659119de5ea2 (patch) | |
tree | b5b5c89ff2dd92c7e34c91d3e43f1582913ad513 | |
parent | 59eebeca02325861dd16452c9b85f4920bccd84f (diff) |
[ruby/json] Make fbuffer_inc_capa easier to inline
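With the extra logic added for stack allocation, and especially the
memcpy, `fbuffer_inc_capa` became harder for compilers to inline.
Move the buffer-growing logic into a separate `fbuffer_do_inc_capa`
function so that only the cheap capacity check stays in the inlined wrapper.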
This doesn't fully reclaim the speed lost with the stack allocation,
but it's getting closer.
Before:
```
== Encoding twitter.json (466906 bytes)
ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23]
Warming up --------------------------------------
json 160.000 i/100ms
oj 225.000 i/100ms
Calculating -------------------------------------
json 1.577k (± 2.0%) i/s (634.20 μs/i) - 8.000k in 5.075561s
oj 2.264k (± 2.3%) i/s (441.79 μs/i) - 11.475k in 5.072205s
Comparison:
json: 1576.8 i/s
oj: 2263.5 i/s - 1.44x faster
== Encoding citm_catalog.json (500298 bytes)
ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23]
Warming up --------------------------------------
json 101.000 i/100ms
oj 123.000 i/100ms
Calculating -------------------------------------
json 1.033k (± 2.6%) i/s (968.06 μs/i) - 5.252k in 5.087617s
oj 1.257k (± 2.2%) i/s (795.54 μs/i) - 6.396k in 5.090830s
Comparison:
json: 1033.0 i/s
oj: 1257.0 i/s - 1.22x faster
```
After:
```
== Encoding twitter.json (466906 bytes)
ruby 3.3.4 (2024-07-09 revision be1089c8ec) [arm64-darwin23]
Warming up --------------------------------------
json 213.000 i/100ms
oj 230.000 i/100ms
Calculating -------------------------------------
json 2.064k (± 3.6%) i/s (484.44 μs/i) - 10.437k in 5.063685s
oj 2.246k (± 0.7%) i/s (445.19 μs/i) - 11.270k in 5.017541s
Comparison:
json: 2064.2 i/s
oj: 2246.2 i/s - 1.09x faster
== Encoding citm_catalog.json (500298 bytes)
ruby 3.3.4 (2024-07-09 revision be1089c8ec) [arm64-darwin23]
Warming up --------------------------------------
json 133.000 i/100ms
oj 132.000 i/100ms
Calculating -------------------------------------
json 1.327k (± 1.7%) i/s (753.69 μs/i) - 6.650k in 5.013565s
oj 1.305k (± 2.2%) i/s (766.40 μs/i) - 6.600k in 5.061089s
Comparison:
json: 1326.8 i/s
oj: 1304.8 i/s - same-ish: difference falls within error
```
https://github.com/ruby/json/commit/89f816e868
-rw-r--r-- | ext/json/fbuffer/fbuffer.h | 47 |
1 files changed, 26 insertions, 21 deletions
```diff
diff --git a/ext/json/fbuffer/fbuffer.h b/ext/json/fbuffer/fbuffer.h
index 55fc0bba9d..9bbfeed3cb 100644
--- a/ext/json/fbuffer/fbuffer.h
+++ b/ext/json/fbuffer/fbuffer.h
@@ -33,7 +33,7 @@ static void fbuffer_append(FBuffer *fb, const char *newstr, unsigned long len);
 #ifdef JSON_GENERATOR
 static void fbuffer_append_long(FBuffer *fb, long number);
 #endif
-static void fbuffer_append_char(FBuffer *fb, char newchr);
+static inline void fbuffer_append_char(FBuffer *fb, char newchr);
 #ifdef JSON_GENERATOR
 static VALUE fbuffer_to_s(FBuffer *fb);
 #endif
@@ -66,29 +66,34 @@ static void fbuffer_clear(FBuffer *fb)
 }
 #endif
 
-static inline void fbuffer_inc_capa(FBuffer *fb, unsigned long requested)
+static void fbuffer_do_inc_capa(FBuffer *fb, unsigned long requested)
 {
-    if (RB_UNLIKELY(requested > fb->capa - fb->len)) {
-        unsigned long required;
+    unsigned long required;
 
-        if (RB_UNLIKELY(!fb->ptr)) {
-            fb->ptr = ALLOC_N(char, fb->initial_length);
-            fb->capa = fb->initial_length;
-        }
+    if (RB_UNLIKELY(!fb->ptr)) {
+        fb->ptr = ALLOC_N(char, fb->initial_length);
+        fb->capa = fb->initial_length;
+    }
+
+    for (required = fb->capa; requested > required - fb->len; required <<= 1);
 
-        for (required = fb->capa; requested > required - fb->len; required <<= 1);
-
-        if (required > fb->capa) {
-            if (fb->type == STACK) {
-                const char *old_buffer = fb->ptr;
-                fb->ptr = ALLOC_N(char, required);
-                fb->type = HEAP;
-                MEMCPY(fb->ptr, old_buffer, char, fb->len);
-            } else {
-                REALLOC_N(fb->ptr, char, required);
-            }
-            fb->capa = required;
+    if (required > fb->capa) {
+        if (fb->type == STACK) {
+            const char *old_buffer = fb->ptr;
+            fb->ptr = ALLOC_N(char, required);
+            fb->type = HEAP;
+            MEMCPY(fb->ptr, old_buffer, char, fb->len);
+        } else {
+            REALLOC_N(fb->ptr, char, required);
         }
+        fb->capa = required;
+    }
+}
+
+static inline void fbuffer_inc_capa(FBuffer *fb, unsigned long requested)
+{
+    if (RB_UNLIKELY(requested > fb->capa - fb->len)) {
+        fbuffer_do_inc_capa(fb, requested);
     }
 }
 
@@ -113,7 +118,7 @@ static void fbuffer_append_str(FBuffer *fb, VALUE str)
 }
 #endif
 
-static void fbuffer_append_char(FBuffer *fb, char newchr)
+static inline void fbuffer_append_char(FBuffer *fb, char newchr)
 {
     fbuffer_inc_capa(fb, 1);
     *(fb->ptr + fb->len) = newchr;
```