summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jean Boussier <[email protected]> 2024-10-29 12:13:44 +0100
committer: Hiroshi SHIBATA <[email protected]> 2024-11-01 13:04:24 +0900
commit: 4a5e44953a4f42eb7a698c1941cf659119de5ea2 (patch)
tree: b5b5c89ff2dd92c7e34c91d3e43f1582913ad513
parent: 59eebeca02325861dd16452c9b85f4920bccd84f (diff)
[ruby/json] Make fbuffer_inc_capa easier to inline
With the extra logic added for stack allocation, and especially the memcpy, `fbuffer_inc_capa` became harder for compilers to inline. This doesn't fully reclaim the speed lost with the stack allocation, but it's getting closer.

Before:

```
== Encoding twitter.json (466906 bytes)
ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23]
Warming up --------------------------------------
                json   160.000 i/100ms
                  oj   225.000 i/100ms
Calculating -------------------------------------
                json      1.577k (± 2.0%) i/s  (634.20 μs/i) -      8.000k in   5.075561s
                  oj      2.264k (± 2.3%) i/s  (441.79 μs/i) -     11.475k in   5.072205s

Comparison:
                json:     1576.8 i/s
                  oj:     2263.5 i/s - 1.44x  faster

== Encoding citm_catalog.json (500298 bytes)
ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23]
Warming up --------------------------------------
                json   101.000 i/100ms
                  oj   123.000 i/100ms
Calculating -------------------------------------
                json      1.033k (± 2.6%) i/s  (968.06 μs/i) -      5.252k in   5.087617s
                  oj      1.257k (± 2.2%) i/s  (795.54 μs/i) -      6.396k in   5.090830s

Comparison:
                json:     1033.0 i/s
                  oj:     1257.0 i/s - 1.22x  faster
```

After:

```
== Encoding twitter.json (466906 bytes)
ruby 3.3.4 (2024-07-09 revision be1089c8ec) [arm64-darwin23]
Warming up --------------------------------------
                json   213.000 i/100ms
                  oj   230.000 i/100ms
Calculating -------------------------------------
                json      2.064k (± 3.6%) i/s  (484.44 μs/i) -     10.437k in   5.063685s
                  oj      2.246k (± 0.7%) i/s  (445.19 μs/i) -     11.270k in   5.017541s

Comparison:
                json:     2064.2 i/s
                  oj:     2246.2 i/s - 1.09x  faster

== Encoding citm_catalog.json (500298 bytes)
ruby 3.3.4 (2024-07-09 revision be1089c8ec) [arm64-darwin23]
Warming up --------------------------------------
                json   133.000 i/100ms
                  oj   132.000 i/100ms
Calculating -------------------------------------
                json      1.327k (± 1.7%) i/s  (753.69 μs/i) -      6.650k in   5.013565s
                  oj      1.305k (± 2.2%) i/s  (766.40 μs/i) -      6.600k in   5.061089s

Comparison:
                json:     1326.8 i/s
                  oj:     1304.8 i/s - same-ish: difference falls within error
```

https://github.com/ruby/json/commit/89f816e868
-rw-r--r-- ext/json/fbuffer/fbuffer.h 47
1 file changed, 26 insertions, 21 deletions
diff --git a/ext/json/fbuffer/fbuffer.h b/ext/json/fbuffer/fbuffer.h
index 55fc0bba9d..9bbfeed3cb 100644
--- a/ext/json/fbuffer/fbuffer.h
+++ b/ext/json/fbuffer/fbuffer.h
@@ -33,7 +33,7 @@ static void fbuffer_append(FBuffer *fb, const char *newstr, unsigned long len);
#ifdef JSON_GENERATOR
static void fbuffer_append_long(FBuffer *fb, long number);
#endif
-static void fbuffer_append_char(FBuffer *fb, char newchr);
+static inline void fbuffer_append_char(FBuffer *fb, char newchr);
#ifdef JSON_GENERATOR
static VALUE fbuffer_to_s(FBuffer *fb);
#endif
@@ -66,29 +66,34 @@ static void fbuffer_clear(FBuffer *fb)
}
#endif
-static inline void fbuffer_inc_capa(FBuffer *fb, unsigned long requested)
+static void fbuffer_do_inc_capa(FBuffer *fb, unsigned long requested)
{
- if (RB_UNLIKELY(requested > fb->capa - fb->len)) {
- unsigned long required;
+ unsigned long required;
- if (RB_UNLIKELY(!fb->ptr)) {
- fb->ptr = ALLOC_N(char, fb->initial_length);
- fb->capa = fb->initial_length;
- }
+ if (RB_UNLIKELY(!fb->ptr)) {
+ fb->ptr = ALLOC_N(char, fb->initial_length);
+ fb->capa = fb->initial_length;
+ }
+
+ for (required = fb->capa; requested > required - fb->len; required <<= 1);
- for (required = fb->capa; requested > required - fb->len; required <<= 1);
-
- if (required > fb->capa) {
- if (fb->type == STACK) {
- const char *old_buffer = fb->ptr;
- fb->ptr = ALLOC_N(char, required);
- fb->type = HEAP;
- MEMCPY(fb->ptr, old_buffer, char, fb->len);
- } else {
- REALLOC_N(fb->ptr, char, required);
- }
- fb->capa = required;
+ if (required > fb->capa) {
+ if (fb->type == STACK) {
+ const char *old_buffer = fb->ptr;
+ fb->ptr = ALLOC_N(char, required);
+ fb->type = HEAP;
+ MEMCPY(fb->ptr, old_buffer, char, fb->len);
+ } else {
+ REALLOC_N(fb->ptr, char, required);
}
+ fb->capa = required;
+ }
+}
+
+static inline void fbuffer_inc_capa(FBuffer *fb, unsigned long requested)
+{
+ if (RB_UNLIKELY(requested > fb->capa - fb->len)) {
+ fbuffer_do_inc_capa(fb, requested);
}
}
@@ -113,7 +118,7 @@ static void fbuffer_append_str(FBuffer *fb, VALUE str)
}
#endif
-static void fbuffer_append_char(FBuffer *fb, char newchr)
+static inline void fbuffer_append_char(FBuffer *fb, char newchr)
{
fbuffer_inc_capa(fb, 1);
*(fb->ptr + fb->len) = newchr;