summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAaron Patterson <[email protected]>2024-02-13 08:20:27 -0800
committerGitHub <[email protected]>2024-02-13 16:20:27 +0000
commitc35fea8509551aefe257986c937ea7147f436bdf (patch)
tree1c7cfb948db049aedffa0305e25e14d56d21f314
parenta71d1ed83875600c174e66a8ace18e0ad451958a (diff)
Specialize String#byteslice(a, b) (#9939)
* Specialize String#byteslice(a, b) This adds a specialization for String#byteslice when there are two parameters. This makes our protobuf parser go from 5.84x slower to 5.33x slower ``` Comparison: decode upstream (53738 bytes): 7228.5 i/s decode protobuff (53738 bytes): 1236.8 i/s - 5.84x slower Comparison: decode upstream (53738 bytes): 7024.8 i/s decode protobuff (53738 bytes): 1318.5 i/s - 5.33x slower ``` * Update yjit/src/codegen.rs --------- Co-authored-by: Maxime Chevalier-Boisvert <[email protected]>
-rw-r--r--internal/string.h1
-rw-r--r--string.c6
-rw-r--r--yjit/bindgen/src/main.rs1
-rw-r--r--yjit/src/codegen.rs32
-rw-r--r--yjit/src/cruby_bindings.inc.rs1
5 files changed, 41 insertions, 0 deletions
diff --git a/internal/string.h b/internal/string.h
index ba2af25877..cde81a1a25 100644
--- a/internal/string.h
+++ b/internal/string.h
@@ -45,6 +45,7 @@ void rb_str_make_independent(VALUE str);
int rb_enc_str_coderange_scan(VALUE str, rb_encoding *enc);
int rb_ascii8bit_appendable_encoding_index(rb_encoding *enc, unsigned int code);
VALUE rb_str_include(VALUE str, VALUE arg);
+VALUE rb_str_byte_substr(VALUE str, VALUE beg, VALUE len);
static inline bool STR_EMBED_P(VALUE str);
static inline bool STR_SHARED_P(VALUE str);
diff --git a/string.c b/string.c
index 36e09b589b..83be7d166b 100644
--- a/string.c
+++ b/string.c
@@ -6274,6 +6274,12 @@ str_byte_substr(VALUE str, long beg, long len, int empty)
return str2;
}
+/*
+ * Entry point for taking a byte substring with VALUE arguments: converts
+ * `beg` and `len` with NUM2LONG and forwards them, together with `str`, to
+ * str_byte_substr() with its `empty` argument set to TRUE.
+ */
+VALUE
+rb_str_byte_substr(VALUE str, VALUE beg, VALUE len)
+{
+    const long byte_beg = NUM2LONG(beg);
+    const long byte_len = NUM2LONG(len);
+
+    return str_byte_substr(str, byte_beg, byte_len, TRUE);
+}
+
static VALUE
str_byte_aref(VALUE str, VALUE indx)
{
diff --git a/yjit/bindgen/src/main.rs b/yjit/bindgen/src/main.rs
index 92c91c4ec6..0824e80cd0 100644
--- a/yjit/bindgen/src/main.rs
+++ b/yjit/bindgen/src/main.rs
@@ -224,6 +224,7 @@ fn main() {
.allowlist_function("rb_ec_str_resurrect")
.allowlist_function("rb_str_concat_literals")
.allowlist_function("rb_obj_as_string_result")
+ .allowlist_function("rb_str_byte_substr")
// From include/ruby/internal/intern/parse.h
.allowlist_function("rb_backref_get")
diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs
index 9b84a41104..327e4fc05c 100644
--- a/yjit/src/codegen.rs
+++ b/yjit/src/codegen.rs
@@ -5304,6 +5304,37 @@ fn jit_rb_str_bytesize(
true
}
+/// Codegen specialization for `String#byteslice` called with exactly two
+/// arguments (`beg`, `len`).
+///
+/// Emits a direct C call to `rb_str_byte_substr(recv, beg, len)` and replaces
+/// the receiver and both arguments on the stack with the call's result.
+///
+/// Returns `true` when the specialized code was emitted, and `false` (without
+/// emitting the call) for any other `argc` -- presumably so the caller falls
+/// back to the generic method call path.
+fn jit_rb_str_byteslice(
+    jit: &mut JITState,
+    asm: &mut Assembler,
+    _ocb: &mut OutlinedCb,
+    _ci: *const rb_callinfo,
+    _cme: *const rb_callable_method_entry_t,
+    _block: Option<BlockHandler>,
+    argc: i32,
+    _known_recv_class: Option<VALUE>,
+) -> bool {
+    asm_comment!(asm, "String#byteslice");
+
+    // Only the two-argument form is specialized.
+    if argc != 2 {
+        return false;
+    }
+
+    // Raises when non-integers are passed in
+    jit_prepare_non_leaf_call(jit, asm);
+
+    // Stack layout, top first: len, beg, receiver.
+    let len = asm.stack_opnd(0);
+    let beg = asm.stack_opnd(1);
+    let recv = asm.stack_opnd(2);
+    let ret_opnd = asm.ccall(rb_str_byte_substr as *const u8, vec![recv, beg, len]);
+    asm.stack_pop(3);
+
+    // String#byteslice returns nil when the start is out of range or the
+    // length is negative, so the result must not be typed as a string.
+    let out_opnd = asm.stack_push(Type::Unknown);
+    asm.mov(out_opnd, ret_opnd);
+
+    true
+}
+
fn jit_rb_str_getbyte(
jit: &mut JITState,
asm: &mut Assembler,
@@ -9490,6 +9521,7 @@ pub fn yjit_reg_method_codegen_fns() {
yjit_reg_method(rb_cString, "size", jit_rb_str_length);
yjit_reg_method(rb_cString, "bytesize", jit_rb_str_bytesize);
yjit_reg_method(rb_cString, "getbyte", jit_rb_str_getbyte);
+ yjit_reg_method(rb_cString, "byteslice", jit_rb_str_byteslice);
yjit_reg_method(rb_cString, "<<", jit_rb_str_concat);
yjit_reg_method(rb_cString, "+@", jit_rb_str_uplus);
diff --git a/yjit/src/cruby_bindings.inc.rs b/yjit/src/cruby_bindings.inc.rs
index f0644edaf3..54cd51b61e 100644
--- a/yjit/src/cruby_bindings.inc.rs
+++ b/yjit/src/cruby_bindings.inc.rs
@@ -1049,6 +1049,7 @@ extern "C" {
pub fn rb_gvar_set(arg1: ID, arg2: VALUE) -> VALUE;
pub fn rb_ensure_iv_list_size(obj: VALUE, len: u32, newsize: u32);
pub fn rb_vm_barrier();
+ pub fn rb_str_byte_substr(str_: VALUE, beg: VALUE, len: VALUE) -> VALUE;
pub fn rb_obj_as_string_result(str_: VALUE, obj: VALUE) -> VALUE;
pub fn rb_str_concat_literals(num: usize, strary: *const VALUE) -> VALUE;
pub fn rb_ec_str_resurrect(ec: *mut rb_execution_context_struct, str_: VALUE) -> VALUE;