author    Maxime Chevalier-Boisvert <[email protected]>  2024-06-07 16:26:14 -0400
committer GitHub <[email protected]>  2024-06-07 16:26:14 -0400
commit    425e630ce73cf79fa5529df199dde47fc109a5de (patch)
tree      87766027af2f64de8a60b8826ecab568a9c0812d /yjit/src
parent    faad2bc6e13dc829f1d29feb5084761fb113fd77 (diff)
YJIT: implement variable-length context encoding scheme (#10888)
* Implement BitVector data structure for variable-length context encoding
* Rename method to make intent clearer
* Rename write_uint => push_uint to make intent clearer
* Implement debug trait for BitVector
* Fix bug in BitVector::read_uint_at(), enable more tests
* Add one more test for good measure
* Start sketching Context::encode()
* Progress on variable length context encoding
* Add tests. Fix bug.
* Encode stack state
* Add comments. Try to estimate context encoding size.
* More compact encoding for stack size
* Commit before rebase
* Change Context::encode() to take a BitVector as input
* Refactor BitVector::read_uint(), add helper read functions
* Implement Context::decode() function. Add test.
* Fix bug, add tests
* Rename methods
* Add Context::encode() and decode() methods using global data
* Make encode and decode methods use u32 indices
* Refactor YJIT to use variable-length context encoding
* Tag functions as allow unused
* Add a simple caching mechanism and stats for bytes per context etc
* Add comments, fix formatting
* Grow vector of bytes by 1.2x instead of 2x
* Add debug assert to check round-trip encoding-decoding
* Take some rustfmt formatting
* Add decoded_from field to Context to reuse previous encodings
* Remove old context stats
* Re-add stack_size assert
* Disable decoded_from optimization for now
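For orientation, here is a minimal sketch of the bit-packing round trip that the BitVector added in core.rs supports, written the way one of its unit tests might look (the push_*/read_* helpers are private to core.rs, and the values here are illustrative only):

    let mut bits = BitVector::new();
    bits.push_u1(1);          // 1-bit flag
    bits.push_u2(3);          // 2-bit field
    bits.push_uint(0xFF, 8);  // 8-bit field straddling the first byte boundary

    let mut idx = 0;
    assert!(bits.read_u1(&mut idx) == 1);
    assert!(bits.read_u2(&mut idx) == 3);
    assert!(bits.read_uint(&mut idx, 8) == 0xFF);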
Diffstat (limited to 'yjit/src')
-rw-r--r--  yjit/src/codegen.rs   11
-rw-r--r--  yjit/src/core.rs     627
-rw-r--r--  yjit/src/stats.rs     35
3 files changed, 613 insertions, 60 deletions
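Rough size arithmetic implied by the encoding added in core.rs below (an estimate read off the code, not measured data): a context that takes the compact stack path spends 1 + 2 bits on the stack_size/sp_offset header, 8 bits on the reg_temps bitmap, 8 bits on chain_depth_and_flags, and 4 bits on the EndOfCode opcode, for 23 bits (3 bytes) before any type or mapping ops. The previous #[repr(packed)] Context struct, whose 23-byte size assertion is deleted from the tests below, cost 23 bytes per stored context, and the default context now costs nothing beyond the single bit reserved at offset 0.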
diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs
index e509683ae0..755e64c244 100644
--- a/yjit/src/codegen.rs
+++ b/yjit/src/codegen.rs
@@ -5789,7 +5789,7 @@ fn jit_rb_str_getbyte(
RUBY_OFFSET_RSTRING_LEN as i32,
);
- // Exit if the indes is out of bounds
+ // Exit if the index is out of bounds
asm.cmp(idx, str_len_opnd);
asm.jge(Target::side_exit(Counter::getbyte_idx_out_of_bounds));
@@ -10333,6 +10333,9 @@ fn yjit_reg_method(klass: VALUE, mid_str: &str, gen_fn: MethodGenFn) {
/// Global state needed for code generation
pub struct CodegenGlobals {
+ /// Flat vector of bits to store compressed context data
+ context_data: BitVector,
+
/// Inline code block (fast path)
inline_cb: CodeBlock,
@@ -10448,6 +10451,7 @@ impl CodegenGlobals {
ocb.unwrap().mark_all_executable();
let codegen_globals = CodegenGlobals {
+ context_data: BitVector::new(),
inline_cb: cb,
outlined_cb: ocb,
leave_exit_code,
@@ -10476,6 +10480,11 @@ impl CodegenGlobals {
unsafe { CODEGEN_GLOBALS.as_mut().is_some() }
}
+ /// Get a mutable reference to the context data
+ pub fn get_context_data() -> &'static mut BitVector {
+ &mut CodegenGlobals::get_instance().context_data
+ }
+
/// Get a mutable reference to the inline code block
pub fn get_inline_cb() -> &'static mut CodeBlock {
&mut CodegenGlobals::get_instance().inline_cb
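A sketch of the call pattern this accessor is meant for, mirroring what Context::encode() and Context::decode() in core.rs do internally (ctx is a placeholder Context value; encode_into()/decode_from() are private to core.rs):

    // All encoded contexts are appended to this one process-wide BitVector
    // and referenced by their starting bit index, narrowed to a u32.
    let data = CodegenGlobals::get_context_data();
    let idx: u32 = ctx.encode_into(data).try_into().unwrap();
    // ...later, rebuild the full Context on demand:
    let decoded = Context::decode_from(data, idx as usize);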
diff --git a/yjit/src/core.rs b/yjit/src/core.rs
index 7c8532a0c3..2a8dde7087 100644
--- a/yjit/src/core.rs
+++ b/yjit/src/core.rs
@@ -457,8 +457,13 @@ const CHAIN_DEPTH_MASK: u8 = 0b00111111; // 63
/// Contains information we can use to specialize/optimize code
/// There are a lot of context objects so we try to keep the size small.
#[derive(Copy, Clone, Default, Eq, Hash, PartialEq, Debug)]
-#[repr(packed)]
pub struct Context {
+ // FIXME: decoded_from breaks == on contexts
+ /*
+ // Offset at which this context was previously encoded (zero if not)
+ decoded_from: u32,
+ */
+
// Number of values currently on the temporary stack
stack_size: u8,
@@ -498,6 +503,568 @@ pub struct Context {
inline_block: u64,
}
+#[derive(Clone)]
+pub struct BitVector {
+ // Flat vector of bytes to write into
+ bytes: Vec<u8>,
+
+ // Number of bits taken out of bytes allocated
+ num_bits: usize,
+}
+
+impl BitVector {
+ pub fn new() -> Self {
+ Self {
+ bytes: Vec::with_capacity(4096),
+ num_bits: 0,
+ }
+ }
+
+ #[allow(unused)]
+ pub fn num_bits(&self) -> usize {
+ self.num_bits
+ }
+
+ // Total number of bytes taken
+ #[allow(unused)]
+ pub fn num_bytes(&self) -> usize {
+ (self.num_bits / 8) + if (self.num_bits % 8) != 0 { 1 } else { 0 }
+ }
+
+ // Write/append an unsigned integer value
+ fn push_uint(&mut self, mut val: u64, mut num_bits: usize) {
+ assert!(num_bits <= 64);
+
+ // Mask out bits above the number of bits requested
+ let mut val_bits = val;
+ if num_bits < 64 {
+ val_bits &= (1 << num_bits) - 1;
+ assert!(val == val_bits);
+ }
+
+ // Number of bits encoded in the last byte
+ let rem_bits = self.num_bits % 8;
+
+ // Encode as many bits as we can in this last byte
+ if rem_bits != 0 {
+ let num_enc = std::cmp::min(num_bits, 8 - rem_bits);
+ let bit_mask = (1 << num_enc) - 1;
+ let frac_bits = (val & bit_mask) << rem_bits;
+ let frac_bits: u8 = frac_bits.try_into().unwrap();
+ let last_byte_idx = self.bytes.len() - 1;
+ self.bytes[last_byte_idx] |= frac_bits;
+
+ self.num_bits += num_enc;
+ num_bits -= num_enc;
+ val >>= num_enc;
+ }
+
+ // While we have bits left to encode
+ while num_bits > 0 {
+ // Grow with a 1.2x growth factor instead of 2x
+ assert!(self.num_bits % 8 == 0);
+ let num_bytes = self.num_bits / 8;
+ if num_bytes == self.bytes.capacity() {
+ self.bytes.reserve_exact(self.bytes.len() / 5);
+ }
+
+ let bits = val & 0xFF;
+ let bits: u8 = bits.try_into().unwrap();
+ self.bytes.push(bits);
+
+ let bits_to_encode = std::cmp::min(num_bits, 8);
+ self.num_bits += bits_to_encode;
+ num_bits -= bits_to_encode;
+ val >>= bits_to_encode;
+ }
+ }
+
+ fn push_u8(&mut self, val: u8) {
+ self.push_uint(val as u64, 8);
+ }
+
+ fn push_u4(&mut self, val: u8) {
+ assert!(val < 16);
+ self.push_uint(val as u64, 4);
+ }
+
+ fn push_u3(&mut self, val: u8) {
+ assert!(val < 8);
+ self.push_uint(val as u64, 3);
+ }
+
+ fn push_u2(&mut self, val: u8) {
+ assert!(val < 4);
+ self.push_uint(val as u64, 2);
+ }
+
+ fn push_u1(&mut self, val: u8) {
+ assert!(val < 2);
+ self.push_uint(val as u64, 1);
+ }
+
+ // Push a context encoding opcode
+ fn push_op(&mut self, op: CtxOp) {
+ self.push_u4(op as u8);
+ }
+
+ // Read a uint value at a given bit index
+ // The bit index is incremented after the value is read
+ fn read_uint(&self, bit_idx: &mut usize, mut num_bits: usize) -> u64 {
+ let start_bit_idx = *bit_idx;
+ let mut cur_idx = *bit_idx;
+
+ // Read the bits in the first byte
+ let bit_mod = cur_idx % 8;
+ let bits_in_byte = self.bytes[cur_idx / 8] >> bit_mod;
+
+ let num_bits_in_byte = std::cmp::min(num_bits, 8 - bit_mod);
+ cur_idx += num_bits_in_byte;
+ num_bits -= num_bits_in_byte;
+
+ let mut out_bits = (bits_in_byte as u64) & ((1 << num_bits_in_byte) - 1);
+
+ // While we have bits left to read
+ while num_bits > 0 {
+ let num_bits_in_byte = std::cmp::min(num_bits, 8);
+ assert!(cur_idx % 8 == 0);
+ let byte = self.bytes[cur_idx / 8] as u64;
+
+ let bits_in_byte = byte & ((1 << num_bits) - 1);
+ out_bits |= bits_in_byte << (cur_idx - start_bit_idx);
+
+ // Move to the next byte/offset
+ cur_idx += num_bits_in_byte;
+ num_bits -= num_bits_in_byte;
+ }
+
+ // Update the read index
+ *bit_idx = cur_idx;
+
+ out_bits
+ }
+
+ fn read_u8(&self, bit_idx: &mut usize) -> u8 {
+ self.read_uint(bit_idx, 8) as u8
+ }
+
+ fn read_u4(&self, bit_idx: &mut usize) -> u8 {
+ self.read_uint(bit_idx, 4) as u8
+ }
+
+ fn read_u3(&self, bit_idx: &mut usize) -> u8 {
+ self.read_uint(bit_idx, 3) as u8
+ }
+
+ fn read_u2(&self, bit_idx: &mut usize) -> u8 {
+ self.read_uint(bit_idx, 2) as u8
+ }
+
+ fn read_u1(&self, bit_idx: &mut usize) -> u8 {
+ self.read_uint(bit_idx, 1) as u8
+ }
+
+ fn read_op(&self, bit_idx: &mut usize) -> CtxOp {
+ unsafe { std::mem::transmute(self.read_u4(bit_idx)) }
+ }
+}
+
+impl fmt::Debug for BitVector {
+ fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+ // We print the higher bytes first
+ for (idx, byte) in self.bytes.iter().enumerate().rev() {
+ write!(f, "{:08b}", byte)?;
+
+ // Insert a separator between each byte
+ if idx > 0 {
+ write!(f, "|")?;
+ }
+ }
+
+ Ok(())
+ }
+}
+
+#[cfg(test)]
+mod bitvector_tests {
+ use super::*;
+
+ #[test]
+ fn write_3() {
+ let mut arr = BitVector::new();
+ arr.push_uint(3, 2);
+ assert!(arr.read_uint(&mut 0, 2) == 3);
+ }
+
+ #[test]
+ fn write_11() {
+ let mut arr = BitVector::new();
+ arr.push_uint(1, 1);
+ arr.push_uint(1, 1);
+ assert!(arr.read_uint(&mut 0, 2) == 3);
+ }
+
+ #[test]
+ fn write_11_overlap() {
+ let mut arr = BitVector::new();
+ arr.push_uint(0, 7);
+ arr.push_uint(3, 2);
+ arr.push_uint(1, 1);
+
+ //dbg!(arr.read_uint(7, 2));
+ assert!(arr.read_uint(&mut 7, 2) == 3);
+ }
+
+ #[test]
+ fn write_ff_0() {
+ let mut arr = BitVector::new();
+ arr.push_uint(0xFF, 8);
+ assert!(arr.read_uint(&mut 0, 8) == 0xFF);
+ }
+
+ #[test]
+ fn write_ff_3() {
+ // Write 0xFF at bit index 3
+ let mut arr = BitVector::new();
+ arr.push_uint(0, 3);
+ arr.push_uint(0xFF, 8);
+ assert!(arr.read_uint(&mut 3, 8) == 0xFF);
+ }
+
+ #[test]
+ fn write_ff_sandwich() {
+ // Write 0xFF sandwiched between zeros
+ let mut arr = BitVector::new();
+ arr.push_uint(0, 3);
+ arr.push_u8(0xFF);
+ arr.push_uint(0, 3);
+ assert!(arr.read_uint(&mut 3, 8) == 0xFF);
+ }
+
+ #[test]
+ fn write_read_u32_max() {
+ let mut arr = BitVector::new();
+ arr.push_uint(0xFF_FF_FF_FF, 32);
+ assert!(arr.read_uint(&mut 0, 32) == 0xFF_FF_FF_FF);
+ }
+
+ #[test]
+ fn write_read_u32_max_64b() {
+ let mut arr = BitVector::new();
+ arr.push_uint(0xFF_FF_FF_FF, 64);
+ assert!(arr.read_uint(&mut 0, 64) == 0xFF_FF_FF_FF);
+ }
+
+ #[test]
+ fn write_read_u64_max() {
+ let mut arr = BitVector::new();
+ arr.push_uint(u64::MAX, 64);
+ assert!(arr.read_uint(&mut 0, 64) == u64::MAX);
+ }
+
+ #[test]
+ fn encode_default() {
+ let mut bits = BitVector::new();
+ let ctx = Context::default();
+ let start_idx = ctx.encode_into(&mut bits);
+ assert!(start_idx == 0);
+ assert!(bits.num_bits() > 0);
+ assert!(bits.num_bytes() > 0);
+
+ // Make sure that the round trip matches the input
+ let ctx2 = Context::decode_from(&bits, 0);
+ assert!(ctx2 == ctx);
+ }
+
+ #[test]
+ fn encode_default_2x() {
+ let mut bits = BitVector::new();
+
+ let ctx0 = Context::default();
+ let idx0 = ctx0.encode_into(&mut bits);
+
+ let mut ctx1 = Context::default();
+ ctx1.reg_temps = RegTemps(1);
+ let idx1 = ctx1.encode_into(&mut bits);
+
+ // Make sure that we can encode two contexts successively
+ let ctx0_dec = Context::decode_from(&bits, idx0);
+ let ctx1_dec = Context::decode_from(&bits, idx1);
+ assert!(ctx0_dec == ctx0);
+ assert!(ctx1_dec == ctx1);
+ }
+
+ #[test]
+ fn regress_reg_temps() {
+ let mut bits = BitVector::new();
+ let mut ctx = Context::default();
+ ctx.reg_temps = RegTemps(1);
+ ctx.encode_into(&mut bits);
+
+ let b0 = bits.read_u1(&mut 0);
+ assert!(b0 == 1);
+
+ // Make sure that the round trip matches the input
+ let ctx2 = Context::decode_from(&bits, 0);
+ assert!(ctx2 == ctx);
+ }
+}
+
+// Context encoding opcodes (4 bits)
+#[derive(Debug, Copy, Clone)]
+#[repr(u8)]
+enum CtxOp {
+ // Self type (4 bits)
+ SetSelfType = 0,
+
+ // Local idx (3 bits), local type (4 bits)
+ SetLocalType,
+
+ // Map stack temp to stack with known type
+ // Temp idx (3 bits), known type (4 bits)
+ SetTempType,
+
+ // Map stack temp to a local variable
+ // Temp idx (3 bits), local idx (3 bits)
+ MapTempLocal,
+
+ // Map a stack temp to self
+ // Temp idx (3 bits)
+ MapTempSelf,
+
+ // Set inline block pointer (8 bytes)
+ SetInlineBlock,
+
+ // End of encoding
+ EndOfCode,
+}
+
+// Cache of the last context encoded
+// Empirically this saves a few percent of memory
+// We can experiment with varying the size of this cache
+static mut LAST_CTX_ENCODED: Option<(Context, u32)> = None;
+
+impl Context {
+ pub fn encode(&self) -> u32 {
+ incr_counter!(num_contexts_encoded);
+
+ if *self == Context::default() {
+ return 0;
+ }
+
+ /*
+ // If this context was previously decoded and was not changed since
+ if self.decoded_from != 0 && Self::decode(self.decoded_from) == *self {
+ return self.decoded_from;
+ }
+ */
+
+ // If this context was recently encoded (cache check)
+ unsafe {
+ if let Some((ctx, idx)) = LAST_CTX_ENCODED {
+ if ctx == *self {
+ return idx;
+ }
+ }
+ }
+
+ let context_data = CodegenGlobals::get_context_data();
+
+ // Offset 0 is reserved for the default context
+ if context_data.num_bits() == 0 {
+ context_data.push_u1(0);
+ }
+
+ let idx = self.encode_into(context_data);
+ let idx: u32 = idx.try_into().unwrap();
+
+ unsafe {
+ LAST_CTX_ENCODED = Some((*self, idx));
+ }
+
+ // In debug mode, check that the round-trip decoding always matches
+ debug_assert!(Self::decode(idx) == *self);
+
+ idx
+ }
+
+ pub fn decode(start_idx: u32) -> Context {
+ if start_idx == 0 {
+ return Context::default();
+ };
+
+ let context_data = CodegenGlobals::get_context_data();
+ let ctx = Self::decode_from(context_data, start_idx as usize);
+
+ // Keep track of the fact that this context was previously encoded
+ //ctx.decoded_from = start_idx;
+
+ ctx
+ }
+
+ // Encode into a compressed context representation in a bit vector
+ fn encode_into(&self, bits: &mut BitVector) -> usize {
+ let start_idx = bits.num_bits();
+
+ // NOTE: this value is often zero or falls within
+ // a small range, so could be compressed
+ //println!("stack_size={}", self.stack_size);
+ //println!("sp_offset={}", self.sp_offset);
+ //println!("chain_depth_and_flags={}", self.chain_depth_and_flags);
+
+ // Most of the time, the stack size is small and sp offset has the same value
+ if (self.stack_size as i64) == (self.sp_offset as i64) && self.stack_size < 4 {
+ // One single bit to signify a compact stack_size/sp_offset encoding
+ bits.push_u1(1);
+ bits.push_u2(self.stack_size);
+ } else {
+ // Full stack size encoding
+ bits.push_u1(0);
+
+ // Number of values currently on the temporary stack
+ bits.push_u8(self.stack_size);
+
+ // sp_offset: i8,
+ bits.push_u8(self.sp_offset as u8);
+ }
+
+ // Bitmap of which stack temps are in a register
+ let RegTemps(reg_temps) = self.reg_temps;
+ bits.push_u8(reg_temps);
+
+ // chain_depth_and_flags: u8,
+ bits.push_u8(self.chain_depth_and_flags);
+
+ // Encode the self type if known
+ if self.self_type != Type::Unknown {
+ bits.push_op(CtxOp::SetSelfType);
+ bits.push_u4(self.self_type as u8);
+ }
+
+ // Encode the local types if known
+ for local_idx in 0..MAX_LOCAL_TYPES {
+ let t = self.get_local_type(local_idx);
+ if t != Type::Unknown {
+ bits.push_op(CtxOp::SetLocalType);
+ bits.push_u3(local_idx as u8);
+ bits.push_u4(t as u8);
+ }
+ }
+
+ // Encode stack temps
+ for stack_idx in 0..MAX_TEMP_TYPES {
+ let mapping = self.get_temp_mapping(stack_idx);
+
+ match mapping.get_kind() {
+ MapToStack => {
+ let t = mapping.get_type();
+ if t != Type::Unknown {
+ // Temp idx (3 bits), known type (4 bits)
+ bits.push_op(CtxOp::SetTempType);
+ bits.push_u3(stack_idx as u8);
+ bits.push_u4(t as u8);
+ }
+ }
+
+ MapToLocal => {
+ // Temp idx (3 bits), local idx (3 bits)
+ let local_idx = mapping.get_local_idx();
+ bits.push_op(CtxOp::MapTempLocal);
+ bits.push_u3(stack_idx as u8);
+ bits.push_u3(local_idx as u8);
+ }
+
+ MapToSelf => {
+ // Temp idx (3 bits)
+ bits.push_op(CtxOp::MapTempSelf);
+ bits.push_u3(stack_idx as u8);
+ }
+ }
+ }
+
+ // Inline block pointer
+ if self.inline_block != 0 {
+ bits.push_op(CtxOp::SetInlineBlock);
+ bits.push_uint(self.inline_block, 64);
+ }
+
+ // TODO: should we add an op for end-of-encoding,
+ // or store num ops at the beginning?
+ bits.push_op(CtxOp::EndOfCode);
+
+ start_idx
+ }
+
+ // Decode a compressed context representation from a bit vector
+ fn decode_from(bits: &BitVector, start_idx: usize) -> Context {
+ let mut ctx = Context::default();
+
+ let mut idx = start_idx;
+
+ // Small vs large stack size encoding
+ if bits.read_u1(&mut idx) == 1 {
+ ctx.stack_size = bits.read_u2(&mut idx);
+ ctx.sp_offset = ctx.stack_size as i8;
+ } else {
+ ctx.stack_size = bits.read_u8(&mut idx);
+ ctx.sp_offset = bits.read_u8(&mut idx) as i8;
+ }
+
+ // Bitmap of which stack temps are in a register
+ ctx.reg_temps = RegTemps(bits.read_u8(&mut idx));
+
+ // chain_depth_and_flags: u8
+ ctx.chain_depth_and_flags = bits.read_u8(&mut idx);
+
+ loop {
+ //println!("reading op");
+ let op = bits.read_op(&mut idx);
+ //println!("got op {:?}", op);
+
+ match op {
+ CtxOp::SetSelfType => {
+ ctx.self_type = unsafe { transmute(bits.read_u4(&mut idx)) };
+ }
+
+ CtxOp::SetLocalType => {
+ let local_idx = bits.read_u3(&mut idx) as usize;
+ let t = unsafe { transmute(bits.read_u4(&mut idx)) };
+ ctx.set_local_type(local_idx, t);
+ }
+
+ // Map temp to stack (known type)
+ CtxOp::SetTempType => {
+ let temp_idx = bits.read_u3(&mut idx) as usize;
+ let t = unsafe { transmute(bits.read_u4(&mut idx)) };
+ ctx.set_temp_mapping(temp_idx, TempMapping::map_to_stack(t));
+ }
+
+ // Map temp to local
+ CtxOp::MapTempLocal => {
+ let temp_idx = bits.read_u3(&mut idx) as usize;
+ let local_idx = bits.read_u3(&mut idx);
+ ctx.set_temp_mapping(temp_idx, TempMapping::map_to_local(local_idx));
+ }
+
+ // Map temp to self
+ CtxOp::MapTempSelf => {
+ let temp_idx = bits.read_u3(&mut idx) as usize;
+ ctx.set_temp_mapping(temp_idx, TempMapping::map_to_self());
+ }
+
+ // Inline block pointer
+ CtxOp::SetInlineBlock => {
+ ctx.inline_block = bits.read_uint(&mut idx, 64);
+ }
+
+ CtxOp::EndOfCode => break,
+ }
+ }
+
+ ctx
+ }
+}
+
/// Tuple of (iseq, idx) used to identify basic blocks
/// There are a lot of blockid objects so we try to keep the size small.
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
@@ -659,7 +1226,7 @@ impl BranchTarget {
}
}
- fn get_ctx(&self) -> Context {
+ fn get_ctx(&self) -> u32 {
match self {
BranchTarget::Stub(stub) => stub.ctx,
BranchTarget::Block(blockref) => unsafe { blockref.as_ref() }.ctx,
@@ -686,7 +1253,7 @@ struct BranchStub {
address: Option<CodePtr>,
iseq: Cell<IseqPtr>,
iseq_idx: IseqIdx,
- ctx: Context,
+ ctx: u32,
}
/// Store info about an outgoing branch in a code segment
@@ -808,6 +1375,9 @@ impl PendingBranch {
return Some(block.start_addr);
}
+ // Compress/encode the context
+ let ctx = Context::encode(ctx);
+
// The branch struct is uninitialized right now but has a stable address.
// We make sure the stub runs after the branch is initialized.
let branch_struct_addr = self.uninit_branch.as_ptr() as usize;
@@ -819,7 +1389,7 @@ impl PendingBranch {
address: Some(stub_addr),
iseq: Cell::new(target.iseq),
iseq_idx: target.idx,
- ctx: *ctx,
+ ctx,
})))));
}
@@ -912,7 +1482,7 @@ pub struct Block {
// Context at the start of the block
// This should never be mutated
- ctx: Context,
+ ctx: u32,
// Positions where the generated code starts and ends
start_addr: CodePtr,
@@ -1085,15 +1655,6 @@ pub fn for_each_iseq<F: FnMut(IseqPtr)>(mut callback: F) {
unsafe { rb_yjit_for_each_iseq(Some(callback_wrapper), (&mut data) as *mut _ as *mut c_void) };
}
-/// Iterate over all ISEQ payloads
-pub fn for_each_iseq_payload<F: FnMut(&IseqPayload)>(mut callback: F) {
- for_each_iseq(|iseq| {
- if let Some(iseq_payload) = get_iseq_payload(iseq) {
- callback(iseq_payload);
- }
- });
-}
-
/// Iterate over all on-stack ISEQs
pub fn for_each_on_stack_iseq<F: FnMut(IseqPtr)>(mut callback: F) {
unsafe extern "C" fn callback_wrapper(iseq: IseqPtr, data: *mut c_void) {
@@ -1425,13 +1986,17 @@ pub fn take_version_list(blockid: BlockId) -> VersionList {
fn get_num_versions(blockid: BlockId, inlined: bool) -> usize {
let insn_idx = blockid.idx.as_usize();
match get_iseq_payload(blockid.iseq) {
+
+ // FIXME: this counting logic is going to be expensive.
+ // We should avoid it if possible
+
Some(payload) => {
payload
.version_map
.get(insn_idx)
.map(|versions| {
versions.iter().filter(|&&version|
- unsafe { version.as_ref() }.ctx.inline() == inlined
+ Context::decode(unsafe { version.as_ref() }.ctx).inline() == inlined
).count()
})
.unwrap_or(0)
@@ -1476,10 +2041,11 @@ fn find_block_version(blockid: BlockId, ctx: &Context) -> Option<BlockRef> {
// For each version matching the blockid
for blockref in versions.iter() {
let block = unsafe { blockref.as_ref() };
+ let block_ctx = Context::decode(block.ctx);
// Note that we always prefer the first matching
// version found because of inline-cache chains
- match ctx.diff(&block.ctx) {
+ match ctx.diff(&block_ctx) {
TypeDiff::Compatible(diff) if diff < best_diff => {
best_version = Some(*blockref);
best_diff = diff;
@@ -1561,7 +2127,7 @@ unsafe fn add_block_version(blockref: BlockRef, cb: &CodeBlock) {
let block = unsafe { blockref.as_ref() };
// Function entry blocks must have stack size 0
- assert!(!(block.iseq_range.start == 0 && block.ctx.stack_size > 0));
+ debug_assert!(!(block.iseq_range.start == 0 && Context::decode(block.ctx).stack_size > 0));
let version_list = get_or_create_version_list(block.get_blockid());
@@ -1620,12 +2186,14 @@ impl JITState {
incr_counter_by!(num_gc_obj_refs, gc_obj_offsets.len());
+ let ctx = Context::encode(&self.get_starting_ctx());
+
// Make the new block
let block = MaybeUninit::new(Block {
start_addr,
iseq: Cell::new(self.get_iseq()),
iseq_range: self.get_starting_insn_idx()..end_insn_idx,
- ctx: self.get_starting_ctx(),
+ ctx,
end_addr: Cell::new(end_addr),
incoming: MutableBranchList(Cell::default()),
gc_obj_offsets: gc_obj_offsets.into_boxed_slice(),
@@ -2382,6 +2950,7 @@ fn gen_block_series_body(
};
// Generate new block using context from the last branch.
+ let requested_ctx = Context::decode(requested_ctx);
let result = gen_single_block(requested_blockid, &requested_ctx, ec, cb, ocb);
// If the block failed to compile
@@ -2769,7 +3338,8 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -
return target.get_address().unwrap().raw_ptr(cb);
}
- (target.get_blockid(), target.get_ctx())
+ let target_ctx = Context::decode(target.get_ctx());
+ (target.get_blockid(), target_ctx)
};
let (cfp, original_interp_sp) = unsafe {
@@ -2906,7 +3476,7 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) -
/// Generate a "stub", a piece of code that calls the compiler back when run.
/// A piece of code that redeems for more code; a thunk for code.
fn gen_branch_stub(
- ctx: &Context,
+ ctx: u32,
ocb: &mut OutlinedCb,
branch_struct_address: usize,
target_idx: u32,
@@ -2914,8 +3484,8 @@ fn gen_branch_stub(
let ocb = ocb.unwrap();
let mut asm = Assembler::new();
- asm.ctx = *ctx;
- asm.set_reg_temps(ctx.reg_temps);
+ asm.ctx = Context::decode(ctx);
+ asm.set_reg_temps(asm.ctx.reg_temps);
asm_comment!(asm, "branch stub hit");
if asm.ctx.is_return_landing() {
@@ -3112,7 +3682,7 @@ pub fn gen_direct_jump(jit: &mut JITState, ctx: &Context, target0: BlockId, asm:
// compile the target block right after this one (fallthrough).
BranchTarget::Stub(Box::new(BranchStub {
address: None,
- ctx: *ctx,
+ ctx: Context::encode(ctx),
iseq: Cell::new(target0.iseq),
iseq_idx: target0.idx,
}))
@@ -3364,7 +3934,7 @@ pub fn invalidate_block_version(blockref: &BlockRef) {
}
// Create a stub for this branch target
- let stub_addr = gen_branch_stub(&block.ctx, ocb, branchref.as_ptr() as usize, target_idx as u32);
+ let stub_addr = gen_branch_stub(block.ctx, ocb, branchref.as_ptr() as usize, target_idx as u32);
// In case we were unable to generate a stub (e.g. OOM). Use the block's
// exit instead of a stub for the block. It's important that we
@@ -3547,11 +4117,6 @@ mod tests {
}
#[test]
- fn context_size() {
- assert_eq!(mem::size_of::<Context>(), 23);
- }
-
- #[test]
fn types() {
// Valid src => dst
assert_eq!(Type::Unknown.diff(Type::Unknown), TypeDiff::Compatible(0));
@@ -3695,7 +4260,7 @@ mod tests {
iseq: Cell::new(ptr::null()),
iseq_idx: 0,
address: None,
- ctx: Context::default(),
+ ctx: 0,
})))))]
};
// For easier soundness reasoning, make sure the reference returned does not outlive the
@@ -3728,7 +4293,7 @@ mod tests {
iseq: Cell::new(ptr::null()),
iseq_idx: 0,
address: None,
- ctx: Context::default(),
+ ctx: 0,
})))));
// Invalid ISeq; we never dereference it.
let secret_iseq = NonNull::<rb_iseq_t>::dangling().as_ptr();
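Putting encode_into() and decode_from() above together, the per-context bit layout works out roughly as follows (a summary read off the code, not a normative format description):

    // Header:
    //   1 bit           compact flag
    //   2 bits          stack_size (compact path; sp_offset is taken to be equal)
    //   or 8 + 8 bits   stack_size, sp_offset (full path)
    //   8 bits          reg_temps bitmap
    //   8 bits          chain_depth_and_flags
    // Ops, each introduced by a 4-bit CtxOp:
    //   SetSelfType     4-bit type
    //   SetLocalType    3-bit local idx, 4-bit type
    //   SetTempType     3-bit temp idx, 4-bit type
    //   MapTempLocal    3-bit temp idx, 3-bit local idx
    //   MapTempSelf     3-bit temp idx
    //   SetInlineBlock  64-bit pointer
    //   EndOfCode       terminates the stream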
diff --git a/yjit/src/stats.rs b/yjit/src/stats.rs
index 1f94c19398..6a7de68576 100644
--- a/yjit/src/stats.rs
+++ b/yjit/src/stats.rs
@@ -10,8 +10,6 @@ use std::time::Instant;
use std::collections::HashMap;
use crate::codegen::CodegenGlobals;
-use crate::core::Context;
-use crate::core::for_each_iseq_payload;
use crate::cruby::*;
use crate::options::*;
use crate::yjit::yjit_enabled_p;
@@ -557,6 +555,7 @@ make_counters! {
branch_insn_count,
branch_known_count,
max_inline_versions,
+ num_contexts_encoded,
freed_iseq_count,
@@ -641,8 +640,8 @@ pub extern "C" fn rb_yjit_print_stats_p(_ec: EcPtr, _ruby_self: VALUE) -> VALUE
/// Primitive called in yjit.rb.
/// Export all YJIT statistics as a Ruby hash.
#[no_mangle]
-pub extern "C" fn rb_yjit_get_stats(_ec: EcPtr, _ruby_self: VALUE, context: VALUE) -> VALUE {
- with_vm_lock(src_loc!(), || rb_yjit_gen_stats_dict(context == Qtrue))
+pub extern "C" fn rb_yjit_get_stats(_ec: EcPtr, _ruby_self: VALUE) -> VALUE {
+ with_vm_lock(src_loc!(), || rb_yjit_gen_stats_dict())
}
/// Primitive called in yjit.rb
@@ -701,7 +700,7 @@ pub extern "C" fn rb_yjit_incr_counter(counter_name: *const std::os::raw::c_char
}
/// Export all YJIT statistics as a Ruby hash.
-fn rb_yjit_gen_stats_dict(context: bool) -> VALUE {
+fn rb_yjit_gen_stats_dict() -> VALUE {
// If YJIT is not enabled, return Qnil
if !yjit_enabled_p() {
return Qnil;
@@ -744,14 +743,9 @@ fn rb_yjit_gen_stats_dict(context: bool) -> VALUE {
// Rust global allocations in bytes
hash_aset_usize!(hash, "yjit_alloc_size", GLOBAL_ALLOCATOR.alloc_size.load(Ordering::SeqCst));
- // `context` is true at RubyVM::YJIT._print_stats for --yjit-stats. It's false by default
- // for RubyVM::YJIT.runtime_stats because counting all Contexts could be expensive.
- if context {
- let live_context_count = get_live_context_count();
- let context_size = std::mem::size_of::<Context>();
- hash_aset_usize!(hash, "live_context_count", live_context_count);
- hash_aset_usize!(hash, "live_context_size", live_context_count * context_size);
- }
+ // How many bytes we are using to store context data
+ let context_data = CodegenGlobals::get_context_data();
+ hash_aset_usize!(hash, "context_data_bytes", context_data.num_bytes());
// VM instructions count
hash_aset_usize!(hash, "vm_insns_count", rb_vm_insns_count as usize);
@@ -846,21 +840,6 @@ fn rb_yjit_gen_stats_dict(context: bool) -> VALUE {
hash
}
-fn get_live_context_count() -> usize {
- let mut count = 0;
- for_each_iseq_payload(|iseq_payload| {
- for blocks in iseq_payload.version_map.iter() {
- for block in blocks.iter() {
- count += unsafe { block.as_ref() }.get_ctx_count();
- }
- }
- for block in iseq_payload.dead_blocks.iter() {
- count += unsafe { block.as_ref() }.get_ctx_count();
- }
- });
- count
-}
-
/// Record the backtrace when a YJIT exit occurs. This functionality requires
/// that the stats feature is enabled as well as the --yjit-trace-exits option.
///