author | Maxime Chevalier-Boisvert <[email protected]> | 2024-06-07 16:26:14 -0400 |
---|---|---|
committer | GitHub <[email protected]> | 2024-06-07 16:26:14 -0400 |
commit | 425e630ce73cf79fa5529df199dde47fc109a5de (patch) | |
tree | 87766027af2f64de8a60b8826ecab568a9c0812d /yjit/src | |
parent | faad2bc6e13dc829f1d29feb5084761fb113fd77 (diff) | |
YJIT: implement variable-length context encoding scheme (#10888)
* Implement BitVector data structure for variable-length context encoding
* Rename method to make intent clearer
* Rename write_uint => push_uint to make intent clearer
* Implement debug trait for BitVector
* Fix bug in BitVector::read_uint_at(), enable more tests
* Add one more test for good measure
* Start sketching Context::encode()
* Progress on variable length context encoding
* Add tests. Fix bug.
* Encode stack state
* Add comments. Try to estimate context encoding size.
* More compact encoding for stack size
* Commit before rebase
* Change Context::encode() to take a BitVector as input
* Refactor BitVector::read_uint(), add helper read functions
* Implement Context::decode() function. Add test.
* Fix bug, add tests
* Rename methods
* Add Context::encode() and decode() methods using global data
* Make encode and decode methods use u32 indices
* Refactor YJIT to use variable-length context encoding
* Tag functions as allow unused
* Add a simple caching mechanism and stats for bytes per context etc
* Add comments, fix formatting
* Grow vector of bytes by 1.2x instead of 2x
* Add debug assert to check round-trip encoding-decoding
* Take some rustfmt formatting
* Add decoded_from field to Context to reuse previous encodings
* Remove olde context stats
* Re-add stack_size assert
* Disable decoded_from optimization for now
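The bullets above describe the new BitVector: an append-only buffer that packs integer fields of arbitrary bit width, least-significant bits first, into a flat `Vec<u8>`, plus a cursor-based read path. Below is a rough, self-contained sketch of that packing idea; `BitBuf` and its two methods are illustrative stand-ins, not the YJIT code (which also caps vector growth at roughly 1.2x and adds fixed-width helpers such as `push_u4`/`read_u4`).

```rust
// Minimal sketch of LSB-first, variable-width bit packing.
struct BitBuf {
    bytes: Vec<u8>,  // packed storage
    num_bits: usize, // number of bits written so far
}

impl BitBuf {
    fn new() -> Self {
        Self { bytes: Vec::new(), num_bits: 0 }
    }

    // Append the low `num_bits` bits of `val`, least significant bits first
    fn push_uint(&mut self, mut val: u64, mut num_bits: usize) {
        assert!(num_bits <= 64);
        while num_bits > 0 {
            let bit_pos = self.num_bits % 8;
            if bit_pos == 0 {
                self.bytes.push(0); // start a fresh byte
            }
            // How many bits fit into the current byte
            let n = std::cmp::min(num_bits, 8 - bit_pos);
            let mask = (1u64 << n) - 1;
            let last = self.bytes.len() - 1;
            self.bytes[last] |= ((val & mask) as u8) << bit_pos;
            val >>= n;
            num_bits -= n;
            self.num_bits += n;
        }
    }

    // Read `num_bits` bits starting at `*bit_idx`, advancing the cursor
    fn read_uint(&self, bit_idx: &mut usize, mut num_bits: usize) -> u64 {
        let mut out = 0u64;
        let mut shift = 0;
        while num_bits > 0 {
            let byte = self.bytes[*bit_idx / 8] as u64;
            let bit_pos = *bit_idx % 8;
            let n = std::cmp::min(num_bits, 8 - bit_pos);
            out |= ((byte >> bit_pos) & ((1u64 << n) - 1)) << shift;
            shift += n;
            *bit_idx += n;
            num_bits -= n;
        }
        out
    }
}

fn main() {
    let mut bits = BitBuf::new();
    bits.push_uint(0b101, 3); // 3-bit field
    bits.push_uint(0xFF, 8);  // 8-bit field straddling a byte boundary
    let mut idx = 0;
    assert_eq!(bits.read_uint(&mut idx, 3), 0b101);
    assert_eq!(bits.read_uint(&mut idx, 8), 0xFF);
    println!("round trip ok, {} bits in {} bytes", bits.num_bits, bits.bytes.len());
}
```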
Diffstat (limited to 'yjit/src')
-rw-r--r-- | yjit/src/codegen.rs | 11 |
-rw-r--r-- | yjit/src/core.rs | 627 |
-rw-r--r-- | yjit/src/stats.rs | 35 |
3 files changed, 613 insertions, 60 deletions
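Before the diff itself, here is a toy, byte-granularity illustration of the layout that `Context::encode_into`/`decode_from` (added to core.rs below) give to an encoded context: a small always-present header, then opcode-tagged entries for only the fields that differ from the default, then an end marker. `ToyCtx`, its fields, and the opcode values are hypothetical and widened to whole bytes for readability; the real encoder packs 1- to 4-bit fields into the BitVector.

```rust
// Toy illustration of "header + opcode-tagged optional fields + end marker".
#[derive(Debug, Default, PartialEq, Clone, Copy)]
struct ToyCtx {
    stack_size: u8,
    self_type: u8, // 0 means "unknown", i.e. the default
}

const OP_SET_SELF_TYPE: u8 = 0;
const OP_END: u8 = 1;

// Append an encoding of `ctx` and return its start offset — the compact
// handle that callers store instead of the context itself
fn encode(ctx: &ToyCtx, out: &mut Vec<u8>) -> usize {
    let start = out.len();
    out.push(ctx.stack_size); // fixed header, always present
    if ctx.self_type != 0 {
        out.push(OP_SET_SELF_TYPE); // optional field, tagged by an opcode
        out.push(ctx.self_type);
    }
    out.push(OP_END);
    start
}

fn decode(bytes: &[u8], mut idx: usize) -> ToyCtx {
    let mut ctx = ToyCtx::default();
    ctx.stack_size = bytes[idx];
    idx += 1;
    loop {
        match bytes[idx] {
            OP_SET_SELF_TYPE => {
                ctx.self_type = bytes[idx + 1];
                idx += 2;
            }
            _ => break, // OP_END
        }
    }
    ctx
}

fn main() {
    let mut data = Vec::new();
    let dflt = encode(&ToyCtx::default(), &mut data); // header + end: 2 bytes
    let known = encode(&ToyCtx { stack_size: 2, self_type: 5 }, &mut data);
    assert_eq!(decode(&data, dflt), ToyCtx::default());
    assert_eq!(decode(&data, known), ToyCtx { stack_size: 2, self_type: 5 });
    println!("{} bytes for two contexts", data.len()); // 2 + 4 = 6
}
```

The point of the shape is that a mostly-default context costs only the header and the end marker; sparse, non-default state is what pays for its own opcode.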
diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs index e509683ae0..755e64c244 100644 --- a/yjit/src/codegen.rs +++ b/yjit/src/codegen.rs @@ -5789,7 +5789,7 @@ fn jit_rb_str_getbyte( RUBY_OFFSET_RSTRING_LEN as i32, ); - // Exit if the indes is out of bounds + // Exit if the index is out of bounds asm.cmp(idx, str_len_opnd); asm.jge(Target::side_exit(Counter::getbyte_idx_out_of_bounds)); @@ -10333,6 +10333,9 @@ fn yjit_reg_method(klass: VALUE, mid_str: &str, gen_fn: MethodGenFn) { /// Global state needed for code generation pub struct CodegenGlobals { + /// Flat vector of bits to store compressed context data + context_data: BitVector, + /// Inline code block (fast path) inline_cb: CodeBlock, @@ -10448,6 +10451,7 @@ impl CodegenGlobals { ocb.unwrap().mark_all_executable(); let codegen_globals = CodegenGlobals { + context_data: BitVector::new(), inline_cb: cb, outlined_cb: ocb, leave_exit_code, @@ -10476,6 +10480,11 @@ impl CodegenGlobals { unsafe { CODEGEN_GLOBALS.as_mut().is_some() } } + /// Get a mutable reference to the context data + pub fn get_context_data() -> &'static mut BitVector { + &mut CodegenGlobals::get_instance().context_data + } + /// Get a mutable reference to the inline code block pub fn get_inline_cb() -> &'static mut CodeBlock { &mut CodegenGlobals::get_instance().inline_cb diff --git a/yjit/src/core.rs b/yjit/src/core.rs index 7c8532a0c3..2a8dde7087 100644 --- a/yjit/src/core.rs +++ b/yjit/src/core.rs @@ -457,8 +457,13 @@ const CHAIN_DEPTH_MASK: u8 = 0b00111111; // 63 /// Contains information we can use to specialize/optimize code /// There are a lot of context objects so we try to keep the size small. #[derive(Copy, Clone, Default, Eq, Hash, PartialEq, Debug)] -#[repr(packed)] pub struct Context { + // FIXME: decoded_from breaks == on contexts + /* + // Offset at which this context was previously encoded (zero if not) + decoded_from: u32, + */ + // Number of values currently on the temporary stack stack_size: u8, @@ -498,6 +503,568 @@ pub struct Context { inline_block: u64, } +#[derive(Clone)] +pub struct BitVector { + // Flat vector of bytes to write into + bytes: Vec<u8>, + + // Number of bits taken out of bytes allocated + num_bits: usize, +} + +impl BitVector { + pub fn new() -> Self { + Self { + bytes: Vec::with_capacity(4096), + num_bits: 0, + } + } + + #[allow(unused)] + pub fn num_bits(&self) -> usize { + self.num_bits + } + + // Total number of bytes taken + #[allow(unused)] + pub fn num_bytes(&self) -> usize { + (self.num_bits / 8) + if (self.num_bits % 8) != 0 { 1 } else { 0 } + } + + // Write/append an unsigned integer value + fn push_uint(&mut self, mut val: u64, mut num_bits: usize) { + assert!(num_bits <= 64); + + // Mask out bits above the number of bits requested + let mut val_bits = val; + if num_bits < 64 { + val_bits &= (1 << num_bits) - 1; + assert!(val == val_bits); + } + + // Number of bits encoded in the last byte + let rem_bits = self.num_bits % 8; + + // Encode as many bits as we can in this last byte + if rem_bits != 0 { + let num_enc = std::cmp::min(num_bits, 8 - rem_bits); + let bit_mask = (1 << num_enc) - 1; + let frac_bits = (val & bit_mask) << rem_bits; + let frac_bits: u8 = frac_bits.try_into().unwrap(); + let last_byte_idx = self.bytes.len() - 1; + self.bytes[last_byte_idx] |= frac_bits; + + self.num_bits += num_enc; + num_bits -= num_enc; + val >>= num_enc; + } + + // While we have bits left to encode + while num_bits > 0 { + // Grow with a 1.2x growth factor instead of 2x + assert!(self.num_bits % 8 == 0); + let num_bytes = 
self.num_bits / 8; + if num_bytes == self.bytes.capacity() { + self.bytes.reserve_exact(self.bytes.len() / 5); + } + + let bits = val & 0xFF; + let bits: u8 = bits.try_into().unwrap(); + self.bytes.push(bits); + + let bits_to_encode = std::cmp::min(num_bits, 8); + self.num_bits += bits_to_encode; + num_bits -= bits_to_encode; + val >>= bits_to_encode; + } + } + + fn push_u8(&mut self, val: u8) { + self.push_uint(val as u64, 8); + } + + fn push_u4(&mut self, val: u8) { + assert!(val < 16); + self.push_uint(val as u64, 4); + } + + fn push_u3(&mut self, val: u8) { + assert!(val < 8); + self.push_uint(val as u64, 3); + } + + fn push_u2(&mut self, val: u8) { + assert!(val < 4); + self.push_uint(val as u64, 2); + } + + fn push_u1(&mut self, val: u8) { + assert!(val < 2); + self.push_uint(val as u64, 1); + } + + // Push a context encoding opcode + fn push_op(&mut self, op: CtxOp) { + self.push_u4(op as u8); + } + + // Read a uint value at a given bit index + // The bit index is incremented after the value is read + fn read_uint(&self, bit_idx: &mut usize, mut num_bits: usize) -> u64 { + let start_bit_idx = *bit_idx; + let mut cur_idx = *bit_idx; + + // Read the bits in the first byte + let bit_mod = cur_idx % 8; + let bits_in_byte = self.bytes[cur_idx / 8] >> bit_mod; + + let num_bits_in_byte = std::cmp::min(num_bits, 8 - bit_mod); + cur_idx += num_bits_in_byte; + num_bits -= num_bits_in_byte; + + let mut out_bits = (bits_in_byte as u64) & ((1 << num_bits_in_byte) - 1); + + // While we have bits left to read + while num_bits > 0 { + let num_bits_in_byte = std::cmp::min(num_bits, 8); + assert!(cur_idx % 8 == 0); + let byte = self.bytes[cur_idx / 8] as u64; + + let bits_in_byte = byte & ((1 << num_bits) - 1); + out_bits |= bits_in_byte << (cur_idx - start_bit_idx); + + // Move to the next byte/offset + cur_idx += num_bits_in_byte; + num_bits -= num_bits_in_byte; + } + + // Update the read index + *bit_idx = cur_idx; + + out_bits + } + + fn read_u8(&self, bit_idx: &mut usize) -> u8 { + self.read_uint(bit_idx, 8) as u8 + } + + fn read_u4(&self, bit_idx: &mut usize) -> u8 { + self.read_uint(bit_idx, 4) as u8 + } + + fn read_u3(&self, bit_idx: &mut usize) -> u8 { + self.read_uint(bit_idx, 3) as u8 + } + + fn read_u2(&self, bit_idx: &mut usize) -> u8 { + self.read_uint(bit_idx, 2) as u8 + } + + fn read_u1(&self, bit_idx: &mut usize) -> u8 { + self.read_uint(bit_idx, 1) as u8 + } + + fn read_op(&self, bit_idx: &mut usize) -> CtxOp { + unsafe { std::mem::transmute(self.read_u4(bit_idx)) } + } +} + +impl fmt::Debug for BitVector { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + // We print the higher bytes first + for (idx, byte) in self.bytes.iter().enumerate().rev() { + write!(f, "{:08b}", byte)?; + + // Insert a separator between each byte + if idx > 0 { + write!(f, "|")?; + } + } + + Ok(()) + } +} + +#[cfg(test)] +mod bitvector_tests { + use super::*; + + #[test] + fn write_3() { + let mut arr = BitVector::new(); + arr.push_uint(3, 2); + assert!(arr.read_uint(&mut 0, 2) == 3); + } + + #[test] + fn write_11() { + let mut arr = BitVector::new(); + arr.push_uint(1, 1); + arr.push_uint(1, 1); + assert!(arr.read_uint(&mut 0, 2) == 3); + } + + #[test] + fn write_11_overlap() { + let mut arr = BitVector::new(); + arr.push_uint(0, 7); + arr.push_uint(3, 2); + arr.push_uint(1, 1); + + //dbg!(arr.read_uint(7, 2)); + assert!(arr.read_uint(&mut 7, 2) == 3); + } + + #[test] + fn write_ff_0() { + let mut arr = BitVector::new(); + arr.push_uint(0xFF, 8); + assert!(arr.read_uint(&mut 0, 8) == 0xFF); + } 
+ + #[test] + fn write_ff_3() { + // Write 0xFF at bit index 3 + let mut arr = BitVector::new(); + arr.push_uint(0, 3); + arr.push_uint(0xFF, 8); + assert!(arr.read_uint(&mut 3, 8) == 0xFF); + } + + #[test] + fn write_ff_sandwich() { + // Write 0xFF sandwiched between zeros + let mut arr = BitVector::new(); + arr.push_uint(0, 3); + arr.push_u8(0xFF); + arr.push_uint(0, 3); + assert!(arr.read_uint(&mut 3, 8) == 0xFF); + } + + #[test] + fn write_read_u32_max() { + let mut arr = BitVector::new(); + arr.push_uint(0xFF_FF_FF_FF, 32); + assert!(arr.read_uint(&mut 0, 32) == 0xFF_FF_FF_FF); + } + + #[test] + fn write_read_u32_max_64b() { + let mut arr = BitVector::new(); + arr.push_uint(0xFF_FF_FF_FF, 64); + assert!(arr.read_uint(&mut 0, 64) == 0xFF_FF_FF_FF); + } + + #[test] + fn write_read_u64_max() { + let mut arr = BitVector::new(); + arr.push_uint(u64::MAX, 64); + assert!(arr.read_uint(&mut 0, 64) == u64::MAX); + } + + #[test] + fn encode_default() { + let mut bits = BitVector::new(); + let ctx = Context::default(); + let start_idx = ctx.encode_into(&mut bits); + assert!(start_idx == 0); + assert!(bits.num_bits() > 0); + assert!(bits.num_bytes() > 0); + + // Make sure that the round trip matches the input + let ctx2 = Context::decode_from(&bits, 0); + assert!(ctx2 == ctx); + } + + #[test] + fn encode_default_2x() { + let mut bits = BitVector::new(); + + let ctx0 = Context::default(); + let idx0 = ctx0.encode_into(&mut bits); + + let mut ctx1 = Context::default(); + ctx1.reg_temps = RegTemps(1); + let idx1 = ctx1.encode_into(&mut bits); + + // Make sure that we can encode two contexts successively + let ctx0_dec = Context::decode_from(&bits, idx0); + let ctx1_dec = Context::decode_from(&bits, idx1); + assert!(ctx0_dec == ctx0); + assert!(ctx1_dec == ctx1); + } + + #[test] + fn regress_reg_temps() { + let mut bits = BitVector::new(); + let mut ctx = Context::default(); + ctx.reg_temps = RegTemps(1); + ctx.encode_into(&mut bits); + + let b0 = bits.read_u1(&mut 0); + assert!(b0 == 1); + + // Make sure that the round trip matches the input + let ctx2 = Context::decode_from(&bits, 0); + assert!(ctx2 == ctx); + } +} + +// Context encoding opcodes (4 bits) +#[derive(Debug, Copy, Clone)] +#[repr(u8)] +enum CtxOp { + // Self type (4 bits) + SetSelfType = 0, + + // Local idx (3 bits), temp type (4 bits) + SetLocalType, + + // Map stack temp to self with known type + // Temp idx (3 bits), known type (4 bits) + SetTempType, + + // Map stack temp to a local variable + // Temp idx (3 bits), local idx (3 bits) + MapTempLocal, + + // Map a stack temp to self + // Temp idx (3 bits) + MapTempSelf, + + // Set inline block pointer (8 bytes) + SetInlineBlock, + + // End of encoding + EndOfCode, +} + +// Cache of the last context encoded +// Empirically this saves a few percent of memory +// We can experiment with varying the size of this cache +static mut LAST_CTX_ENCODED: Option<(Context, u32)> = None; + +impl Context { + pub fn encode(&self) -> u32 { + incr_counter!(num_contexts_encoded); + + if *self == Context::default() { + return 0; + } + + /* + // If this context was previously decoded and was not changed since + if self.decoded_from != 0 && Self::decode(self.decoded_from) == *self { + return self.decoded_from; + } + */ + + // If this context was recently encoded (cache check) + unsafe { + if let Some((ctx, idx)) = LAST_CTX_ENCODED { + if ctx == *self { + return idx; + } + } + } + + let context_data = CodegenGlobals::get_context_data(); + + // Offset 0 is reserved for the default context + if 
context_data.num_bits() == 0 { + context_data.push_u1(0); + } + + let idx = self.encode_into(context_data); + let idx: u32 = idx.try_into().unwrap(); + + unsafe { + LAST_CTX_ENCODED = Some((*self, idx)); + } + + // In debug mode, check that the round-trip decoding always matches + debug_assert!(Self::decode(idx) == *self); + + idx + } + + pub fn decode(start_idx: u32) -> Context { + if start_idx == 0 { + return Context::default(); + }; + + let context_data = CodegenGlobals::get_context_data(); + let ctx = Self::decode_from(context_data, start_idx as usize); + + // Keep track of the fact that this context was previously encoded + //ctx.decoded_from = start_idx; + + ctx + } + + // Encode into a compressed context representation in a bit vector + fn encode_into(&self, bits: &mut BitVector) -> usize { + let start_idx = bits.num_bits(); + + // NOTE: this value is often zero or falls within + // a small range, so could be compressed + //println!("stack_size={}", self.stack_size); + //println!("sp_offset={}", self.sp_offset); + //println!("chain_depth_and_flags={}", self.chain_depth_and_flags); + + // Most of the time, the stack size is small and sp offset has the same value + if (self.stack_size as i64) == (self.sp_offset as i64) && self.stack_size < 4 { + // One single bit to signify a compact stack_size/sp_offset encoding + bits.push_u1(1); + bits.push_u2(self.stack_size); + } else { + // Full stack size encoding + bits.push_u1(0); + + // Number of values currently on the temporary stack + bits.push_u8(self.stack_size); + + // sp_offset: i8, + bits.push_u8(self.sp_offset as u8); + } + + // Bitmap of which stack temps are in a register + let RegTemps(reg_temps) = self.reg_temps; + bits.push_u8(reg_temps); + + // chain_depth_and_flags: u8, + bits.push_u8(self.chain_depth_and_flags); + + // Encode the self type if known + if self.self_type != Type::Unknown { + bits.push_op(CtxOp::SetSelfType); + bits.push_u4(self.self_type as u8); + } + + // Encode the local types if known + for local_idx in 0..MAX_LOCAL_TYPES { + let t = self.get_local_type(local_idx); + if t != Type::Unknown { + bits.push_op(CtxOp::SetLocalType); + bits.push_u3(local_idx as u8); + bits.push_u4(t as u8); + } + } + + // Encode stack temps + for stack_idx in 0..MAX_TEMP_TYPES { + let mapping = self.get_temp_mapping(stack_idx); + + match mapping.get_kind() { + MapToStack => { + let t = mapping.get_type(); + if t != Type::Unknown { + // Temp idx (3 bits), known type (4 bits) + bits.push_op(CtxOp::SetTempType); + bits.push_u3(stack_idx as u8); + bits.push_u4(t as u8); + } + } + + MapToLocal => { + // Temp idx (3 bits), local idx (3 bits) + let local_idx = mapping.get_local_idx(); + bits.push_op(CtxOp::MapTempLocal); + bits.push_u3(stack_idx as u8); + bits.push_u3(local_idx as u8); + } + + MapToSelf => { + // Temp idx (3 bits) + bits.push_op(CtxOp::MapTempSelf); + bits.push_u3(stack_idx as u8); + } + } + } + + // Inline block pointer + if self.inline_block != 0 { + bits.push_op(CtxOp::SetInlineBlock); + bits.push_uint(self.inline_block, 64); + } + + // TODO: should we add an op for end-of-encoding, + // or store num ops at the beginning? 
+ bits.push_op(CtxOp::EndOfCode); + + start_idx + } + + // Decode a compressed context representation from a bit vector + fn decode_from(bits: &BitVector, start_idx: usize) -> Context { + let mut ctx = Context::default(); + + let mut idx = start_idx; + + // Small vs large stack size encoding + if bits.read_u1(&mut idx) == 1 { + ctx.stack_size = bits.read_u2(&mut idx); + ctx.sp_offset = ctx.stack_size as i8; + } else { + ctx.stack_size = bits.read_u8(&mut idx); + ctx.sp_offset = bits.read_u8(&mut idx) as i8; + } + + // Bitmap of which stack temps are in a register + ctx.reg_temps = RegTemps(bits.read_u8(&mut idx)); + + // chain_depth_and_flags: u8 + ctx.chain_depth_and_flags = bits.read_u8(&mut idx); + + loop { + //println!("reading op"); + let op = bits.read_op(&mut idx); + //println!("got op {:?}", op); + + match op { + CtxOp::SetSelfType => { + ctx.self_type = unsafe { transmute(bits.read_u4(&mut idx)) }; + } + + CtxOp::SetLocalType => { + let local_idx = bits.read_u3(&mut idx) as usize; + let t = unsafe { transmute(bits.read_u4(&mut idx)) }; + ctx.set_local_type(local_idx, t); + } + + // Map temp to stack (known type) + CtxOp::SetTempType => { + let temp_idx = bits.read_u3(&mut idx) as usize; + let t = unsafe { transmute(bits.read_u4(&mut idx)) }; + ctx.set_temp_mapping(temp_idx, TempMapping::map_to_stack(t)); + } + + // Map temp to local + CtxOp::MapTempLocal => { + let temp_idx = bits.read_u3(&mut idx) as usize; + let local_idx = bits.read_u3(&mut idx); + ctx.set_temp_mapping(temp_idx, TempMapping::map_to_local(local_idx)); + } + + // Map temp to self + CtxOp::MapTempSelf => { + let temp_idx = bits.read_u3(&mut idx) as usize; + ctx.set_temp_mapping(temp_idx, TempMapping::map_to_self()); + } + + // Inline block pointer + CtxOp::SetInlineBlock => { + ctx.inline_block = bits.read_uint(&mut idx, 64); + } + + CtxOp::EndOfCode => break, + } + } + + ctx + } +} + /// Tuple of (iseq, idx) used to identify basic blocks /// There are a lot of blockid objects so we try to keep the size small. #[derive(Copy, Clone, PartialEq, Eq, Debug)] @@ -659,7 +1226,7 @@ impl BranchTarget { } } - fn get_ctx(&self) -> Context { + fn get_ctx(&self) -> u32 { match self { BranchTarget::Stub(stub) => stub.ctx, BranchTarget::Block(blockref) => unsafe { blockref.as_ref() }.ctx, @@ -686,7 +1253,7 @@ struct BranchStub { address: Option<CodePtr>, iseq: Cell<IseqPtr>, iseq_idx: IseqIdx, - ctx: Context, + ctx: u32, } /// Store info about an outgoing branch in a code segment @@ -808,6 +1375,9 @@ impl PendingBranch { return Some(block.start_addr); } + // Compress/encode the context + let ctx = Context::encode(ctx); + // The branch struct is uninitialized right now but as a stable address. // We make sure the stub runs after the branch is initialized. 
let branch_struct_addr = self.uninit_branch.as_ptr() as usize; @@ -819,7 +1389,7 @@ impl PendingBranch { address: Some(stub_addr), iseq: Cell::new(target.iseq), iseq_idx: target.idx, - ctx: *ctx, + ctx, }))))); } @@ -912,7 +1482,7 @@ pub struct Block { // Context at the start of the block // This should never be mutated - ctx: Context, + ctx: u32, // Positions where the generated code starts and ends start_addr: CodePtr, @@ -1085,15 +1655,6 @@ pub fn for_each_iseq<F: FnMut(IseqPtr)>(mut callback: F) { unsafe { rb_yjit_for_each_iseq(Some(callback_wrapper), (&mut data) as *mut _ as *mut c_void) }; } -/// Iterate over all ISEQ payloads -pub fn for_each_iseq_payload<F: FnMut(&IseqPayload)>(mut callback: F) { - for_each_iseq(|iseq| { - if let Some(iseq_payload) = get_iseq_payload(iseq) { - callback(iseq_payload); - } - }); -} - /// Iterate over all on-stack ISEQs pub fn for_each_on_stack_iseq<F: FnMut(IseqPtr)>(mut callback: F) { unsafe extern "C" fn callback_wrapper(iseq: IseqPtr, data: *mut c_void) { @@ -1425,13 +1986,17 @@ pub fn take_version_list(blockid: BlockId) -> VersionList { fn get_num_versions(blockid: BlockId, inlined: bool) -> usize { let insn_idx = blockid.idx.as_usize(); match get_iseq_payload(blockid.iseq) { + + // FIXME: this counting logic is going to be expensive. + // We should avoid it if possible + Some(payload) => { payload .version_map .get(insn_idx) .map(|versions| { versions.iter().filter(|&&version| - unsafe { version.as_ref() }.ctx.inline() == inlined + Context::decode(unsafe { version.as_ref() }.ctx).inline() == inlined ).count() }) .unwrap_or(0) @@ -1476,10 +2041,11 @@ fn find_block_version(blockid: BlockId, ctx: &Context) -> Option<BlockRef> { // For each version matching the blockid for blockref in versions.iter() { let block = unsafe { blockref.as_ref() }; + let block_ctx = Context::decode(block.ctx); // Note that we always prefer the first matching // version found because of inline-cache chains - match ctx.diff(&block.ctx) { + match ctx.diff(&block_ctx) { TypeDiff::Compatible(diff) if diff < best_diff => { best_version = Some(*blockref); best_diff = diff; @@ -1561,7 +2127,7 @@ unsafe fn add_block_version(blockref: BlockRef, cb: &CodeBlock) { let block = unsafe { blockref.as_ref() }; // Function entry blocks must have stack size 0 - assert!(!(block.iseq_range.start == 0 && block.ctx.stack_size > 0)); + debug_assert!(!(block.iseq_range.start == 0 && Context::decode(block.ctx).stack_size > 0)); let version_list = get_or_create_version_list(block.get_blockid()); @@ -1620,12 +2186,14 @@ impl JITState { incr_counter_by!(num_gc_obj_refs, gc_obj_offsets.len()); + let ctx = Context::encode(&self.get_starting_ctx()); + // Make the new block let block = MaybeUninit::new(Block { start_addr, iseq: Cell::new(self.get_iseq()), iseq_range: self.get_starting_insn_idx()..end_insn_idx, - ctx: self.get_starting_ctx(), + ctx, end_addr: Cell::new(end_addr), incoming: MutableBranchList(Cell::default()), gc_obj_offsets: gc_obj_offsets.into_boxed_slice(), @@ -2382,6 +2950,7 @@ fn gen_block_series_body( }; // Generate new block using context from the last branch. 
+ let requested_ctx = Context::decode(requested_ctx); let result = gen_single_block(requested_blockid, &requested_ctx, ec, cb, ocb); // If the block failed to compile @@ -2769,7 +3338,8 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) - return target.get_address().unwrap().raw_ptr(cb); } - (target.get_blockid(), target.get_ctx()) + let target_ctx = Context::decode(target.get_ctx()); + (target.get_blockid(), target_ctx) }; let (cfp, original_interp_sp) = unsafe { @@ -2906,7 +3476,7 @@ fn branch_stub_hit_body(branch_ptr: *const c_void, target_idx: u32, ec: EcPtr) - /// Generate a "stub", a piece of code that calls the compiler back when run. /// A piece of code that redeems for more code; a thunk for code. fn gen_branch_stub( - ctx: &Context, + ctx: u32, ocb: &mut OutlinedCb, branch_struct_address: usize, target_idx: u32, @@ -2914,8 +3484,8 @@ fn gen_branch_stub( let ocb = ocb.unwrap(); let mut asm = Assembler::new(); - asm.ctx = *ctx; - asm.set_reg_temps(ctx.reg_temps); + asm.ctx = Context::decode(ctx); + asm.set_reg_temps(asm.ctx.reg_temps); asm_comment!(asm, "branch stub hit"); if asm.ctx.is_return_landing() { @@ -3112,7 +3682,7 @@ pub fn gen_direct_jump(jit: &mut JITState, ctx: &Context, target0: BlockId, asm: // compile the target block right after this one (fallthrough). BranchTarget::Stub(Box::new(BranchStub { address: None, - ctx: *ctx, + ctx: Context::encode(ctx), iseq: Cell::new(target0.iseq), iseq_idx: target0.idx, })) @@ -3364,7 +3934,7 @@ pub fn invalidate_block_version(blockref: &BlockRef) { } // Create a stub for this branch target - let stub_addr = gen_branch_stub(&block.ctx, ocb, branchref.as_ptr() as usize, target_idx as u32); + let stub_addr = gen_branch_stub(block.ctx, ocb, branchref.as_ptr() as usize, target_idx as u32); // In case we were unable to generate a stub (e.g. OOM). Use the block's // exit instead of a stub for the block. It's important that we @@ -3547,11 +4117,6 @@ mod tests { } #[test] - fn context_size() { - assert_eq!(mem::size_of::<Context>(), 23); - } - - #[test] fn types() { // Valid src => dst assert_eq!(Type::Unknown.diff(Type::Unknown), TypeDiff::Compatible(0)); @@ -3695,7 +4260,7 @@ mod tests { iseq: Cell::new(ptr::null()), iseq_idx: 0, address: None, - ctx: Context::default(), + ctx: 0, })))))] }; // For easier soundness reasoning, make sure the reference returned does not out live the @@ -3728,7 +4293,7 @@ mod tests { iseq: Cell::new(ptr::null()), iseq_idx: 0, address: None, - ctx: Context::default(), + ctx: 0, }))))); // Invalid ISeq; we never dereference it. let secret_iseq = NonNull::<rb_iseq_t>::dangling().as_ptr(); diff --git a/yjit/src/stats.rs b/yjit/src/stats.rs index 1f94c19398..6a7de68576 100644 --- a/yjit/src/stats.rs +++ b/yjit/src/stats.rs @@ -10,8 +10,6 @@ use std::time::Instant; use std::collections::HashMap; use crate::codegen::CodegenGlobals; -use crate::core::Context; -use crate::core::for_each_iseq_payload; use crate::cruby::*; use crate::options::*; use crate::yjit::yjit_enabled_p; @@ -557,6 +555,7 @@ make_counters! { branch_insn_count, branch_known_count, max_inline_versions, + num_contexts_encoded, freed_iseq_count, @@ -641,8 +640,8 @@ pub extern "C" fn rb_yjit_print_stats_p(_ec: EcPtr, _ruby_self: VALUE) -> VALUE /// Primitive called in yjit.rb. /// Export all YJIT statistics as a Ruby hash. 
#[no_mangle] -pub extern "C" fn rb_yjit_get_stats(_ec: EcPtr, _ruby_self: VALUE, context: VALUE) -> VALUE { - with_vm_lock(src_loc!(), || rb_yjit_gen_stats_dict(context == Qtrue)) +pub extern "C" fn rb_yjit_get_stats(_ec: EcPtr, _ruby_self: VALUE) -> VALUE { + with_vm_lock(src_loc!(), || rb_yjit_gen_stats_dict()) } /// Primitive called in yjit.rb @@ -701,7 +700,7 @@ pub extern "C" fn rb_yjit_incr_counter(counter_name: *const std::os::raw::c_char } /// Export all YJIT statistics as a Ruby hash. -fn rb_yjit_gen_stats_dict(context: bool) -> VALUE { +fn rb_yjit_gen_stats_dict() -> VALUE { // If YJIT is not enabled, return Qnil if !yjit_enabled_p() { return Qnil; @@ -744,14 +743,9 @@ fn rb_yjit_gen_stats_dict(context: bool) -> VALUE { // Rust global allocations in bytes hash_aset_usize!(hash, "yjit_alloc_size", GLOBAL_ALLOCATOR.alloc_size.load(Ordering::SeqCst)); - // `context` is true at RubyVM::YJIT._print_stats for --yjit-stats. It's false by default - // for RubyVM::YJIT.runtime_stats because counting all Contexts could be expensive. - if context { - let live_context_count = get_live_context_count(); - let context_size = std::mem::size_of::<Context>(); - hash_aset_usize!(hash, "live_context_count", live_context_count); - hash_aset_usize!(hash, "live_context_size", live_context_count * context_size); - } + // How many bytes we are using to store context data + let context_data = CodegenGlobals::get_context_data(); + hash_aset_usize!(hash, "context_data_bytes", context_data.num_bytes()); // VM instructions count hash_aset_usize!(hash, "vm_insns_count", rb_vm_insns_count as usize); @@ -846,21 +840,6 @@ fn rb_yjit_gen_stats_dict(context: bool) -> VALUE { hash } -fn get_live_context_count() -> usize { - let mut count = 0; - for_each_iseq_payload(|iseq_payload| { - for blocks in iseq_payload.version_map.iter() { - for block in blocks.iter() { - count += unsafe { block.as_ref() }.get_ctx_count(); - } - } - for block in iseq_payload.dead_blocks.iter() { - count += unsafe { block.as_ref() }.get_ctx_count(); - } - }); - count -} - /// Record the backtrace when a YJIT exit occurs. This functionality requires /// that the stats feature is enabled as well as the --yjit-trace-exits option. /// |
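One design note from core.rs above: the single-entry `LAST_CTX_ENCODED` cache. Identical contexts are often encoded back-to-back, so remembering only the most recent (context, offset) pair lets repeats share one encoding without the lookup or memory cost of a full map. The following is a minimal sketch of that idea, with a stand-in `Ctx` type and a byte-wise payload rather than the real bit-packed encoding.

```rust
// Sketch of a one-entry encode cache over a shared, append-only buffer.
#[derive(Clone, Copy, PartialEq, Debug)]
struct Ctx(u64); // stand-in for the real Context

struct Encoder {
    data: Vec<u8>,            // shared, append-only encoding buffer
    last: Option<(Ctx, u32)>, // most recently encoded context and its offset
}

impl Encoder {
    fn new() -> Self {
        Self { data: vec![0], last: None } // offset 0 is reserved for the default
    }

    fn encode(&mut self, ctx: Ctx) -> u32 {
        if ctx == Ctx(0) {
            return 0; // the default context never takes new space
        }
        if let Some((cached, off)) = self.last {
            if cached == ctx {
                return off; // cache hit: reuse the previous encoding
            }
        }
        let off = self.data.len() as u32;
        self.data.extend_from_slice(&ctx.0.to_le_bytes());
        self.last = Some((ctx, off));
        off
    }
}

fn main() {
    let mut enc = Encoder::new();
    let a = enc.encode(Ctx(42));
    let b = enc.encode(Ctx(42)); // second encode hits the cache, no growth
    assert_eq!(a, b);
    assert_eq!(enc.data.len(), 1 + 8);
    println!("offset {}, {} bytes stored", a, enc.data.len());
}
```

The same shape would extend naturally to a small ring of recent entries, which the code comment in core.rs flags as something worth experimenting with.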