diff options
author | Alan Wu <[email protected]> | 2022-06-14 10:23:13 -0400 |
---|---|---|
committer | GitHub <[email protected]> | 2022-06-14 10:23:13 -0400 |
commit | 9f09397bfe6762bf19ef47b2f60988e49b80560d (patch) | |
tree | 2be526b0bc34af44937eab15f31f131c85df6b03 /yjit/src/asm/mod.rs | |
parent | 9b9cc8ad34fdecdede439f14c027c5eefef5541e (diff) |
YJIT: On-demand executable memory allocation; faster boot (#5944)
This commit makes YJIT allocate memory for generated code gradually as
needed. Previously, YJIT allocates all the memory it needs on boot in
one go, leading to higher than necessary resident set size (RSS) and
time spent on boot initializing the memory with a large memset().
Users should no longer need to search for a magic number to pass to
`--yjit-exec-mem` since physical memory consumption should now more
accurately reflect the requirement of the workload.
YJIT now reserves a range of addresses on boot. This region start out
with no access permission at all so buggy attempts to jump to the region
crashes like before this change. To get this hardening at finer
granularity than the page size, we fill each page with trapping
instructions when we first allocate physical memory for the page.
Most of the time applications don't need 256 MiB of executable code, so
allocating on-demand ends up doing less total work than before. Case in
point, a simple `ruby --yjit-call-threshold=1 -eitself` takes about
half as long after this change. In terms of memory consumption, here is
a table to give a rough summary of the impact:
| Peak RSS in MiB | -eitself example | railsbench once |
| :-------------: | ---------------: | --------------: |
| before | 265 | 377 |
| after | 11 | 143 |
| no YJIT | 10 | 101 |
A new module is introduced to handle allocation bookkeeping.
`CodePtr` is moved into the module since it has a close relationship
with the new `VirtualMemory` struct. This new interface has a slightly
smaller surface than before in that marking a region as writable is no
longer a public operation.
Notes
Notes:
Merged-By: maximecb <[email protected]>
Diffstat (limited to 'yjit/src/asm/mod.rs')
-rw-r--r-- | yjit/src/asm/mod.rs | 144 |
1 files changed, 32 insertions, 112 deletions
diff --git a/yjit/src/asm/mod.rs b/yjit/src/asm/mod.rs index 1d31facb78..e16e856925 100644 --- a/yjit/src/asm/mod.rs +++ b/yjit/src/asm/mod.rs @@ -3,49 +3,16 @@ use std::mem; #[cfg(feature = "asm_comments")] use std::collections::BTreeMap; +use crate::virtualmem::{VirtualMem, CodePtr}; + // Lots of manual vertical alignment in there that rustfmt doesn't handle well. #[rustfmt::skip] pub mod x86_64; -/// Pointer to a piece of machine code -/// We may later change this to wrap an u32 -/// Note: there is no NULL constant for CodePtr. You should use Option<CodePtr> instead. -#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Debug)] -#[repr(C)] -pub struct CodePtr(*const u8); - -impl CodePtr { - pub fn raw_ptr(&self) -> *const u8 { - let CodePtr(ptr) = *self; - return ptr; - } - - fn into_i64(&self) -> i64 { - let CodePtr(ptr) = self; - *ptr as i64 - } - - #[allow(unused)] - fn into_usize(&self) -> usize { - let CodePtr(ptr) = self; - *ptr as usize - } -} - -impl From<*mut u8> for CodePtr { - fn from(value: *mut u8) -> Self { - assert!(value as usize != 0); - return CodePtr(value); - } -} - // // TODO: need a field_size_of macro, to compute the size of a struct field in bytes // -// 1 is not aligned so this won't match any pages -const ALIGNED_WRITE_POSITION_NONE: usize = 1; - /// Reference to an ASM label struct LabelRef { // Position in the code block where the label reference exists @@ -57,13 +24,8 @@ struct LabelRef { /// Block of memory into which instructions can be assembled pub struct CodeBlock { - // Block of non-executable memory used for dummy code blocks - // This memory is owned by this block and lives as long as the block - #[allow(unused)] - dummy_block: Vec<u8>, - - // Pointer to memory we are writing into - mem_block: *mut u8, + // Memory for storing the encoded instructions + mem_block: VirtualMem, // Memory block size mem_size: usize, @@ -84,14 +46,6 @@ pub struct CodeBlock { #[cfg(feature = "asm_comments")] asm_comments: BTreeMap<usize, Vec<String>>, - // Keep track of the current aligned write position. - // Used for changing protection when writing to the JIT buffer - current_aligned_write_pos: usize, - - // Memory protection works at page granularity and this is the - // the size of each page. Used to implement W^X. - page_size: usize, - // Set if the CodeBlock is unable to output some instructions, // for example, when there is not enough space or when a jump // target is too far away. @@ -99,47 +53,22 @@ pub struct CodeBlock { } impl CodeBlock { - #[cfg(test)] - pub fn new_dummy(mem_size: usize) -> Self { - // Allocate some non-executable memory - let mut dummy_block = vec![0; mem_size]; - let mem_ptr = dummy_block.as_mut_ptr(); - - Self { - dummy_block: dummy_block, - mem_block: mem_ptr, - mem_size: mem_size, - write_pos: 0, - label_addrs: Vec::new(), - label_names: Vec::new(), - label_refs: Vec::new(), - #[cfg(feature = "asm_comments")] - asm_comments: BTreeMap::new(), - current_aligned_write_pos: ALIGNED_WRITE_POSITION_NONE, - page_size: 4096, - dropped_bytes: false, - } - } - - #[cfg(not(test))] - pub fn new(mem_block: *mut u8, mem_size: usize, page_size: usize) -> Self { + /// Make a new CodeBlock + pub fn new(mem_block: VirtualMem) -> Self { Self { - dummy_block: vec![0; 0], - mem_block: mem_block, - mem_size: mem_size, + mem_size: mem_block.virtual_region_size(), + mem_block, write_pos: 0, label_addrs: Vec::new(), label_names: Vec::new(), label_refs: Vec::new(), #[cfg(feature = "asm_comments")] asm_comments: BTreeMap::new(), - current_aligned_write_pos: ALIGNED_WRITE_POSITION_NONE, - page_size, dropped_bytes: false, } } - // Check if this code block has sufficient remaining capacity + /// Check if this code block has sufficient remaining capacity pub fn has_capacity(&self, num_bytes: usize) -> bool { self.write_pos + num_bytes < self.mem_size } @@ -175,6 +104,10 @@ impl CodeBlock { self.write_pos } + pub fn get_mem(&mut self) -> &mut VirtualMem { + &mut self.mem_block + } + // Set the current write position pub fn set_pos(&mut self, pos: usize) { // Assert here since while CodeBlock functions do bounds checking, there is @@ -204,16 +137,13 @@ impl CodeBlock { // Set the current write position from a pointer pub fn set_write_ptr(&mut self, code_ptr: CodePtr) { - let pos = (code_ptr.raw_ptr() as usize) - (self.mem_block as usize); + let pos = code_ptr.into_usize() - self.mem_block.start_ptr().into_usize(); self.set_pos(pos); } // Get a direct pointer into the executable memory block pub fn get_ptr(&self, offset: usize) -> CodePtr { - unsafe { - let ptr = self.mem_block.add(offset); - CodePtr(ptr) - } + self.mem_block.start_ptr().add_bytes(offset) } // Get a direct pointer to the current write position @@ -223,9 +153,9 @@ impl CodeBlock { // Write a single byte at the current position pub fn write_byte(&mut self, byte: u8) { - if self.write_pos < self.mem_size { - self.mark_position_writable(self.write_pos); - unsafe { self.mem_block.add(self.write_pos).write(byte) }; + let write_ptr = self.get_write_ptr(); + + if self.mem_block.write_byte(write_ptr, byte).is_ok() { self.write_pos += 1; } else { self.dropped_bytes = true; @@ -328,33 +258,23 @@ impl CodeBlock { assert!(self.label_refs.is_empty()); } - pub fn mark_position_writable(&mut self, write_pos: usize) { - let page_size = self.page_size; - let aligned_position = (write_pos / page_size) * page_size; + pub fn mark_all_executable(&mut self) { + self.mem_block.mark_all_executable(); + } +} - if self.current_aligned_write_pos != aligned_position { - self.current_aligned_write_pos = aligned_position; +#[cfg(test)] +impl CodeBlock { + /// Stubbed CodeBlock for testing. Can't execute generated code. + pub fn new_dummy(mem_size: usize) -> Self { + use crate::virtualmem::*; + use crate::virtualmem::tests::TestingAllocator; - #[cfg(not(test))] - unsafe { - use core::ffi::c_void; - let page_ptr = self.get_ptr(aligned_position).raw_ptr() as *mut c_void; - crate::cruby::rb_yjit_mark_writable(page_ptr, page_size.try_into().unwrap()); - } - } - } + let alloc = TestingAllocator::new(mem_size); + let mem_start: *const u8 = alloc.mem_start(); + let virt_mem = VirtualMem::new(alloc, 1, mem_start as *mut u8, mem_size); - pub fn mark_all_executable(&mut self) { - self.current_aligned_write_pos = ALIGNED_WRITE_POSITION_NONE; - - #[cfg(not(test))] - unsafe { - use core::ffi::c_void; - // NOTE(alan): Right now we do allocate one big chunck and give the top half to the outlined codeblock - // The start of the top half of the region isn't necessarily a page boundary... - let cb_start = self.get_ptr(0).raw_ptr() as *mut c_void; - crate::cruby::rb_yjit_mark_executable(cb_start, self.mem_size.try_into().unwrap()); - } + Self::new(virt_mem) } } |