author    Alan Wu <[email protected]>    2022-06-14 10:23:13 -0400
committer GitHub <[email protected]>    2022-06-14 10:23:13 -0400
commit    9f09397bfe6762bf19ef47b2f60988e49b80560d (patch)
tree      2be526b0bc34af44937eab15f31f131c85df6b03 /yjit/src/codegen.rs
parent    9b9cc8ad34fdecdede439f14c027c5eefef5541e (diff)
YJIT: On-demand executable memory allocation; faster boot (#5944)
This commit makes YJIT allocate memory for generated code gradually, as
needed. Previously, YJIT allocated all the memory it needed on boot in
one go, leading to a higher-than-necessary resident set size (RSS) and
to boot time spent initializing that memory with a large memset(). Users
should no longer need to search for a magic number to pass to
`--yjit-exec-mem`, since physical memory consumption should now more
accurately reflect the requirements of the workload.

YJIT now reserves a range of addresses on boot. This region starts out
with no access permissions at all, so buggy attempts to jump into the
region crash just as they did before this change. To get this hardening
at a finer granularity than the page size, we fill each page with
trapping instructions when we first allocate physical memory for it.

Most of the time, applications don't need 256 MiB of executable code, so
allocating on demand ends up doing less total work than before. Case in
point: a simple `ruby --yjit-call-threshold=1 -eitself` takes about half
as long after this change. In terms of memory consumption, here is a
table giving a rough summary of the impact:

| Peak RSS in MiB | -eitself example | railsbench once |
| :-------------: | ---------------: | --------------: |
| before          |              265 |             377 |
| after           |               11 |             143 |
| no YJIT         |               10 |             101 |

A new module is introduced to handle allocation bookkeeping. `CodePtr`
is moved into the module since it has a close relationship with the new
`VirtualMemory` struct. The new interface has a slightly smaller surface
than before, in that marking a region as writable is no longer a public
operation.
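
To make the reserve-then-commit scheme above concrete, here is a minimal
sketch of the pattern, written against raw mmap/mprotect from the `libc`
crate rather than YJIT's actual `VirtualMemory` module. The names
`reserve` and `commit_page` are illustrative only, and the 0xCC filler
byte assumes x86-64, where it encodes the INT3 trap instruction:

```rust
use std::io::Error;
use std::ptr;

/// Reserve `size` bytes of address space with no access permissions.
/// Jumping or writing into the region faults until a page is committed.
unsafe fn reserve(size: usize) -> Result<*mut u8, Error> {
    let addr = libc::mmap(
        ptr::null_mut(),
        size,
        libc::PROT_NONE, // reserved, not yet usable
        libc::MAP_PRIVATE | libc::MAP_ANONYMOUS,
        -1,
        0,
    );
    if addr == libc::MAP_FAILED {
        Err(Error::last_os_error())
    } else {
        Ok(addr as *mut u8)
    }
}

/// Commit one page on demand: grant read/write access, then fill the
/// page with trapping instructions so that stray jumps into
/// not-yet-generated code still crash loudly.
unsafe fn commit_page(page: *mut u8, page_size: usize) -> Result<(), Error> {
    let prot = libc::PROT_READ | libc::PROT_WRITE;
    if libc::mprotect(page as *mut _, page_size, prot) != 0 {
        return Err(Error::last_os_error());
    }
    page.write_bytes(0xCC, page_size); // INT3 on x86-64
    Ok(())
}

fn main() -> Result<(), Error> {
    let page_size = 4096; // assume 4 KiB pages for this demo
    unsafe {
        let block = reserve(16 * page_size)?;
        commit_page(block, page_size)?; // first page usable, rest still trap
        assert_eq!(0xCCu8, *block);
    }
    Ok(())
}
```

A real JIT also has to make committed pages executable before running
code from them; per the commit message, those permission flips now
happen inside the new module rather than through a public API.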
Notes:
Merged-By: maximecb <[email protected]>
Diffstat (limited to 'yjit/src/codegen.rs')
-rw-r--r-- yjit/src/codegen.rs | 55
1 file changed, 48 insertions(+), 7 deletions(-)
diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs
index ca2c237e2d..75249658fb 100644
--- a/yjit/src/codegen.rs
+++ b/yjit/src/codegen.rs
@@ -21,6 +21,8 @@ use std::os::raw::c_uint;
 use std::ptr;
 use std::slice;
+pub use crate::virtualmem::CodePtr;
+
 // Callee-saved registers
 pub const REG_CFP: X86Opnd = R13;
 pub const REG_EC: X86Opnd = R12;
@@ -5982,14 +5984,53 @@ impl CodegenGlobals {
         #[cfg(not(test))]
         let (mut cb, mut ocb) = {
-            let page_size = unsafe { rb_yjit_get_page_size() }.as_usize();
-            let mem_block: *mut u8 = unsafe { alloc_exec_mem(mem_size.try_into().unwrap()) };
-            let cb = CodeBlock::new(mem_block, mem_size / 2, page_size);
-            let ocb = OutlinedCb::wrap(CodeBlock::new(
-                unsafe { mem_block.add(mem_size / 2) },
-                mem_size / 2,
+            // TODO(alan): we can error more gracefully when the user gives
+            // --yjit-exec-mem=absurdly-large-number
+            //
+            // 2 GiB. It's likely a bug if we generate this much code.
+            const MAX_BUFFER_SIZE: usize = 2 * 1024 * 1024 * 1024;
+            assert!(mem_size <= MAX_BUFFER_SIZE);
+            let mem_size_u32 = mem_size as u32;
+            let half_size = mem_size / 2;
+
+            let page_size = unsafe { rb_yjit_get_page_size() };
+            let assert_page_aligned = |ptr| assert_eq!(
+                0,
+                ptr as usize % page_size.as_usize(),
+                "Start of virtual address block should be page-aligned",
+            );
+
+            let virt_block: *mut u8 = unsafe { rb_yjit_reserve_addr_space(mem_size_u32) };
+            let second_half = virt_block.wrapping_add(half_size);
+
+            // Memory protection syscalls need page-aligned addresses, so check that here. Assuming
+            // `virt_block` is page-aligned, `second_half` is page-aligned as long as the page size is a
+            // power of two no larger than 2¹⁹: the offset is half of the user requested mem_option × 2²⁰
+            // bytes (the option is given in MiB), i.e. mem_option × 2¹⁹, divisible by any such page size.
+            //
+            // Basically, we don't support x86-64 2MiB and 1GiB pages. ARMv8 can do up to 64KiB
+            // (2¹⁶ bytes) pages, which should be fine. 4KiB pages seem to be the most popular though.
+            assert_page_aligned(virt_block);
+            assert_page_aligned(second_half);
+
+            use crate::virtualmem::*;
+
+            let first_half = VirtualMem::new(
+                SystemAllocator {},
                 page_size,
-            ));
+                virt_block,
+                half_size
+            );
+            let second_half = VirtualMem::new(
+                SystemAllocator {},
+                page_size,
+                second_half,
+                half_size
+            );
+
+            let cb = CodeBlock::new(first_half);
+            let ocb = OutlinedCb::wrap(CodeBlock::new(second_half));
+
             (cb, ocb)
         };
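
As a side note on the alignment comment in the diff above: the halfway
offset is mem_option × 2¹⁹ bytes because `--yjit-exec-mem` is given in
MiB, so it is divisible by every power-of-two page size up to 2¹⁹. A
standalone sketch (not part of the commit) that checks this arithmetic:

```rust
fn main() {
    // --yjit-exec-mem is given in MiB, so the requested size is
    // mem_option * 2^20 bytes and the halfway offset is mem_option * 2^19.
    for mem_option in [1usize, 7, 64, 256] {
        let half_size = mem_option * (1 << 20) / 2;
        // Every power-of-two page size up to 2^19 divides the offset, so
        // `second_half` stays page-aligned; 2 MiB pages (2^21) would not.
        for shift in [12, 14, 16, 19] {
            assert_eq!(0, half_size % (1usize << shift));
        }
    }
    println!("halfway offset is page-aligned for all checked page sizes");
}
```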