use std::mem::take; use crate::asm::{CodeBlock, OutlinedCb}; use crate::asm::arm64::*; use crate::cruby::*; use crate::backend::ir::*; use crate::virtualmem::CodePtr; use crate::utils::*; // Use the arm64 register type for this platform pub type Reg = A64Reg; // Callee-saved registers pub const _CFP: Opnd = Opnd::Reg(X19_REG); pub const _EC: Opnd = Opnd::Reg(X20_REG); pub const _SP: Opnd = Opnd::Reg(X21_REG); // C argument registers on this platform pub const _C_ARG_OPNDS: [Opnd; 6] = [ Opnd::Reg(X0_REG), Opnd::Reg(X1_REG), Opnd::Reg(X2_REG), Opnd::Reg(X3_REG), Opnd::Reg(X4_REG), Opnd::Reg(X5_REG) ]; // C return value register on this platform pub const C_RET_REG: Reg = X0_REG; pub const _C_RET_OPND: Opnd = Opnd::Reg(X0_REG); // These constants define the way we work with Arm64's stack pointer. The stack // pointer always needs to be aligned to a 16-byte boundary. pub const C_SP_REG: A64Opnd = X31; pub const C_SP_STEP: i32 = 16; impl CodeBlock { // The maximum number of bytes that can be generated by emit_jmp_ptr. pub fn jmp_ptr_bytes(&self) -> usize { // b instruction's offset is encoded as imm26 times 4. It can jump to // +/-128MiB, so this can be used when --yjit-exec-mem-size <= 128. let num_insns = if b_offset_fits_bits(self.virtual_region_size() as i64 / 4) { 1 // b instruction } else { 5 // 4 instructions to load a 64-bit absolute address + br instruction }; num_insns * 4 } // The maximum number of instructions that can be generated by emit_conditional_jump. fn conditional_jump_insns(&self) -> i32 { // The worst case is instructions for a jump + bcond. 
self.jmp_ptr_bytes() as i32 / 4 + 1 } } /// Map Opnd to A64Opnd impl From for A64Opnd { fn from(opnd: Opnd) -> Self { match opnd { Opnd::UImm(value) => A64Opnd::new_uimm(value), Opnd::Imm(value) => A64Opnd::new_imm(value), Opnd::Reg(reg) => A64Opnd::Reg(reg), Opnd::Mem(Mem { base: MemBase::Reg(reg_no), num_bits, disp }) => { A64Opnd::new_mem(num_bits, A64Opnd::Reg(A64Reg { num_bits, reg_no }), disp) }, Opnd::Mem(Mem { base: MemBase::InsnOut(_), .. }) => { panic!("attempted to lower an Opnd::Mem with a MemBase::InsnOut base") }, Opnd::CArg(_) => panic!("attempted to lower an Opnd::CArg"), Opnd::InsnOut { .. } => panic!("attempted to lower an Opnd::InsnOut"), Opnd::Value(_) => panic!("attempted to lower an Opnd::Value"), Opnd::Stack { .. } => panic!("attempted to lower an Opnd::Stack"), Opnd::None => panic!( "Attempted to lower an Opnd::None. This often happens when an out operand was not allocated for an instruction because the output of the instruction was not used. Please ensure you are using the output." ), } } } /// Also implement going from a reference to an operand for convenience. impl From<&Opnd> for A64Opnd { fn from(opnd: &Opnd) -> Self { A64Opnd::from(*opnd) } } /// Call emit_jmp_ptr and immediately invalidate the written range. /// This is needed when next_page also moves other_cb that is not invalidated /// by compile_with_regs. Doing it here allows you to avoid invalidating a lot /// more than necessary when other_cb jumps from a position early in the page. /// This invalidates a small range of cb twice, but we accept the small cost. 
fn emit_jmp_ptr_with_invalidation(cb: &mut CodeBlock, dst_ptr: CodePtr) {
    // Remember where the jump starts so that only the bytes written by
    // emit_jmp_ptr are invalidated. Invalidation is compiled out under test.
    #[cfg(not(test))]
    let start = cb.get_write_ptr();
    emit_jmp_ptr(cb, dst_ptr, true);
    #[cfg(not(test))]
    {
        let end = cb.get_write_ptr();
        unsafe { rb_yjit_icache_invalidate(start.raw_ptr(cb) as _, end.raw_ptr(cb) as _) };
    }
}

/// Emit an unconditional jump from the current write position to `dst_ptr`.
///
/// A single PC-relative `b` is used when the displacement fits. Otherwise the
/// absolute destination address is loaded into the scratch register and a
/// `br` is emitted. When `padding` is true the sequence is filled with `nop`s
/// up to `cb.jmp_ptr_bytes()` so that it always has the same size.
fn emit_jmp_ptr(cb: &mut CodeBlock, dst_ptr: CodePtr, padding: bool) {
    let src_addr = cb.get_write_ptr().as_offset();
    let dst_addr = dst_ptr.as_offset();

    // If the offset is short enough, then we'll use the
    // branch instruction. Otherwise, we'll move the
    // destination into a register and use the branch
    // register instruction.
    let num_insns = if b_offset_fits_bits((dst_addr - src_addr) / 4) {
        b(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32));
        1
    } else {
        // `br` jumps to the address held in the register, so it needs the
        // raw destination address, not the `as_offset()` value that is only
        // meaningful as a difference between two code pointers. This matches
        // the register-indirect paths of emit_conditional_jump and
        // emit_cmp_zero_jump.
        let dst_addr = dst_ptr.raw_addr(cb) as u64;
        let num_insns = emit_load_value(cb, Assembler::SCRATCH0, dst_addr);
        br(cb, Assembler::SCRATCH0);
        num_insns + 1
    };

    if padding {
        // Make sure it's always a consistent number of
        // instructions in case it gets patched and has to
        // use the other branch.
        assert!(num_insns * 4 <= cb.jmp_ptr_bytes());
        for _ in num_insns..(cb.jmp_ptr_bytes() / 4) {
            nop(cb);
        }
    }
}

/// Emit the required instructions to load the given value into the
/// given register. Our goal here is to use as few instructions as
/// possible to get this value into the register.
///
/// Returns the number of instructions that were written (between 1 and 4).
fn emit_load_value(cb: &mut CodeBlock, rd: A64Opnd, value: u64) -> usize {
    let mut current = value;

    if current <= 0xffff {
        // If the value fits into a single movz
        // instruction, then we'll use that.
        movz(cb, rd, A64Opnd::new_uimm(current), 0);
        1
    } else if BitmaskImmediate::try_from(current).is_ok() {
        // Otherwise, if the immediate can be encoded
        // with the special bitmask immediate encoding,
        // we'll use that.
        mov(cb, rd, A64Opnd::new_uimm(current));
        1
    } else {
        // Finally we'll fall back to encoding the value
        // using movz for the first 16 bits and movk for
        // each subsequent set of 16 bits as long as
        // they are necessary.
        movz(cb, rd, A64Opnd::new_uimm(current & 0xffff), 0);
        let mut num_insns = 1;

        // (We're sure this is necessary since we
        // checked if it only fit into movz above).
        current >>= 16;
        movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 16);
        num_insns += 1;

        // Bits 32..48 are only written when something above bit 31 is set.
        if current > 0xffff {
            current >>= 16;
            movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 32);
            num_insns += 1;
        }

        // Bits 48..64 are only written when something above bit 47 is set.
        if current > 0xffff {
            current >>= 16;
            movk(cb, rd, A64Opnd::new_uimm(current & 0xffff), 48);
            num_insns += 1;
        }

        num_insns
    }
}

/// List of registers that can be used for stack temps.
/// These are caller-saved registers.
pub static TEMP_REGS: [Reg; 5] = [X1_REG, X9_REG, X10_REG, X14_REG, X15_REG];

#[derive(Debug, PartialEq)]
enum EmitError {
    RetryOnNextPage,
    OutOfMemory,
}

impl Assembler
{
    // Special scratch registers for intermediate processing.
    // This register is caller-saved (so we don't have to save it before using it)
    pub const SCRATCH_REG: Reg = X16_REG;
    const SCRATCH0: A64Opnd = A64Opnd::Reg(Assembler::SCRATCH_REG);
    const SCRATCH1: A64Opnd = A64Opnd::Reg(X17_REG);

    /// Get the list of registers from which we will allocate on this platform
    /// These are caller-saved registers
    /// Note: we intentionally exclude C_RET_REG (X0) from this list
    /// because of the way it's used in gen_leave() and gen_leave_exit()
    pub fn get_alloc_regs() -> Vec<Reg> {
        vec![X11_REG, X12_REG, X13_REG]
    }

    /// Get a list of all of the caller-saved registers
    pub fn get_caller_save_regs() -> Vec<Reg> {
        vec![X1_REG, X9_REG, X10_REG, X11_REG, X12_REG, X13_REG, X14_REG, X15_REG]
    }

    /// Split platform-specific instructions
    /// The transformations done here are meant to make our lives simpler in later
    /// stages of the compilation pipeline.
    /// Here we may want to make sure that all instructions (except load and store)
    /// have no memory operands.
    fn arm64_split(mut self) -> Assembler {
        /// When we're attempting to load a memory address into a register, the
        /// displacement must fit into the maximum number of bits for an Op::Add
        /// immediate.
If it doesn't, we have to load the displacement into a /// register first. fn split_lea_operand(asm: &mut Assembler, opnd: Opnd) -> Opnd { match opnd { Opnd::Mem(Mem { base, disp, num_bits }) => { if disp >= 0 && ShiftedImmediate::try_from(disp as u64).is_ok() { asm.lea(opnd) } else { let disp = asm.load(Opnd::Imm(disp.into())); let reg = match base { MemBase::Reg(reg_no) => Opnd::Reg(Reg { reg_no, num_bits }), MemBase::InsnOut(idx) => Opnd::InsnOut { idx, num_bits } }; asm.add(reg, disp) } }, _ => unreachable!("Op::Lea only accepts Opnd::Mem operands.") } } /// When you're storing a register into a memory location or loading a /// memory location into a register, the displacement from the base /// register of the memory location must fit into 9 bits. If it doesn't, /// then we need to load that memory address into a register first. fn split_memory_address(asm: &mut Assembler, opnd: Opnd) -> Opnd { match opnd { Opnd::Mem(mem) => { if mem_disp_fits_bits(mem.disp) { opnd } else { let base = split_lea_operand(asm, opnd); Opnd::mem(64, base, 0) } }, _ => unreachable!("Can only split memory addresses.") } } /// Any memory operands you're sending into an Op::Load instruction need /// to be split in case their displacement doesn't fit into 9 bits. fn split_load_operand(asm: &mut Assembler, opnd: Opnd) -> Opnd { match opnd { Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd, Opnd::Mem(_) => { let split_opnd = split_memory_address(asm, opnd); let out_opnd = asm.load(split_opnd); // Many Arm insns support only 32-bit or 64-bit operands. asm.load with fewer // bits zero-extends the value, so it's safe to recognize it as a 32-bit value. if out_opnd.rm_num_bits() < 32 { out_opnd.with_num_bits(32).unwrap() } else { out_opnd } }, _ => asm.load(opnd) } } /// Operands that take the place of bitmask immediates must follow a /// certain encoding. In this function we ensure that those operands /// do follow that encoding, and if they don't then we load them first. 
fn split_bitmask_immediate(asm: &mut Assembler, opnd: Opnd, dest_num_bits: u8) -> Opnd { match opnd { Opnd::Reg(_) | Opnd::CArg(_) | Opnd::InsnOut { .. } | Opnd::Stack { .. } => opnd, Opnd::Mem(_) => split_load_operand(asm, opnd), Opnd::Imm(imm) => { if imm == 0 { Opnd::Reg(XZR_REG) } else if (dest_num_bits == 64 && BitmaskImmediate::try_from(imm as u64).is_ok()) || (dest_num_bits == 32 && u32::try_from(imm).is_ok() && BitmaskImmediate::new_32b_reg(imm as u32).is_ok()) { Opnd::UImm(imm as u64) } else { asm.load(opnd).with_num_bits(dest_num_bits).unwrap() } }, Opnd::UImm(uimm) => { if (dest_num_bits == 64 && BitmaskImmediate::try_from(uimm).is_ok()) || (dest_num_bits == 32 && u32::try_from(uimm).is_ok() && BitmaskImmediate::new_32b_reg(uimm as u32).is_ok()) { opnd } else { asm.load(opnd).with_num_bits(dest_num_bits).unwrap() } }, Opnd::None | Opnd::Value(_) => unreachable!() } } /// Operands that take the place of a shifted immediate must fit within /// a certain size. If they don't then we need to load them first. fn split_shifted_immediate(asm: &mut Assembler, opnd: Opnd) -> Opnd { match opnd { Opnd::Reg(_) | Opnd::CArg(_) | Opnd::InsnOut { .. } => opnd, Opnd::Mem(_) => split_load_operand(asm, opnd), Opnd::Imm(imm) => if ShiftedImmediate::try_from(imm as u64).is_ok() { opnd } else { asm.load(opnd) } Opnd::UImm(uimm) => { if ShiftedImmediate::try_from(uimm).is_ok() { opnd } else { asm.load(opnd) } }, Opnd::None | Opnd::Value(_) | Opnd::Stack { .. } => unreachable!() } } /// Returns the operands that should be used for a boolean logic /// instruction. 
fn split_boolean_operands(asm: &mut Assembler, opnd0: Opnd, opnd1: Opnd) -> (Opnd, Opnd) { match (opnd0, opnd1) { (Opnd::Reg(_), Opnd::Reg(_)) => { (opnd0, opnd1) }, (reg_opnd @ Opnd::Reg(_), other_opnd) | (other_opnd, reg_opnd @ Opnd::Reg(_)) => { let opnd1 = split_bitmask_immediate(asm, other_opnd, reg_opnd.rm_num_bits()); (reg_opnd, opnd1) }, _ => { let opnd0 = split_load_operand(asm, opnd0); let opnd1 = split_bitmask_immediate(asm, opnd1, opnd0.rm_num_bits()); (opnd0, opnd1) } } } /// Returns the operands that should be used for a csel instruction. fn split_csel_operands(asm: &mut Assembler, opnd0: Opnd, opnd1: Opnd) -> (Opnd, Opnd) { let opnd0 = match opnd0 { Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd0, _ => split_load_operand(asm, opnd0) }; let opnd1 = match opnd1 { Opnd::Reg(_) | Opnd::InsnOut { .. } => opnd1, _ => split_load_operand(asm, opnd1) }; (opnd0, opnd1) } fn split_less_than_32_cmp(asm: &mut Assembler, opnd0: Opnd) -> Opnd { match opnd0 { Opnd::Reg(_) | Opnd::InsnOut { .. } => { match opnd0.rm_num_bits() { 8 => asm.and(opnd0.with_num_bits(64).unwrap(), Opnd::UImm(0xff)), 16 => asm.and(opnd0.with_num_bits(64).unwrap(), Opnd::UImm(0xffff)), 32 | 64 => opnd0, bits => unreachable!("Invalid number of bits. {}", bits) } } _ => opnd0 } } let live_ranges: Vec = take(&mut self.live_ranges); let mut asm_local = Assembler::new_with_label_names(take(&mut self.label_names), take(&mut self.side_exits), self.num_locals); let asm = &mut asm_local; let mut iterator = self.into_draining_iter(); while let Some((index, mut insn)) = iterator.next_mapped() { // Here we're going to map the operands of the instruction to load // any Opnd::Value operands into registers if they are heap objects // such that only the Op::Load instruction needs to handle that // case. If the values aren't heap objects then we'll treat them as // if they were just unsigned integer. let is_load = matches!(insn, Insn::Load { .. } | Insn::LoadInto { .. 
}); let mut opnd_iter = insn.opnd_iter_mut(); while let Some(opnd) = opnd_iter.next() { match opnd { Opnd::Value(value) => { if value.special_const_p() { *opnd = Opnd::UImm(value.as_u64()); } else if !is_load { *opnd = asm.load(*opnd); } }, Opnd::Stack { .. } => { *opnd = asm.lower_stack_opnd(opnd); } _ => {} }; } // We are replacing instructions here so we know they are already // being used. It is okay not to use their output here. #[allow(unused_must_use)] match &mut insn { Insn::Add { left, right, .. } => { match (*left, *right) { (Opnd::Reg(_) | Opnd::InsnOut { .. }, Opnd::Reg(_) | Opnd::InsnOut { .. }) => { asm.add(*left, *right); }, (reg_opnd @ (Opnd::Reg(_) | Opnd::InsnOut { .. }), other_opnd) | (other_opnd, reg_opnd @ (Opnd::Reg(_) | Opnd::InsnOut { .. })) => { let opnd1 = split_shifted_immediate(asm, other_opnd); asm.add(reg_opnd, opnd1); }, _ => { let opnd0 = split_load_operand(asm, *left); let opnd1 = split_shifted_immediate(asm, *right); asm.add(opnd0, opnd1); } } }, Insn::And { left, right, out } | Insn::Or { left, right, out } | Insn::Xor { left, right, out } => { let (opnd0, opnd1) = split_boolean_operands(asm, *left, *right); *left = opnd0; *right = opnd1; // Since these instructions are lowered to an instruction that have 2 input // registers and an output register, look to merge with an `Insn::Mov` that // follows which puts the output in another register. For example: // `Add a, b => out` followed by `Mov c, out` becomes `Add a, b => c`. if let (Opnd::Reg(_), Opnd::Reg(_), Some(Insn::Mov { dest, src })) = (left, right, iterator.peek()) { if live_ranges[index] == index + 1 { // Check after potentially lowering a stack operand to a register operand let lowered_dest = if let Opnd::Stack { .. 
} = dest { asm.lower_stack_opnd(dest) } else { *dest }; if out == src && matches!(lowered_dest, Opnd::Reg(_)) { *out = lowered_dest; iterator.map_insn_index(asm); iterator.next_unmapped(); // Pop merged Insn::Mov } } } asm.push_insn(insn); } // Lower to Joz and Jonz for generating CBZ/CBNZ for compare-with-0-and-branch. ref insn @ Insn::Cmp { ref left, right: ref right @ (Opnd::UImm(0) | Opnd::Imm(0)) } | ref insn @ Insn::Test { ref left, right: ref right @ (Opnd::InsnOut { .. } | Opnd::Reg(_)) } if { let same_opnd_if_test = if let Insn::Test { .. } = insn { left == right } else { true }; same_opnd_if_test && if let Some( Insn::Jz(target) | Insn::Je(target) | Insn::Jnz(target) | Insn::Jne(target) ) = iterator.peek() { matches!(target, Target::SideExit { .. }) } else { false } } => { let reg = split_load_operand(asm, *left); match iterator.peek() { Some(Insn::Jz(target) | Insn::Je(target)) => asm.push_insn(Insn::Joz(reg, *target)), Some(Insn::Jnz(target) | Insn::Jne(target)) => asm.push_insn(Insn::Jonz(reg, *target)), _ => () } iterator.map_insn_index(asm); iterator.next_unmapped(); // Pop merged jump instruction } Insn::CCall { opnds, fptr, .. } => { assert!(opnds.len() <= C_ARG_OPNDS.len()); // Load each operand into the corresponding argument // register. // Note: the iteration order is reversed to avoid corrupting x0, // which is both the return value and first argument register for (idx, opnd) in opnds.into_iter().enumerate().rev() { // If the value that we're sending is 0, then we can use // the zero register, so in this case we'll just send // a UImm of 0 along as the argument to the move. let value = match opnd { Opnd::UImm(0) | Opnd::Imm(0) => Opnd::UImm(0), Opnd::Mem(_) => split_memory_address(asm, *opnd), _ => *opnd }; asm.load_into(Opnd::c_arg(C_ARG_OPNDS[idx]), value); } // Now we push the CCall without any arguments so that it // just performs the call. 
asm.ccall(*fptr, vec![]); }, Insn::Cmp { left, right } => { let opnd0 = split_load_operand(asm, *left); let opnd0 = split_less_than_32_cmp(asm, opnd0); let split_right = split_shifted_immediate(asm, *right); let opnd1 = match split_right { Opnd::InsnOut { .. } if opnd0.num_bits() != split_right.num_bits() => { split_right.with_num_bits(opnd0.num_bits().unwrap()).unwrap() }, _ => split_right }; asm.cmp(opnd0, opnd1); }, Insn::CRet(opnd) => { match opnd { // If the value is already in the return register, then // we don't need to do anything. Opnd::Reg(C_RET_REG) => {}, // If the value is a memory address, we need to first // make sure the displacement isn't too large and then // load it into the return register. Opnd::Mem(_) => { let split = split_memory_address(asm, *opnd); asm.load_into(C_RET_OPND, split); }, // Otherwise we just need to load the value into the // return register. _ => { asm.load_into(C_RET_OPND, *opnd); } } asm.cret(C_RET_OPND); }, Insn::CSelZ { truthy, falsy, out } | Insn::CSelNZ { truthy, falsy, out } | Insn::CSelE { truthy, falsy, out } | Insn::CSelNE { truthy, falsy, out } | Insn::CSelL { truthy, falsy, out } | Insn::CSelLE { truthy, falsy, out } | Insn::CSelG { truthy, falsy, out } | Insn::CSelGE { truthy, falsy, out } => { let (opnd0, opnd1) = split_csel_operands(asm, *truthy, *falsy); *truthy = opnd0; *falsy = opnd1; // Merge `csel` and `mov` into a single `csel` when possible match iterator.peek() { Some(Insn::Mov { dest: Opnd::Reg(reg), src }) if matches!(out, Opnd::InsnOut { .. 
}) && *out == *src && live_ranges[index] == index + 1 => { *out = Opnd::Reg(*reg); asm.push_insn(insn); iterator.map_insn_index(asm); iterator.next_unmapped(); // Pop merged Insn::Mov } _ => { asm.push_insn(insn); } } }, Insn::IncrCounter { mem, value } => { let counter_addr = match mem { Opnd::Mem(_) => split_lea_operand(asm, *mem), _ => *mem }; asm.incr_counter(counter_addr, *value); }, Insn::JmpOpnd(opnd) => { if let Opnd::Mem(_) = opnd { let opnd0 = split_load_operand(asm, *opnd); asm.jmp_opnd(opnd0); } else { asm.jmp_opnd(*opnd); } }, Insn::Load { opnd, .. } | Insn::LoadInto { opnd, .. } => { *opnd = match opnd { Opnd::Mem(_) => split_memory_address(asm, *opnd), _ => *opnd }; asm.push_insn(insn); }, Insn::LoadSExt { opnd, .. } => { match opnd { // We only want to sign extend if the operand is a // register, instruction output, or memory address that // is 32 bits. Otherwise we'll just load the value // directly since there's no need to sign extend. Opnd::Reg(Reg { num_bits: 32, .. }) | Opnd::InsnOut { num_bits: 32, .. } | Opnd::Mem(Mem { num_bits: 32, .. }) => { asm.load_sext(*opnd); }, _ => { asm.load(*opnd); } }; }, Insn::Mov { dest, src } => { match (&dest, &src) { // If we're attempting to load into a memory operand, then // we'll switch over to the store instruction. (Opnd::Mem(_), _) => { let opnd0 = split_memory_address(asm, *dest); let value = match *src { // If the first operand is zero, then we can just use // the zero register. Opnd::UImm(0) | Opnd::Imm(0) => Opnd::Reg(XZR_REG), // If the first operand is a memory operand, we're going // to transform this into a store instruction, so we'll // need to load this anyway. Opnd::UImm(_) => asm.load(*src), // The value that is being moved must be either a // register or an immediate that can be encoded as a // bitmask immediate. Otherwise, we'll need to split the // move into multiple instructions. 
_ => split_bitmask_immediate(asm, *src, dest.rm_num_bits()) }; asm.store(opnd0, value); }, // If we're loading a memory operand into a register, then // we'll switch over to the load instruction. (Opnd::Reg(_), Opnd::Mem(_)) => { let value = split_memory_address(asm, *src); asm.load_into(*dest, value); }, // Otherwise we'll use the normal mov instruction. (Opnd::Reg(_), _) => { let value = match *src { // Unlike other instructions, we can avoid splitting this case, using movz. Opnd::UImm(uimm) if uimm <= 0xffff => *src, _ => split_bitmask_immediate(asm, *src, dest.rm_num_bits()), }; asm.mov(*dest, value); }, _ => unreachable!() }; }, Insn::Not { opnd, .. } => { // The value that is being negated must be in a register, so // if we get anything else we need to load it first. let opnd0 = match opnd { Opnd::Mem(_) => split_load_operand(asm, *opnd), _ => *opnd }; asm.not(opnd0); }, Insn::LShift { opnd, .. } | Insn::RShift { opnd, .. } | Insn::URShift { opnd, .. } => { // The operand must be in a register, so // if we get anything else we need to load it first. let opnd0 = match opnd { Opnd::Mem(_) => split_load_operand(asm, *opnd), _ => *opnd }; *opnd = opnd0; asm.push_insn(insn); }, Insn::Store { dest, src } => { // The value being stored must be in a register, so if it's // not already one we'll load it first. let opnd1 = match src { // If the first operand is zero, then we can just use // the zero register. Opnd::UImm(0) | Opnd::Imm(0) => Opnd::Reg(XZR_REG), // Otherwise we'll check if we need to load it first. _ => split_load_operand(asm, *src) }; match dest { Opnd::Reg(_) => { // Store does not support a register as a dest operand. asm.mov(*dest, opnd1); } _ => { // The displacement for the STUR instruction can't be more // than 9 bits long. If it's longer, we need to load the // memory address into a register first. let opnd0 = split_memory_address(asm, *dest); asm.store(opnd0, opnd1); } } }, Insn::Sub { left, right, .. 
} => { let opnd0 = split_load_operand(asm, *left); let opnd1 = split_shifted_immediate(asm, *right); asm.sub(opnd0, opnd1); }, Insn::Mul { left, right, .. } => { let opnd0 = split_load_operand(asm, *left); let opnd1 = split_load_operand(asm, *right); asm.mul(opnd0, opnd1); }, Insn::Test { left, right } => { // The value being tested must be in a register, so if it's // not already one we'll load it first. let opnd0 = split_load_operand(asm, *left); // The second value must be either a register or an // unsigned immediate that can be encoded as a bitmask // immediate. If it's not one of those, we'll need to load // it first. let opnd1 = split_bitmask_immediate(asm, *right, opnd0.rm_num_bits()); asm.test(opnd0, opnd1); }, _ => { // If we have an output operand, then we need to replace it // with a new output operand from the new assembler. if insn.out_opnd().is_some() { let out_num_bits = Opnd::match_num_bits_iter(insn.opnd_iter()); let out = insn.out_opnd_mut().unwrap(); *out = asm.next_opnd_out(out_num_bits); } asm.push_insn(insn); } }; iterator.map_insn_index(asm); } asm_local } /// Emit platform-specific machine code /// Returns a list of GC offsets. Can return failure to signal caller to retry. fn arm64_emit(&mut self, cb: &mut CodeBlock, ocb: &mut Option<&mut OutlinedCb>) -> Result, EmitError> { /// Determine how many instructions it will take to represent moving /// this value into a register. Note that the return value of this /// function must correspond to how many instructions are used to /// represent this load in the emit_load_value function. fn emit_load_size(value: u64) -> u8 { if BitmaskImmediate::try_from(value).is_ok() { return 1; } if value < (1 << 16) { 1 } else if value < (1 << 32) { 2 } else if value < (1 << 48) { 3 } else { 4 } } /// Emit a conditional jump instruction to a specific target. This is /// called when lowering any of the conditional jump instructions. 
fn emit_conditional_jump(cb: &mut CodeBlock, target: Target) { match target { Target::CodePtr(dst_ptr) | Target::SideExitPtr(dst_ptr) => { let dst_addr = dst_ptr.as_offset(); let src_addr = cb.get_write_ptr().as_offset(); let num_insns = if bcond_offset_fits_bits((dst_addr - src_addr) / 4) { // If the jump offset fits into the conditional jump as // an immediate value and it's properly aligned, then we // can use the b.cond instruction directly. We're safe // to use as i32 here since we already checked that it // fits. let bytes = (dst_addr - src_addr) as i32; bcond(cb, CONDITION, InstructionOffset::from_bytes(bytes)); // Here we're going to return 1 because we've only // written out 1 instruction. 1 } else if b_offset_fits_bits((dst_addr - (src_addr + 4)) / 4) { // + 4 for bcond // If the jump offset fits into the unconditional jump as // an immediate value, we can use inverse b.cond + b. // // We're going to write out the inverse condition so // that if it doesn't match it will skip over the // instruction used for branching. bcond(cb, Condition::inverse(CONDITION), 2.into()); b(cb, InstructionOffset::from_bytes((dst_addr - (src_addr + 4)) as i32)); // + 4 for bcond // We've only written out 2 instructions. 2 } else { // Otherwise, we need to load the address into a // register and use the branch register instruction. let dst_addr = (dst_ptr.raw_ptr(cb) as usize).as_u64(); let load_insns: i32 = emit_load_size(dst_addr).into(); // We're going to write out the inverse condition so // that if it doesn't match it will skip over the // instructions used for branching. bcond(cb, Condition::inverse(CONDITION), (load_insns + 2).into()); emit_load_value(cb, Assembler::SCRATCH0, dst_addr); br(cb, Assembler::SCRATCH0); // Here we'll return the number of instructions that it // took to write out the destination address + 1 for the // b.cond and 1 for the br. 
load_insns + 2 }; if let Target::CodePtr(_) = target { // We need to make sure we have at least 6 instructions for // every kind of jump for invalidation purposes, so we're // going to write out padding nop instructions here. assert!(num_insns <= cb.conditional_jump_insns()); for _ in num_insns..cb.conditional_jump_insns() { nop(cb); } } }, Target::Label(label_idx) => { // Here we're going to save enough space for ourselves and // then come back and write the instruction once we know the // offset. We're going to assume we can fit into a single // b.cond instruction. It will panic otherwise. cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| { let bytes: i32 = (dst_addr - (src_addr - 4)).try_into().unwrap(); bcond(cb, CONDITION, InstructionOffset::from_bytes(bytes)); }); }, Target::SideExit { .. } => { unreachable!("Target::SideExit should have been compiled by compile_side_exit") }, }; } /// Emit a CBZ or CBNZ which branches when a register is zero or non-zero fn emit_cmp_zero_jump(cb: &mut CodeBlock, reg: A64Opnd, branch_if_zero: bool, target: Target) { if let Target::SideExitPtr(dst_ptr) = target { let dst_addr = dst_ptr.as_offset(); let src_addr = cb.get_write_ptr().as_offset(); if cmp_branch_offset_fits_bits((dst_addr - src_addr) / 4) { // If the offset fits in one instruction, generate cbz or cbnz let bytes = (dst_addr - src_addr) as i32; if branch_if_zero { cbz(cb, reg, InstructionOffset::from_bytes(bytes)); } else { cbnz(cb, reg, InstructionOffset::from_bytes(bytes)); } } else { // Otherwise, we load the address into a register and // use the branch register instruction. Note that because // side exits should always be close, this form should be // rare or impossible to see. let dst_addr = dst_ptr.raw_addr(cb) as u64; let load_insns: i32 = emit_load_size(dst_addr).into(); // Write out the inverse condition so that if // it doesn't match it will skip over the // instructions used for branching. 
if branch_if_zero { cbnz(cb, reg, InstructionOffset::from_insns(load_insns + 2)); } else { cbz(cb, reg, InstructionOffset::from_insns(load_insns + 2)); } emit_load_value(cb, Assembler::SCRATCH0, dst_addr); br(cb, Assembler::SCRATCH0); } } else { unreachable!("We should only generate Joz/Jonz with side-exit targets"); } } /// Emit a push instruction for the given operand by adding to the stack /// pointer and then storing the given value. fn emit_push(cb: &mut CodeBlock, opnd: A64Opnd) { str_pre(cb, opnd, A64Opnd::new_mem(64, C_SP_REG, -C_SP_STEP)); } /// Emit a pop instruction into the given operand by loading the value /// and then subtracting from the stack pointer. fn emit_pop(cb: &mut CodeBlock, opnd: A64Opnd) { ldr_post(cb, opnd, A64Opnd::new_mem(64, C_SP_REG, C_SP_STEP)); } /// Compile a side exit if Target::SideExit is given. fn compile_side_exit( target: Target, asm: &mut Assembler, ocb: &mut Option<&mut OutlinedCb>, ) -> Result { if let Target::SideExit { counter, context } = target { let side_exit = asm.get_side_exit(&context.unwrap(), Some(counter), ocb.as_mut().unwrap()) .ok_or(EmitError::OutOfMemory)?; Ok(Target::SideExitPtr(side_exit)) } else { Ok(target) } } // dbg!(&self.insns); // List of GC offsets let mut gc_offsets: Vec = Vec::new(); // Buffered list of PosMarker callbacks to fire if codegen is successful let mut pos_markers: Vec<(usize, CodePtr)> = vec![]; // For each instruction let start_write_pos = cb.get_write_pos(); let mut insn_idx: usize = 0; while let Some(insn) = self.insns.get(insn_idx) { let src_ptr = cb.get_write_ptr(); let had_dropped_bytes = cb.has_dropped_bytes(); let old_label_state = cb.get_label_state(); let mut insn_gc_offsets: Vec = Vec::new(); match insn { Insn::Comment(text) => { cb.add_comment(text); }, Insn::Label(target) => { cb.write_label(target.unwrap_label_idx()); }, // Report back the current position in the generated code Insn::PosMarker(..) 
=> { pos_markers.push((insn_idx, cb.get_write_ptr())) } Insn::BakeString(text) => { for byte in text.as_bytes() { cb.write_byte(*byte); } // Add a null-terminator byte for safety (in case we pass // this to C code) cb.write_byte(0); // Pad out the string to the next 4-byte boundary so that // it's easy to jump past. for _ in 0..(4 - ((text.len() + 1) % 4)) { cb.write_byte(0); } }, Insn::FrameSetup => { stp_pre(cb, X29, X30, A64Opnd::new_mem(128, C_SP_REG, -16)); // X29 (frame_pointer) = SP mov(cb, X29, C_SP_REG); }, Insn::FrameTeardown => { // SP = X29 (frame pointer) mov(cb, C_SP_REG, X29); ldp_post(cb, X29, X30, A64Opnd::new_mem(128, C_SP_REG, 16)); }, Insn::Add { left, right, out } => { adds(cb, out.into(), left.into(), right.into()); }, Insn::Sub { left, right, out } => { subs(cb, out.into(), left.into(), right.into()); }, Insn::Mul { left, right, out } => { // If the next instruction is jo (jump on overflow) match (self.insns.get(insn_idx + 1), self.insns.get(insn_idx + 2)) { (Some(Insn::JoMul(_)), _) | (Some(Insn::PosMarker(_)), Some(Insn::JoMul(_))) => { // Compute the high 64 bits smulh(cb, Self::SCRATCH0, left.into(), right.into()); // Compute the low 64 bits // This may clobber one of the input registers, // so we do it after smulh mul(cb, out.into(), left.into(), right.into()); // Produce a register that is all zeros or all ones // Based on the sign bit of the 64-bit mul result asr(cb, Self::SCRATCH1, out.into(), A64Opnd::UImm(63)); // If the high 64-bits are not all zeros or all ones, // matching the sign bit, then we have an overflow cmp(cb, Self::SCRATCH0, Self::SCRATCH1); // Insn::JoMul will emit_conditional_jump::<{Condition::NE}> } _ => { mul(cb, out.into(), left.into(), right.into()); } } }, Insn::And { left, right, out } => { and(cb, out.into(), left.into(), right.into()); }, Insn::Or { left, right, out } => { orr(cb, out.into(), left.into(), right.into()); }, Insn::Xor { left, right, out } => { eor(cb, out.into(), left.into(), right.into()); }, 
Insn::Not { opnd, out } => { mvn(cb, out.into(), opnd.into()); }, Insn::RShift { opnd, shift, out } => { asr(cb, out.into(), opnd.into(), shift.into()); }, Insn::URShift { opnd, shift, out } => { lsr(cb, out.into(), opnd.into(), shift.into()); }, Insn::LShift { opnd, shift, out } => { lsl(cb, out.into(), opnd.into(), shift.into()); }, Insn::Store { dest, src } => { // This order may be surprising but it is correct. The way // the Arm64 assembler works, the register that is going to // be stored is first and the address is second. However in // our IR we have the address first and the register second. match dest.rm_num_bits() { 64 | 32 => stur(cb, src.into(), dest.into()), 16 => sturh(cb, src.into(), dest.into()), num_bits => panic!("unexpected dest num_bits: {} (src: {:#?}, dest: {:#?})", num_bits, src, dest), } }, Insn::Load { opnd, out } | Insn::LoadInto { opnd, dest: out } => { match *opnd { Opnd::Reg(_) | Opnd::InsnOut { .. } => { mov(cb, out.into(), opnd.into()); }, Opnd::UImm(uimm) => { emit_load_value(cb, out.into(), uimm); }, Opnd::Imm(imm) => { emit_load_value(cb, out.into(), imm as u64); }, Opnd::Mem(_) => { match opnd.rm_num_bits() { 64 | 32 => ldur(cb, out.into(), opnd.into()), 16 => ldurh(cb, out.into(), opnd.into()), 8 => ldurb(cb, out.into(), opnd.into()), num_bits => panic!("unexpected num_bits: {}", num_bits) }; }, Opnd::Value(value) => { // We dont need to check if it's a special const // here because we only allow these operands to hit // this point if they're not a special const. assert!(!value.special_const_p()); // This assumes only load instructions can contain // references to GC'd Value operands. If the value // being loaded is a heap object, we'll report that // back out to the gc_offsets list. 
ldr_literal(cb, out.into(), 2.into()); b(cb, InstructionOffset::from_bytes(4 + (SIZEOF_VALUE as i32))); cb.write_bytes(&value.as_u64().to_le_bytes()); let ptr_offset: u32 = (cb.get_write_pos() as u32) - (SIZEOF_VALUE as u32); insn_gc_offsets.push(ptr_offset); }, Opnd::CArg { .. } => { unreachable!("C argument operand was not lowered before arm64_emit"); } Opnd::Stack { .. } => { unreachable!("Stack operand was not lowered before arm64_emit"); } Opnd::None => { unreachable!("Attempted to load from None operand"); } }; }, Insn::LoadSExt { opnd, out } => { match *opnd { Opnd::Reg(Reg { num_bits: 32, .. }) | Opnd::InsnOut { num_bits: 32, .. } => { sxtw(cb, out.into(), opnd.into()); }, Opnd::Mem(Mem { num_bits: 32, .. }) => { ldursw(cb, out.into(), opnd.into()); }, _ => unreachable!() }; }, Insn::Mov { dest, src } => { // This supports the following two kinds of immediates: // * The value fits into a single movz instruction // * It can be encoded with the special bitmask immediate encoding // arm64_split() should have split other immediates that require multiple instructions. match src { Opnd::UImm(uimm) if *uimm <= 0xffff => { movz(cb, dest.into(), A64Opnd::new_uimm(*uimm), 0); }, _ => { mov(cb, dest.into(), src.into()); } } }, Insn::Lea { opnd, out } => { let opnd: A64Opnd = opnd.into(); match opnd { A64Opnd::Mem(mem) => { add( cb, out.into(), A64Opnd::Reg(A64Reg { reg_no: mem.base_reg_no, num_bits: 64 }), A64Opnd::new_imm(mem.disp.into()) ); }, _ => { panic!("Op::Lea only accepts Opnd::Mem operands."); } }; }, Insn::LeaJumpTarget { out, target, .. 
} => { if let Target::Label(label_idx) = target { // Set output to the raw address of the label cb.label_ref(*label_idx, 4, |cb, end_addr, dst_addr| { adr(cb, Self::SCRATCH0, A64Opnd::new_imm(dst_addr - (end_addr - 4))); }); mov(cb, out.into(), Self::SCRATCH0); } else { // Set output to the jump target's raw address let target_code = target.unwrap_code_ptr(); let target_addr = target_code.raw_addr(cb).as_u64(); emit_load_value(cb, out.into(), target_addr); } }, Insn::CPush(opnd) => { emit_push(cb, opnd.into()); }, Insn::CPop { out } => { emit_pop(cb, out.into()); }, Insn::CPopInto(opnd) => { emit_pop(cb, opnd.into()); }, Insn::CPushAll => { let regs = Assembler::get_caller_save_regs(); for reg in regs { emit_push(cb, A64Opnd::Reg(reg)); } // Push the flags/state register mrs(cb, Self::SCRATCH0, SystemRegister::NZCV); emit_push(cb, Self::SCRATCH0); }, Insn::CPopAll => { let regs = Assembler::get_caller_save_regs(); // Pop the state/flags register msr(cb, SystemRegister::NZCV, Self::SCRATCH0); emit_pop(cb, Self::SCRATCH0); for reg in regs.into_iter().rev() { emit_pop(cb, A64Opnd::Reg(reg)); } }, Insn::CCall { fptr, .. } => { // The offset to the call target in bytes let src_addr = cb.get_write_ptr().raw_ptr(cb) as i64; let dst_addr = *fptr as i64; // Use BL if the offset is short enough to encode as an immediate. // Otherwise, use BLR with a register. if b_offset_fits_bits((dst_addr - src_addr) / 4) { bl(cb, InstructionOffset::from_bytes((dst_addr - src_addr) as i32)); } else { emit_load_value(cb, Self::SCRATCH0, dst_addr as u64); blr(cb, Self::SCRATCH0); } }, Insn::CRet { .. } => { ret(cb, A64Opnd::None); }, Insn::Cmp { left, right } => { cmp(cb, left.into(), right.into()); }, Insn::Test { left, right } => { tst(cb, left.into(), right.into()); }, Insn::JmpOpnd(opnd) => { br(cb, opnd.into()); }, Insn::Jmp(target) => { match compile_side_exit(*target, self, ocb)? 
{ Target::CodePtr(dst_ptr) => { emit_jmp_ptr(cb, dst_ptr, true); }, Target::SideExitPtr(dst_ptr) => { emit_jmp_ptr(cb, dst_ptr, false); }, Target::Label(label_idx) => { // Here we're going to save enough space for // ourselves and then come back and write the // instruction once we know the offset. We're going // to assume we can fit into a single b instruction. // It will panic otherwise. cb.label_ref(label_idx, 4, |cb, src_addr, dst_addr| { let bytes: i32 = (dst_addr - (src_addr - 4)).try_into().unwrap(); b(cb, InstructionOffset::from_bytes(bytes)); }); }, Target::SideExit { .. } => { unreachable!("Target::SideExit should have been compiled by compile_side_exit") }, }; }, Insn::Je(target) | Insn::Jz(target) => { emit_conditional_jump::<{Condition::EQ}>(cb, compile_side_exit(*target, self, ocb)?); }, Insn::Jne(target) | Insn::Jnz(target) | Insn::JoMul(target) => { emit_conditional_jump::<{Condition::NE}>(cb, compile_side_exit(*target, self, ocb)?); }, Insn::Jl(target) => { emit_conditional_jump::<{Condition::LT}>(cb, compile_side_exit(*target, self, ocb)?); }, Insn::Jg(target) => { emit_conditional_jump::<{Condition::GT}>(cb, compile_side_exit(*target, self, ocb)?); }, Insn::Jge(target) => { emit_conditional_jump::<{Condition::GE}>(cb, compile_side_exit(*target, self, ocb)?); }, Insn::Jbe(target) => { emit_conditional_jump::<{Condition::LS}>(cb, compile_side_exit(*target, self, ocb)?); }, Insn::Jb(target) => { emit_conditional_jump::<{Condition::CC}>(cb, compile_side_exit(*target, self, ocb)?); }, Insn::Jo(target) => { emit_conditional_jump::<{Condition::VS}>(cb, compile_side_exit(*target, self, ocb)?); }, Insn::Joz(opnd, target) => { emit_cmp_zero_jump(cb, opnd.into(), true, compile_side_exit(*target, self, ocb)?); }, Insn::Jonz(opnd, target) => { emit_cmp_zero_jump(cb, opnd.into(), false, compile_side_exit(*target, self, ocb)?); }, Insn::IncrCounter { mem, value } => { let label = cb.new_label("incr_counter_loop".to_string()); cb.write_label(label); ldaxr(cb, 
Self::SCRATCH0, mem.into()); add(cb, Self::SCRATCH0, Self::SCRATCH0, value.into()); // The status register that gets used to track whether or // not the store was successful must be 32 bytes. Since we // store the SCRATCH registers as their 64-bit versions, we // need to rewrap it here. let status = A64Opnd::Reg(Self::SCRATCH1.unwrap_reg().with_num_bits(32)); stlxr(cb, status, Self::SCRATCH0, mem.into()); cmp(cb, Self::SCRATCH1, A64Opnd::new_uimm(0)); emit_conditional_jump::<{Condition::NE}>(cb, Target::Label(label)); }, Insn::Breakpoint => { brk(cb, A64Opnd::None); }, Insn::CSelZ { truthy, falsy, out } | Insn::CSelE { truthy, falsy, out } => { csel(cb, out.into(), truthy.into(), falsy.into(), Condition::EQ); }, Insn::CSelNZ { truthy, falsy, out } | Insn::CSelNE { truthy, falsy, out } => { csel(cb, out.into(), truthy.into(), falsy.into(), Condition::NE); }, Insn::CSelL { truthy, falsy, out } => { csel(cb, out.into(), truthy.into(), falsy.into(), Condition::LT); }, Insn::CSelLE { truthy, falsy, out } => { csel(cb, out.into(), truthy.into(), falsy.into(), Condition::LE); }, Insn::CSelG { truthy, falsy, out } => { csel(cb, out.into(), truthy.into(), falsy.into(), Condition::GT); }, Insn::CSelGE { truthy, falsy, out } => { csel(cb, out.into(), truthy.into(), falsy.into(), Condition::GE); } Insn::LiveReg { .. } => (), // just a reg alloc signal, no code Insn::PadInvalPatch => { while (cb.get_write_pos().saturating_sub(std::cmp::max(start_write_pos, cb.page_start_pos()))) < cb.jmp_ptr_bytes() && !cb.has_dropped_bytes() { nop(cb); } } }; // On failure, jump to the next page and retry the current insn if !had_dropped_bytes && cb.has_dropped_bytes() && cb.next_page(src_ptr, emit_jmp_ptr_with_invalidation) { // Reset cb states before retrying the current Insn cb.set_label_state(old_label_state); // We don't want label references to cross page boundaries. Signal caller for // retry. 
if !self.label_names.is_empty() {
                    return Err(EmitError::RetryOnNextPage);
                }
            } else {
                insn_idx += 1;
                gc_offsets.append(&mut insn_gc_offsets);
            }
        }

        // Error if we couldn't write out everything
        if cb.has_dropped_bytes() {
            return Err(EmitError::OutOfMemory)
        } else {
            // No bytes dropped, so the pos markers point to valid code
            for (insn_idx, pos) in pos_markers {
                if let Insn::PosMarker(callback) = self.insns.get(insn_idx).unwrap() {
                    callback(pos, &cb);
                } else {
                    panic!("non-PosMarker in pos_markers insn_idx={insn_idx} {self:?}");
                }
            }

            return Ok(gc_offsets)
        }
    }

    /// Optimize and compile the stored instructions.
    ///
    /// The assembler is first run through `arm64_split` and register
    /// allocation (using `regs`), every label name is registered with `cb`,
    /// and the instructions are then emitted into `cb`.
    ///
    /// If emission reports `EmitError::RetryOnNextPage`, the label state of
    /// `cb` is restored, `cb` is moved to its next page, and emission is
    /// attempted exactly once more; that second attempt is asserted not to
    /// request another retry.
    ///
    /// Returns `Some((start_ptr, gc_offsets))` on success, where `start_ptr`
    /// is the write pointer of `cb` before emission and `gc_offsets` is the
    /// list produced by `arm64_emit`. Returns `None`, after clearing the
    /// labels of `cb`, if emission failed or `cb` dropped bytes.
    pub fn compile_with_regs(self, cb: &mut CodeBlock, ocb: Option<&mut OutlinedCb>, regs: Vec<Reg>) -> Option<(CodePtr, Vec<u32>)> {
        let asm = self.arm64_split();
        let mut asm = asm.alloc_regs(regs);

        // Create label instances in the code block. The indexes handed out by
        // the code block must line up with the assembler's own label indexes.
        for (idx, name) in asm.label_names.iter().enumerate() {
            let label_idx = cb.new_label(name.to_string());
            assert!(label_idx == idx);
        }

        let start_ptr = cb.get_write_ptr();
        let starting_label_state = cb.get_label_state();
        let mut ocb = ocb; // for &mut
        let emit_result = match asm.arm64_emit(cb, &mut ocb) {
            Err(EmitError::RetryOnNextPage) => {
                // we want to lower jumps to labels to b.cond instructions, which have a 1 MiB
                // range limit. We can easily exceed the limit in case the jump straddles two pages.
                // In this case, we retry with a fresh page.
                cb.set_label_state(starting_label_state);
                cb.next_page(start_ptr, emit_jmp_ptr_with_invalidation);
                let result = asm.arm64_emit(cb, &mut ocb);
                assert_ne!(
                    Err(EmitError::RetryOnNextPage),
                    result,
                    "should not fail when writing to a fresh code page"
                );
                result
            }
            result => result
        };

        if let (Ok(gc_offsets), false) = (emit_result, cb.has_dropped_bytes()) {
            cb.link_labels();

            // Invalidate icache for newly written out region so we don't run stale code.
            // It should invalidate only the code ranges of the current cb because the code
            // ranges of the other cb might have a memory region that is still PROT_NONE.
            #[cfg(not(test))]
            cb.without_page_end_reserve(|cb| {
                for (start, end) in cb.writable_addrs(start_ptr, cb.get_write_ptr()) {
                    unsafe { rb_yjit_icache_invalidate(start as _, end as _) };
                }
            });

            Some((start_ptr, gc_offsets))
        } else {
            cb.clear_labels();

            None
        }
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::disasm::*;

    /// Build a fresh assembler together with a small dummy code block.
    fn setup_asm() -> (Assembler, CodeBlock) {
        (Assembler::new(0), CodeBlock::new_dummy(1024))
    }

    #[test]
    fn test_emit_add() {
        let (mut asm, mut cb) = setup_asm();

        let opnd = asm.add(Opnd::Reg(X0_REG), Opnd::Reg(X1_REG));
        asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd);
        asm.compile_with_regs(&mut cb, None, vec![X3_REG]);

        // Assert that only 2 instructions were written.
        assert_eq!(8, cb.get_write_pos());
    }

    #[test]
    fn test_emit_bake_string() {
        let (mut asm, mut cb) = setup_asm();

        asm.bake_string("Hello, world!");
        asm.compile_with_num_regs(&mut cb, 0);

        // Testing that we pad the string to the nearest 4-byte boundary to make
        // it easier to jump over.
        assert_eq!(16, cb.get_write_pos());
    }

    #[test]
    fn test_emit_cpush_all() {
        let (mut asm, mut cb) = setup_asm();

        asm.cpush_all();
        asm.compile_with_num_regs(&mut cb, 0);
    }

    #[test]
    fn test_emit_cpop_all() {
        let (mut asm, mut cb) = setup_asm();

        asm.cpop_all();
        asm.compile_with_num_regs(&mut cb, 0);
    }

    #[test]
    fn test_emit_frame() {
        let (mut asm, mut cb) = setup_asm();

        asm.frame_setup();
        asm.frame_teardown();
        asm.compile_with_num_regs(&mut cb, 0);
    }

    #[test]
    fn test_emit_je_fits_into_bcond() {
        let (mut asm, mut cb) = setup_asm();

        let target: CodePtr = cb.get_write_ptr().add_bytes(80);

        asm.je(Target::CodePtr(target));
        asm.compile_with_num_regs(&mut cb, 0);
    }

    #[test]
    fn test_emit_je_does_not_fit_into_bcond() {
        let (mut asm, mut cb) = setup_asm();

        let offset = 1 << 21;
        let target: CodePtr = cb.get_write_ptr().add_bytes(offset);

        asm.je(Target::CodePtr(target));
        asm.compile_with_num_regs(&mut cb, 0);
    }

    #[test]
    fn test_emit_lea_label() {
        let (mut asm, mut cb) = setup_asm();

        let label = asm.new_label("label");
        let opnd = asm.lea_jump_target(label);

        asm.write_label(label);
        asm.bake_string("Hello, world!");
        asm.store(Opnd::mem(64, SP, 0), opnd);

        asm.compile_with_num_regs(&mut cb, 1);
    }

    #[test]
    fn test_emit_load_mem_disp_fits_into_load() {
        let (mut asm, mut cb) = setup_asm();

        let opnd = asm.load(Opnd::mem(64, SP, 0));
        asm.store(Opnd::mem(64, SP, 0), opnd);
        asm.compile_with_num_regs(&mut cb, 1);

        // Assert that two instructions were written: LDUR and STUR.
        assert_eq!(8, cb.get_write_pos());
    }

    #[test]
    fn test_emit_load_mem_disp_fits_into_add() {
        let (mut asm, mut cb) = setup_asm();

        let opnd = asm.load(Opnd::mem(64, SP, 1 << 10));
        asm.store(Opnd::mem(64, SP, 0), opnd);
        asm.compile_with_num_regs(&mut cb, 1);

        // Assert that three instructions were written: ADD, LDUR, and STUR.
        assert_eq!(12, cb.get_write_pos());
    }

    #[test]
    fn test_emit_load_mem_disp_does_not_fit_into_add() {
        let (mut asm, mut cb) = setup_asm();

        let opnd = asm.load(Opnd::mem(64, SP, 1 << 12 | 1));
        asm.store(Opnd::mem(64, SP, 0), opnd);
        asm.compile_with_num_regs(&mut cb, 1);

        // Assert that four instructions were written: MOVZ, ADD, LDUR, and STUR.
        assert_eq!(16, cb.get_write_pos());
    }

    #[test]
    fn test_emit_load_value_immediate() {
        let (mut asm, mut cb) = setup_asm();

        let opnd = asm.load(Opnd::Value(Qnil));
        asm.store(Opnd::mem(64, SP, 0), opnd);
        asm.compile_with_num_regs(&mut cb, 1);

        // Assert that only two instructions were written since the value is an
        // immediate.
        assert_eq!(8, cb.get_write_pos());
    }

    #[test]
    fn test_emit_load_value_non_immediate() {
        let (mut asm, mut cb) = setup_asm();

        let opnd = asm.load(Opnd::Value(VALUE(0xCAFECAFECAFE0000)));
        asm.store(Opnd::mem(64, SP, 0), opnd);
        asm.compile_with_num_regs(&mut cb, 1);

        // Assert that 20 bytes (five 4-byte words) were written since the
        // value is not an immediate: a literal load, a branch over the
        // embedded value, the 8-byte value itself, and the store.
        assert_eq!(20, cb.get_write_pos());
    }

    #[test]
    fn test_emit_test_32b_reg_not_bitmask_imm() {
        let (mut asm, mut cb) = setup_asm();
        let w0 = Opnd::Reg(X0_REG).with_num_bits(32).unwrap();
        asm.test(w0, Opnd::UImm(u32::MAX.into()));
        // All ones is not encodable with a bitmask immediate,
        // so this needs one register
        asm.compile_with_num_regs(&mut cb, 1);
    }

    #[test]
    fn test_emit_test_32b_reg_bitmask_imm() {
        let (mut asm, mut cb) = setup_asm();
        let w0 = Opnd::Reg(X0_REG).with_num_bits(32).unwrap();
        asm.test(w0, Opnd::UImm(0x80000001));
        asm.compile_with_num_regs(&mut cb, 0);
    }

    #[test]
    fn test_emit_or() {
        let (mut asm, mut cb) = setup_asm();

        let opnd = asm.or(Opnd::Reg(X0_REG), Opnd::Reg(X1_REG));
        asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd);
        asm.compile_with_num_regs(&mut cb, 1);
    }

    #[test]
    fn test_emit_lshift() {
        let (mut asm, mut cb) = setup_asm();

        let opnd = asm.lshift(Opnd::Reg(X0_REG), Opnd::UImm(5));
        asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd);
        asm.compile_with_num_regs(&mut cb, 1);
    }

    #[test]
    fn test_emit_rshift() {
        let (mut asm, mut cb) = setup_asm();

        let opnd = asm.rshift(Opnd::Reg(X0_REG), Opnd::UImm(5));
        asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd);
        asm.compile_with_num_regs(&mut cb, 1);
    }

    #[test]
    fn test_emit_urshift() {
        let (mut asm, mut cb) = setup_asm();

        let opnd = asm.urshift(Opnd::Reg(X0_REG), Opnd::UImm(5));
        asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd);
        asm.compile_with_num_regs(&mut cb, 1);
    }

    #[test]
    fn test_emit_test() {
        let (mut asm, mut cb) = setup_asm();

        asm.test(Opnd::Reg(X0_REG), Opnd::Reg(X1_REG));
        asm.compile_with_num_regs(&mut cb, 0);

        // Assert that only one instruction was written.
        assert_eq!(4, cb.get_write_pos());
    }

    #[test]
    fn test_emit_test_with_encodable_unsigned_immediate() {
        let (mut asm, mut cb) = setup_asm();

        asm.test(Opnd::Reg(X0_REG), Opnd::UImm(7));
        asm.compile_with_num_regs(&mut cb, 0);

        // Assert that only one instruction was written.
        assert_eq!(4, cb.get_write_pos());
    }

    #[test]
    fn test_emit_test_with_unencodable_unsigned_immediate() {
        let (mut asm, mut cb) = setup_asm();

        asm.test(Opnd::Reg(X0_REG), Opnd::UImm(5));
        asm.compile_with_num_regs(&mut cb, 1);

        // Assert that a load and a test instruction were written.
        assert_eq!(8, cb.get_write_pos());
    }

    #[test]
    fn test_emit_test_with_encodable_signed_immediate() {
        let (mut asm, mut cb) = setup_asm();

        asm.test(Opnd::Reg(X0_REG), Opnd::Imm(7));
        asm.compile_with_num_regs(&mut cb, 0);

        // Assert that only one instruction was written.
        assert_eq!(4, cb.get_write_pos());
    }

    #[test]
    fn test_emit_test_with_unencodable_signed_immediate() {
        let (mut asm, mut cb) = setup_asm();

        asm.test(Opnd::Reg(X0_REG), Opnd::Imm(5));
        asm.compile_with_num_regs(&mut cb, 1);

        // Assert that a load and a test instruction were written.
        assert_eq!(8, cb.get_write_pos());
    }

    #[test]
    fn test_emit_test_with_negative_signed_immediate() {
        let (mut asm, mut cb) = setup_asm();

        asm.test(Opnd::Reg(X0_REG), Opnd::Imm(-7));
        asm.compile_with_num_regs(&mut cb, 1);

        // Assert that a test instruction is written.
        assert_eq!(4, cb.get_write_pos());
    }

    #[test]
    fn test_32_bit_register_with_some_number() {
        let (mut asm, mut cb) = setup_asm();

        let shape_opnd = Opnd::mem(32, Opnd::Reg(X0_REG), 6);
        asm.cmp(shape_opnd, Opnd::UImm(4097));
        asm.compile_with_num_regs(&mut cb, 2);
    }

    #[test]
    fn test_16_bit_register_store_some_number() {
        let (mut asm, mut cb) = setup_asm();

        let shape_opnd = Opnd::mem(16, Opnd::Reg(X0_REG), 0);
        asm.store(shape_opnd, Opnd::UImm(4097));
        asm.compile_with_num_regs(&mut cb, 2);
    }

    #[test]
    fn test_32_bit_register_store_some_number() {
        let (mut asm, mut cb) = setup_asm();

        let shape_opnd = Opnd::mem(32, Opnd::Reg(X0_REG), 6);
        asm.store(shape_opnd, Opnd::UImm(4097));
        asm.compile_with_num_regs(&mut cb, 2);
    }

    #[test]
    fn test_bcond_straddling_code_pages() {
        const LANDING_PAGE: usize = 65;
        let mut asm = Assembler::new(0);
        let mut cb = CodeBlock::new_dummy_with_freed_pages(vec![0, LANDING_PAGE]);

        // Skip to near the end of the page. Room for two instructions.
        cb.set_pos(cb.page_start_pos() + cb.page_end() - 8);

        let end = asm.new_label("end");
        // Start with a conditional jump...
        asm.jz(end);

        // A few instructions, enough to cause a page switch.
        let sum = asm.add(399.into(), 111.into());
        let xorred = asm.xor(sum, 859.into());
        asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), xorred);
        asm.store(Opnd::mem(64, Opnd::Reg(X0_REG), 0), xorred);

        // The branch target. It should be in the landing page.
        asm.write_label(end);
        asm.cret(xorred);

        // [Bug #19385]
        // This used to panic with "The offset must be 19 bits or less."
        // due to attempting to lower the `asm.jz` above to a `b.eq` with an offset that's > 1 MiB.
        let starting_pos = cb.get_write_pos();
        asm.compile_with_num_regs(&mut cb, 2);
        let gap = cb.get_write_pos() - starting_pos;
        assert!(gap > 0b1111111111111111111);

        let instruction_at_starting_pos: [u8; 4] = unsafe {
            std::slice::from_raw_parts(cb.get_ptr(starting_pos).raw_ptr(&cb), 4)
        }.try_into().unwrap();
        assert_eq!(
            0b000101 << 26_u32,
            u32::from_le_bytes(instruction_at_starting_pos) & (0b111111 << 26_u32),
            "starting instruction should be an unconditional branch to the new page (B)"
        );
    }

    #[test]
    fn test_emit_xor() {
        let (mut asm, mut cb) = setup_asm();

        let opnd = asm.xor(Opnd::Reg(X0_REG), Opnd::Reg(X1_REG));
        asm.store(Opnd::mem(64, Opnd::Reg(X2_REG), 0), opnd);

        asm.compile_with_num_regs(&mut cb, 1);

        assert_disasm!(cb, "0b0001ca4b0000f8", "
            0x0: eor x11, x0, x1
            0x4: stur x11, [x2]
        ");
    }

    #[test]
    #[cfg(feature = "disasm")]
    fn test_simple_disasm() -> std::result::Result<(), capstone::Error> {
        // Test drive Capstone with simple input
        use capstone::prelude::*;

        let cs = Capstone::new()
            .arm64()
            .mode(arch::arm64::ArchMode::Arm)
            .build()?;

        let insns = cs.disasm_all(&[0x60, 0x0f, 0x80, 0xF2], 0x1000)?;

        match insns.as_ref() {
            [insn] => {
                assert_eq!(Some("movk"), insn.mnemonic());
                Ok(())
            }
            _ => Err(capstone::Error::CustomError(
                "expected to disassemble to movk",
            )),
        }
    }

    #[test]
    fn test_replace_mov_with_ldur() {
        let (mut asm, mut cb) = setup_asm();

        asm.mov(Opnd::Reg(TEMP_REGS[0]), Opnd::mem(64, CFP, 8));
        asm.compile_with_num_regs(&mut cb, 1);

        assert_disasm!(cb, "618240f8", {"
            0x0: ldur x1, [x19, #8]
        "});
    }

    #[test]
    fn test_not_split_mov() {
        let (mut asm, mut cb) = setup_asm();

        asm.mov(Opnd::Reg(TEMP_REGS[0]), Opnd::UImm(0xffff));
        asm.mov(Opnd::Reg(TEMP_REGS[0]), Opnd::UImm(0x10000));
        asm.compile_with_num_regs(&mut cb, 1);

        assert_disasm!(cb, "e1ff9fd2e10370b2", {"
            0x0: mov x1, #0xffff
            0x4: orr x1, xzr, #0x10000
        "});
    }

    #[test]
    fn test_merge_csel_mov() {
        let (mut asm, mut cb) = setup_asm();

        let out = asm.csel_l(Qtrue.into(), Qfalse.into());
        asm.mov(Opnd::Reg(TEMP_REGS[0]), out);
        asm.compile_with_num_regs(&mut cb, 2);

        assert_disasm!(cb, "8b0280d20c0080d261b18c9a", {"
            0x0: mov x11, #0x14
            0x4: mov x12, #0
            0x8: csel x1, x11, x12, lt
        "});
    }

    #[test]
    fn test_add_with_immediate() {
        let (mut asm, mut cb) = setup_asm();

        let out = asm.add(Opnd::Reg(TEMP_REGS[1]), 1.into());
        let out = asm.add(out, 1_usize.into());
        asm.mov(Opnd::Reg(TEMP_REGS[0]), out);
        asm.compile_with_num_regs(&mut cb, 2);

        assert_disasm!(cb, "2b0500b16b0500b1e1030baa", {"
            0x0: adds x11, x9, #1
            0x4: adds x11, x11, #1
            0x8: mov x1, x11
        "});
    }

    #[test]
    fn test_mul_with_immediate() {
        let (mut asm, mut cb) = setup_asm();

        let out = asm.mul(Opnd::Reg(TEMP_REGS[1]), 3.into());
        asm.mov(Opnd::Reg(TEMP_REGS[0]), out);
        asm.compile_with_num_regs(&mut cb, 2);

        assert_disasm!(cb, "6b0080d22b7d0b9be1030baa", {"
            0x0: mov x11, #3
            0x4: mul x11, x9, x11
            0x8: mov x1, x11
        "});
    }
}