author | Maxime Chevalier-Boisvert <[email protected]> | 2022-05-11 15:17:44 -0400
committer | Takashi Kokubun <[email protected]> | 2022-08-29 08:37:48 -0700
commit | 2ffaa377c212279e4a8bf1da8ac65a00dcdadd53 (patch)
tree | d7ed3c49c63d678b12f46046cf9f6ac8335c7325 /yjit
parent | 4ee1a687768338a1928014fc6042c320a1a1af3e (diff)
WIP backend IR sketch
Diffstat (limited to 'yjit')
-rw-r--r-- | yjit/src/asm/x86_64/mod.rs | 24
-rw-r--r-- | yjit/src/ir.rs | 479
-rw-r--r-- | yjit/src/lib.rs | 1
3 files changed, 492 insertions, 12 deletions
diff --git a/yjit/src/asm/x86_64/mod.rs b/yjit/src/asm/x86_64/mod.rs
index 6eb7efaa0a..b4ef2e4bf9 100644
--- a/yjit/src/asm/x86_64/mod.rs
+++ b/yjit/src/asm/x86_64/mod.rs
@@ -9,20 +9,20 @@ mod tests;
 pub struct X86Imm
 {
     // Size in bits
-    num_bits: u8,
+    pub num_bits: u8,
 
     // The value of the immediate
-    value: i64
+    pub value: i64
 }
 
 #[derive(Clone, Copy, Debug)]
 pub struct X86UImm
 {
     // Size in bits
-    num_bits: u8,
+    pub num_bits: u8,
 
     // The value of the immediate
-    value: u64
+    pub value: u64
 }
 
 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
@@ -38,32 +38,32 @@ pub enum RegType
 pub struct X86Reg
 {
     // Size in bits
-    num_bits: u8,
+    pub num_bits: u8,
 
     // Register type
-    reg_type: RegType,
+    pub reg_type: RegType,
 
     // Register index number
-    reg_no: u8,
+    pub reg_no: u8,
 }
 
 #[derive(Clone, Copy, Debug)]
 pub struct X86Mem
 {
     // Size in bits
-    num_bits: u8,
+    pub num_bits: u8,
 
     /// Base register number
-    base_reg_no: u8,
+    pub base_reg_no: u8,
 
     /// Index register number
-    idx_reg_no: Option<u8>,
+    pub idx_reg_no: Option<u8>,
 
     /// SIB scale exponent value (power of two, two bits)
-    scale_exp: u8,
+    pub scale_exp: u8,
 
     /// Constant displacement from the base, not scaled
-    disp: i32,
+    pub disp: i32,
 }
 
 #[derive(Clone, Copy, Debug)]
diff --git a/yjit/src/ir.rs b/yjit/src/ir.rs
new file mode 100644
index 0000000000..a20a982493
--- /dev/null
+++ b/yjit/src/ir.rs
@@ -0,0 +1,479 @@
+#![allow(dead_code)]
+#![allow(unused_variables)]
+#![allow(unused_imports)]
+
+use std::convert::From;
+use crate::cruby::{VALUE};
+use crate::virtualmem::{CodePtr};
+use crate::asm::x86_64::{X86Opnd, X86Imm, X86UImm, X86Reg, X86Mem, RegType};
+use crate::core::{Context, Type, TempMapping};
+
+
+
+
+/*
+// Minimally, we might want to specify how many operands and branch targets an insn has
+// Branch targets are not interchangeable with other operand types. We distinguish
+// between branch and regular instructions.
+//
+// TODO: should mark instructions that produce no output operand
+//
+make_ops! {
+    (Comment, 1, 0),
+    ...
+
+    // Call is variadic, might need to be special-cased
+}
+*/
+
+
+
+
+
+
+
+
+
+
+/// Instruction opcodes
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub enum Op
+{
+    // Add a comment into the IR at the point that this instruction is added. It
+    // won't have any impact on the actual compiled code, but it will impact
+    // the output of ir_print_insns. Accepts as its only operand an EIR_IMM
+    // operand (typically generated by ir_str_ptr).
+    Comment,
+
+    // Add a label into the IR at the point that this instruction is added. It
+    // will eventually be translated into an offset when generating code such
+    // that EIR_LABEL_IDX operands know where to jump to. Accepts as its only
+    // operand an EIR_LABEL_NAME operand (typically generated by ir_label_opnd).
+    Label,
+
+    // Add two operands together, and return the result as a new operand. This
+    // operand can then be used as the operand on another instruction. It
+    // accepts two operands, which can be of any type.
+    //
+    // Under the hood when allocating registers, the IR will determine the most
+    // efficient way to get these values into memory. For example, if both
+    // operands are immediates, then it will load the first one into a register
+    // first with a mov instruction and then add them together. If one of them
+    // is a register, however, it will just perform a single add instruction.
+    Add,
+
+    // This is the same as the OP_ADD instruction, except for subtraction.
+    Sub,
+
+    // This is the same as the OP_ADD instruction, except that it performs the
+    // binary AND operation.
+    And,
+
+    // Perform the NOT operation on an individual operand, and return the result
+    // as a new operand. This operand can then be used as the operand on another
+    // instruction.
+    Not,
+
+    //
+    // Low-level instructions
+    //
+
+    // A low-level mov instruction. It accepts two operands.
+    Mov,
+
+    // Bitwise AND test instruction
+    Test,
+
+    // Jump if not zero
+    Jnz,
+
+    /*
+    // The following are conditional jump instructions. They all accept as their
+    // first operand an EIR_LABEL_NAME, which is used as the target of the jump.
+    //
+    // The OP_JUMP_EQ instruction accepts two additional operands, to be
+    // compared for equality. If they're equal, then the generated code jumps to
+    // the target label. If they're not, then it continues on to the next
+    // instruction.
+    JumpEq,
+
+    // The OP_JUMP_NE instruction is very similar to the OP_JUMP_EQ instruction,
+    // except it compares for inequality instead.
+    JumpNe,
+
+    // Checks the overflow flag and conditionally jumps to the target if it is
+    // currently set.
+    JumpOvf,
+
+    // A low-level call instruction for calling a function by a pointer. It
+    // accepts one operand of type EIR_IMM that should be a pointer to the
+    // function. Usually this is done by first casting the function to a void*,
+    // as in: ir_const_ptr((void *)&my_function).
+    Call,
+
+    // Calls a function by a pointer and returns an operand that contains the
+    // result of the function. Accepts as its operands a pointer to a function
+    // of type EIR_IMM (usually generated from ir_const_ptr) and a variable
+    // number of arguments to the function being called.
+    //
+    // This is the higher-level instruction that should be used when you want to
+    // call a function with arguments, as opposed to OP_CALL which is
+    // lower-level and just calls a function without moving arguments into
+    // registers for you.
+    CCall,
+
+    // Returns from the function being generated immediately. This is different
+    // from OP_RETVAL in that it does nothing with the return value register
+    // (whatever is in there is what will get returned). Accepts no operands.
+    Ret,
+
+    // First, moves a value into the return value register. Then, returns from
+    // the generated function. Accepts as its only operand the value that should
+    // be returned from the generated function.
+    RetVal,
+
+    // A low-level cmp instruction. It accepts two operands. The first it
+    // expects to be a register. The second can be anything. Most of the time
+    // this instruction shouldn't be used by the developer since other
+    // instructions break down to this one.
+    Cmp,
+
+    // A conditional move instruction that should be preceded at some point by
+    // an OP_CMP instruction that would have set the requisite comparison flags.
+    // Accepts 2 operands, both of which are expected to be of the EIR_REG type.
+    //
+    // If the comparison indicates the left compared value is greater than or
+    // equal to the right compared value, then the conditional move is executed,
+    // otherwise we just continue on to the next instruction.
+    //
+    // This is considered a low-level instruction, and the OP_SELECT_* variants
+    // should be preferred if possible.
+    CMovGE,
+
+    // The same as OP_CMOV_GE, except the comparison is greater than.
+    CMovGT,
+
+    // The same as OP_CMOV_GE, except the comparison is less than or equal.
+    CMovLE,
+
+    // The same as OP_CMOV_GE, except the comparison is less than.
+    CMovLT,
+
+    // Selects between two different values based on a comparison of two other
+    // values. Accepts 4 operands. The first two are the basis of the
+    // comparison. The second two are the "then" case and the "else" case. You
+    // can effectively think of this instruction as a ternary operation, where
+    // the first two values are being compared.
+    //
+    // OP_SELECT_GE performs the described ternary using a greater than or equal
+    // comparison, that is if the first operand is greater than or equal to the
+    // second operand.
+    SelectGE,
+
+    // The same as OP_SELECT_GE, except the comparison is greater than.
+    SelectGT,
+
+    // The same as OP_SELECT_GE, except the comparison is less than or equal.
+    SelectLE,
+
+    // The same as OP_SELECT_GE, except the comparison is less than.
+    SelectLT,
+
+    // For later:
+    // These encode Ruby true/false semantics
+    // Can be used to enable op fusion of Ruby compare + branch.
+    // OP_JUMP_TRUE, // (opnd, target)
+    // OP_JUMP_FALSE, // (opnd, target)
+
+    // For later:
+    // OP_GUARD_HEAP, // (opnd, target)
+    // OP_GUARD_IMM, // (opnd, target)
+    // OP_GUARD_FIXNUM, // (opnd, target)
+
+    // For later:
+    // OP_COUNTER_INC, (counter_name)
+
+    // For later:
+    // OP_LEA,
+    // OP_TEST,
+    */
+}
+
+
+
+
+
+
+
+
+
+
+// Register value used by IR operands
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub struct Reg
+{
+    // Register number/index
+    reg_no: u8,
+
+    // Size in bits
+    num_bits: u8,
+
+    // Special register flag EC/CFP/SP/SELF
+    special: bool,
+}
+
+// Memory location
+#[derive(Copy, Clone, PartialEq, Eq, Debug)]
+pub struct Mem
+{
+    // Base register
+    base: Reg,
+
+    // Offset relative to the base pointer
+    disp: i32,
+
+    // Size in bits
+    num_bits: u8,
+}
+
+/// Operand to an IR instruction
+#[derive(Clone, PartialEq, Eq, Debug)]
+pub enum Opnd
+{
+    None,             // For insns with no output
+
+    Stack(u16),       // Value on the temp stack (idx)
+    Local(u16),       // Local variable (idx, do we need depth too?)
+    Value(VALUE),     // Immediate Ruby value, may be GC'd, movable
+    InsnOut(usize),   // Output of a preceding instruction in this block
+    String(String),   // String constant, used for comments
+
+    // Low-level operands, for lowering
+    Imm(i64),         // Raw signed immediate
+    UImm(u64),        // Raw unsigned immediate
+    Mem(Mem),         // Memory location (num_bits, base_ptr, const_offset)
+    Reg(Reg),         // Machine register (num_bits, idx)
+}
+
+// Special register constants
+pub const EC   : Opnd = Opnd::Reg(Reg { reg_no: 0, num_bits: 64, special: true });
+pub const CFP  : Opnd = Opnd::Reg(Reg { reg_no: 1, num_bits: 64, special: true });
+pub const SP   : Opnd = Opnd::Reg(Reg { reg_no: 2, num_bits: 64, special: true });
+pub const SELF : Opnd = Opnd::Reg(Reg { reg_no: 3, num_bits: 64, special: true });
+
+impl Opnd
+{
+    // Convenience constructor for memory operands
+    pub fn mem(num_bits: u8, base: Opnd, disp: i32) -> Self {
+        match base {
+            Opnd::Reg(base_reg) => {
+                assert!(base_reg.num_bits == 64 && !base_reg.special);
+                Opnd::Mem(Mem {
+                    num_bits: num_bits,
+                    base: base_reg,
+                    disp: disp,
+                })
+            },
+            _ => unreachable!()
+        }
+    }
+}
+
+/// Method to convert from an X86Opnd to an IR Opnd
+impl From<X86Opnd> for Opnd {
+    fn from(opnd: X86Opnd) -> Self {
+        match opnd {
+            X86Opnd::None => Opnd::None,
+            X86Opnd::UImm(X86UImm{ value, .. }) => Opnd::UImm(value),
+            X86Opnd::Imm(X86Imm{ value, .. }) => Opnd::Imm(value),
+
+            // General-purpose register
+            X86Opnd::Reg(X86Reg{ num_bits, reg_no, reg_type: RegType::GP }) => {
+                Opnd::Reg(Reg {
+                    reg_no,
+                    num_bits,
+                    special: false,
+                })
+            }
+
+            // Memory operand with displacement
+            X86Opnd::Mem(X86Mem{ num_bits, base_reg_no, disp, idx_reg_no: None, scale_exp: 0 }) => {
+                let base_reg = Reg { num_bits: 64, reg_no: base_reg_no, special: false };
+
+                Opnd::Mem(Mem {
+                    base: base_reg,
+                    disp,
+                    num_bits
+                })
+            }
+
+            _ => panic!("unsupported x86 operand type")
+        }
+    }
+}
+
+
+
+
+
+/// Branch target (something that we can jump to)
+/// for branch instructions
+#[derive(Clone, PartialEq, Eq, Debug)]
+enum BranchTarget
+{
+    CodePtr(CodePtr),   // Pointer to a piece of code (e.g. side-exit)
+    LabelName(String),  // A label without an index in the output
+    LabelIdx(u32),      // A label that has been indexed
+}
+
+/// YJIT IR instruction
+pub struct Insn
+{
+    // Opcode for the instruction
+    op: Op,
+
+    // List of input operands/values
+    opnds: Vec<Opnd>,
+
+    // List of branch targets (branch instructions only)
+    targets: Vec<BranchTarget>,
+
+    // Position in the generated machine code
+    // Useful for comments and for patching jumps
+    pos: Option<CodePtr>,
+}
+
+/// Object into which we assemble instructions to be
+/// optimized and lowered
+struct Assembler
+{
+    insns: Vec<Insn>
+}
+
+impl Assembler
+{
+    fn new() -> Assembler {
+        Assembler {
+            insns: Vec::default()
+        }
+    }
+
+    fn push_insn(&mut self, op: Op, opnds: Vec<Opnd>, targets: Vec<BranchTarget>) -> Opnd
+    {
+        let insn_idx = self.insns.len();
+
+        let insn = Insn {
+            op: op,
+            opnds: opnds,
+            targets: targets,
+            pos: None
+        };
+        self.insns.push(insn);
+
+        // Return an operand for the output of this instruction
+        Opnd::InsnOut(insn_idx)
+    }
+
+    // TODO:
+    //fn label(&self, name: &str) -> BranchTarget
+    //{
+    //}
+
+    // Optimize and compile the stored instructions
+    fn compile()
+    {
+        // Peephole optimizations
+        // Register allocation
+        // Generic lowering pass
+        // Platform-specific lowering
+    }
+}
+
+impl Assembler
+{
+    fn add(&mut self, opnd0: Opnd, opnd1: Opnd) -> Opnd
+    {
+        self.push_insn(Op::Add, vec![opnd0, opnd1], vec![])
+    }
+
+    fn mov(&mut self, opnd0: Opnd, opnd1: Opnd) -> Opnd
+    {
+        self.push_insn(Op::Mov, vec![opnd0, opnd1], vec![]);
+        Opnd::None
+    }
+}
+
+// NOTE: these methods are temporary and will likely move
+// to context.rs later
+// They are just wrappers to convert from X86Opnd into the IR Opnd type
+impl Context
+{
+    pub fn ir_stack_pop(&mut self, n: usize) -> Opnd {
+        self.stack_pop(n).into()
+    }
+
+    pub fn ir_stack_push(&mut self, val_type: Type) -> Opnd {
+        self.stack_push(val_type).into()
+    }
+
+    pub fn ir_stack_push_mapping(&mut self, (mapping, temp_type): (TempMapping, Type)) -> Opnd {
+        self.stack_push_mapping((mapping, temp_type)).into()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    use crate::core::*;
+    use InsnOpnd::*;
+
+    // Test that this function type checks
+    fn gen_dup(
+        ctx: &mut Context,
+        asm: &mut Assembler,
+    ) {
+        let dup_val = ctx.ir_stack_pop(0);
+        let (mapping, tmp_type) = ctx.get_opnd_mapping(StackOpnd(0));
+
+        let loc0 = ctx.ir_stack_push_mapping((mapping, tmp_type));
+        asm.mov(loc0, dup_val);
+    }
+
+
+
+
+
+    #[test]
+    fn test_add() {
+        let mut asm = Assembler::new();
+        let out = asm.add(SP, Opnd::UImm(1));
+        asm.add(out, Opnd::UImm(2));
+    }
+}
+
+
+
+
+
+// TODO: we need a test instruction
+// Can we combine this with a branch?
+//
+/*
+fn guard_object_is_heap(
+    cb: &mut CodeBlock,
+    object_opnd: X86Opnd,
+    _ctx: &mut Context,
+    side_exit: CodePtr,
+) {
+    add_comment(cb, "guard object is heap");
+
+    // Test that the object is not an immediate
+    test(cb, object_opnd, uimm_opnd(RUBY_IMMEDIATE_MASK as u64));
+    jnz_ptr(cb, side_exit);
+
+    // Test that the object is not false or nil
+    cmp(cb, object_opnd, uimm_opnd(Qnil.into()));
+    jbe_ptr(cb, side_exit);
+}
+*/
\ No newline at end of file
diff --git a/yjit/src/lib.rs b/yjit/src/lib.rs
index 6772f551a8..019189e8e8 100644
--- a/yjit/src/lib.rs
+++ b/yjit/src/lib.rs
@@ -4,6 +4,7 @@
 #![allow(clippy::identity_op)] // Sometimes we do it for style
 
 mod asm;
+mod ir;
 mod codegen;
 mod core;
 mod cruby;
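
A possible follow-up to the trailing TODO in ir.rs ("we need a test instruction / Can we combine this with a branch?"): the commented-out guard_object_is_heap could be re-expressed against the new IR instead of direct x86 calls, since Test and Jnz already exist as opcodes. The sketch below is illustrative only and assumes it sits inside yjit/src/ir.rs next to the definitions from this patch; the test/jnz helper methods are hypothetical wrappers around push_insn in the style of the existing add/mov helpers, and RUBY_IMMEDIATE_MASK is assumed to be importable from the cruby module as in the commented-out snippet.

// Sketch only: hypothetical test/jnz helpers, mirroring the add/mov wrappers.
impl Assembler
{
    fn test(&mut self, opnd0: Opnd, opnd1: Opnd) -> Opnd
    {
        self.push_insn(Op::Test, vec![opnd0, opnd1], vec![])
    }

    fn jnz(&mut self, target: BranchTarget)
    {
        self.push_insn(Op::Jnz, vec![], vec![target]);
    }
}

// guard_object_is_heap re-expressed with IR operands (sketch).
// The object operand would typically come from Context::ir_stack_pop().
fn ir_guard_object_is_heap(asm: &mut Assembler, object: Opnd, side_exit: CodePtr)
{
    // Test that the object is not an immediate
    asm.test(object, Opnd::UImm(crate::cruby::RUBY_IMMEDIATE_MASK as u64));
    asm.jnz(BranchTarget::CodePtr(side_exit));

    // The false/nil check (cmp + jbe in the x86 version) is omitted here
    // because the sketched Op enum has no below-or-equal jump yet.
}

Whether the compare should be fused into the branch, as the TODO asks, would then be a question for the lowering passes sketched in Assembler::compile.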
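For context on why the x86_64/mod.rs fields become pub in this patch: the From<X86Opnd> impl and the Context::ir_stack_* wrappers need to destructure raw x86 operands into IR operands. Below is a minimal test-style sketch of that conversion which could live in the ir.rs tests module; the register number 4 and the -8 displacement are made-up example values, not real YJIT register assignments.

    #[test]
    fn test_x86_mem_into_ir_opnd() {
        use crate::asm::x86_64::{X86Opnd, X86Mem};

        // A 64-bit memory operand at [base reg 4 - 8], no index register.
        let x86_slot = X86Opnd::Mem(X86Mem {
            num_bits: 64,
            base_reg_no: 4,
            idx_reg_no: None,
            scale_exp: 0,
            disp: -8,
        });

        // The From impl keeps only base/displacement/size and assumes a 64-bit base.
        let ir_opnd: Opnd = x86_slot.into();
        let base = Opnd::Reg(Reg { reg_no: 4, num_bits: 64, special: false });
        assert_eq!(ir_opnd, Opnd::mem(64, base, -8));
    }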