Implement target-specific insn splitting with Kevin. Add tests.

author: Maxime Chevalier-Boisvert <[email protected]> 2022-05-19 15:01:20 -0400
committer: Takashi Kokubun <[email protected]> 2022-08-29 08:46:53 -0700
commit: 1b2ee62149d5fa8d8cbe2097f9fd7a3af31989c2 (patch)
tree: 26350c8aa8408b3ddbd4be9e19e7942433da2b00 /yjit
parent: 564f9503603ae261561193f69f1fbdef6a140aa1 (diff)
3 files changed, 81 insertions, 29 deletions
diff --git a/yjit/src/backend/ir.rs b/yjit/src/backend/ir.rs
index d26eb289c6..e292160efc 100644
--- a/yjit/src/backend/ir.rs
+++ b/yjit/src/backend/ir.rs
@@ -306,7 +306,7 @@ pub struct Assembler
 
     /// Parallel vec with insns
     /// Index of the last insn using the output of this insn
-    live_ranges: Vec<usize>
+    pub(super) live_ranges: Vec<usize>
 }
 
 impl Assembler
@@ -319,7 +319,7 @@ impl Assembler
     }
 
     /// Append an instruction to the list
-    fn push_insn(&mut self, op: Op, opnds: Vec<Opnd>, target: Option<Target>) -> Opnd
+    pub(super) fn push_insn(&mut self, op: Op, opnds: Vec<Opnd>, target: Option<Target>) -> Opnd
     {
         // If we find any InsnOut from previous instructions, we're going to
         // update the live range of the previous instruction to point to this
@@ -382,7 +382,7 @@ impl Assembler
     }
 
     /// Transform input instructions, consumes the input assembler
-    fn transform_insns<F>(mut self, mut map_insn: F) -> Assembler
+    pub(super) fn transform_insns<F>(mut self, mut map_insn: F) -> Assembler
         where F: FnMut(&mut Assembler, usize, Op, Vec<Opnd>, Option<Target>)
     {
         let mut asm = Assembler::new();
@@ -430,7 +430,7 @@ impl Assembler
     /// Transforms the instructions by splitting instructions that cannot be
     /// represented in the final architecture into multiple instructions that
     /// can.
-    fn split_insns(self) -> Assembler
+    pub(super) fn split_loads(self) -> Assembler
     {
         self.transform_insns(|asm, _, op, opnds, target| {
             match op {
@@ -458,7 +458,7 @@ impl Assembler
     /// Sets the out field on the various instructions that require allocated
     /// registers because their output is used as the operand on a subsequent
     /// instruction. This is our implementation of the linear scan algorithm.
-    fn alloc_regs(mut self, regs: Vec<Reg>) -> Assembler
+    pub(super) fn alloc_regs(mut self, regs: Vec<Reg>) -> Assembler
     {
         // First, create the pool of registers.
         let mut pool: u32 = 0;
@@ -517,6 +517,8 @@ impl Assembler
                 // If this instruction's first operand maps to a register and
                 // this is the last use of the register, reuse the register
                 // We do this to improve register allocation on x86
+                // e.g. out  = add(reg0, reg1)
+                //      reg0 = add(reg0, reg1)
                 if opnds.len() > 0 {
                     if let Opnd::InsnOut(idx) = opnds[0] {
                         if live_ranges[idx] == index {
@@ -527,8 +529,8 @@ impl Assembler
                     }
                 }
 
+                // Allocate a new register for this instruction
                 if out_reg == Opnd::None {
-                    // Allocate a new register for this instruction
                     out_reg = Opnd::Reg(alloc_reg(&mut pool, &regs))
                 }
             }
@@ -552,19 +554,11 @@ impl Assembler
         asm
     }
 
-    // Optimize and compile the stored instructions
+    /// Compile the instructions down to machine code, allocating from the registers returned by get_scratch_regs()
     pub fn compile(self, cb: &mut CodeBlock)
     {
-        // NOTE: for arm we're going to want to split loads but also stores
-        // This can be done in a platform-agnostic way, but the set of passes
-        // we run will be slightly different.
-
-        let scratch_regs = Self::get_scrach_regs();
-
-        dbg!(self
-        .split_insns()
-        .alloc_regs(scratch_regs))
-        .target_emit(cb);
+        let scratch_regs = Self::get_scratch_regs();
+        self.compile_with_regs(cb, scratch_regs);
     }
 }
 
@@ -694,17 +688,17 @@ mod tests {
     }
 
     #[test]
-    fn test_split_insns() {
+    fn test_split_loads() {
         let mut asm = Assembler::new();
 
-        let regs = Assembler::get_scrach_regs();
+        let regs = Assembler::get_scratch_regs();
 
         asm.add(
             Opnd::mem(64, Opnd::Reg(regs[0]), 0),
             Opnd::mem(64, Opnd::Reg(regs[1]), 0)
         );
 
-        let result = asm.split_insns();
+        let result = asm.split_loads();
         assert_eq!(result.insns.len(), 2);
         assert_eq!(result.insns[0].op, Op::Load);
     }
@@ -734,11 +728,11 @@ mod tests {
         asm.add(out3, Opnd::UImm(6));
 
         // Here we're going to allocate the registers.
-        let result = asm.alloc_regs(Assembler::get_scrach_regs());
+        let result = asm.alloc_regs(Assembler::get_scratch_regs());
 
         // Now we're going to verify that the out field has been appropriately
         // updated for each of the instructions that needs it.
-        let regs = Assembler::get_scrach_regs();
+        let regs = Assembler::get_scratch_regs();
         assert_eq!(result.insns[0].out, Opnd::Reg(regs[0]));
         assert_eq!(result.insns[2].out, Opnd::Reg(regs[1]));
         assert_eq!(result.insns[5].out, Opnd::Reg(regs[0]));
@@ -750,7 +744,7 @@ mod tests {
     {
         let mut asm = Assembler::new();
         let mut cb = CodeBlock::new_dummy(1024);
-        let regs = Assembler::get_scrach_regs();
+        let regs = Assembler::get_scratch_regs();
 
         let out = asm.add(Opnd::Reg(regs[0]), Opnd::UImm(2));
         asm.add(out, Opnd::UImm(2));
@@ -758,14 +752,31 @@ mod tests {
         asm.compile(&mut cb);
     }
 
-    // Test full codegen pipeline
+    // Test memory-to-memory move, compiled with only one register passed to the allocator
     #[test]
     fn test_mov_mem2mem()
     {
         let mut asm = Assembler::new();
         let mut cb = CodeBlock::new_dummy(1024);
+        let regs = Assembler::get_scratch_regs();
+
         asm.comment("check that comments work too");
         asm.mov(Opnd::mem(64, SP, 0), Opnd::mem(64, SP, 8));
-        asm.compile(&mut cb);
+
+        asm.compile_with_regs(&mut cb, vec![regs[0]]); // no assertions: the test passes if the pipeline runs without panicking
+    }
+
+    // Test loading SP into a new register and storing the result to memory, with only one register passed to the allocator
+    #[test]
+    fn test_load_reg()
+    {
+        let mut asm = Assembler::new();
+        let mut cb = CodeBlock::new_dummy(1024);
+        let regs = Assembler::get_scratch_regs();
+
+        let out = asm.load(SP);
+        asm.mov(Opnd::mem(64, SP, 0), out);
+
+        asm.compile_with_regs(&mut cb, vec![regs[0]]); // no assertions: the test passes if the pipeline runs without panicking
     }
 }
diff --git a/yjit/src/backend/x86_64/mod.rs b/yjit/src/backend/x86_64/mod.rs
index ed68e13eb6..65259a72f6 100644
--- a/yjit/src/backend/x86_64/mod.rs
+++ b/yjit/src/backend/x86_64/mod.rs
@@ -45,7 +45,7 @@ impl From<Opnd> for X86Opnd {
 impl Assembler
 {
     // Get the list of registers from which we can allocate on this platform
-    pub fn get_scrach_regs() -> Vec<Reg>
+    pub fn get_scratch_regs() -> Vec<Reg>
     {
         vec![
             RAX_REG,
@@ -54,6 +54,40 @@ impl Assembler
     }
 
     // Emit platform-specific machine code
+    fn target_split(mut self) -> Assembler // x86_64-specific splitting pass; consumes self and returns the rewritten Assembler
+    {
+        let live_ranges: Vec<usize> = std::mem::take(&mut self.live_ranges); // moved out of self so the closure can read them while transform_insns consumes self
+
+        self.transform_insns(|asm, index, op, opnds, target| {
+            match op {
+                Op::Add | Op::Sub | Op::And => { // only these ops are candidates; everything else is re-pushed unchanged below
+                    match opnds.as_slice() {
+                        // First operand is an earlier insn's output whose last use comes after this insn: operate on asm.load() of it instead (presumably so the still-live value is not overwritten -- confirm)
+                        [Opnd::InsnOut(out_idx), _] => {
+                            if live_ranges[*out_idx] > index {
+                                let opnd0 = asm.load(opnds[0]);
+                                asm.push_insn(op, vec![opnd0, opnds[1]], None); // NOTE(review): passes None, not `target` -- presumably these ops never carry a target; confirm
+                                return;
+                            }
+                        },
+
+                        [Opnd::Mem(_), _] => { // first operand is a memory operand: load it, then apply the op to the loaded value
+                            let opnd0 = asm.load(opnds[0]);
+                            asm.push_insn(op, vec![opnd0, opnds[1]], None);
+                            return;
+                        },
+
+                        _ => {}
+                    }
+                },
+                _ => {}
+            };
+
+            asm.push_insn(op, opnds, target); // default path: re-emit the instruction as-is
+        })
+    }
+
+    // Emit platform-specific machine code
     pub fn target_emit(&self, cb: &mut CodeBlock)
     {
         // For each instruction
@@ -87,4 +121,14 @@ impl Assembler
             };
         }
     }
+
+    /// Lower and compile the stored instructions: runs target_split, split_loads, alloc_regs (allocating only from `regs`), then target_emit into `cb`
+    pub fn compile_with_regs(self, cb: &mut CodeBlock, regs: Vec<Reg>)
+    {
+        self
+            .target_split()
+            .split_loads()
+            .alloc_regs(regs)
+            .target_emit(cb);
+    }
 }
diff --git a/yjit/src/codegen.rs b/yjit/src/codegen.rs
index 22e3c45438..28a2e6ca34 100644
--- a/yjit/src/codegen.rs
+++ b/yjit/src/codegen.rs
@@ -934,9 +934,6 @@ fn gen_dup_ir(
 
 
 
-
-
-
 // duplicate stack top n elements
 fn gen_dupn(
     jit: &mut JITState,
author	Maxime Chevalier-Boisvert <[email protected]>	2022-05-19 15:01:20 -0400
committer	Takashi Kokubun <[email protected]>	2022-08-29 08:46:53 -0700
commit	1b2ee62149d5fa8d8cbe2097f9fd7a3af31989c2 (patch)
tree	26350c8aa8408b3ddbd4be9e19e7942433da2b00 /yjit
parent	564f9503603ae261561193f69f1fbdef6a140aa1 (diff)