Skip to content

Commit d438ffc

Browse files
committed
cmd/compile/mips: intrinsify bits.RotateLeft32 on MIPS
This CL implements the ROTR & ROTRV instructions for MIPS and MIPS64, which are mips32r2 instructions. Additionally, bits.RotateLeft32 is now an intrinsic and will be rewritten to ROTR during the SSA phase.

This brings roughly a 65-70% improvement on mipsle code running Chacha20Poly1305 on an MT7688:

goos: linux
goarch: mipsle
pkg: golang.org/x/crypto/chacha20poly1305

name                         old time/op    new time/op    delta
Chacha20Poly1305/Open-16     56.2µs ±20%    38.5µs ±40%    -31.45%  (p=0.001 n=8+10)
Chacha20Poly1305/Seal-16     68.3µs ±49%    30.6µs ±13%    -55.14%  (p=0.000 n=10+10)
Chacha20Poly1305/Open-64     67.5µs ±22%    37.8µs ±19%    -43.98%  (p=0.000 n=9+9)
Chacha20Poly1305/Seal-64     64.7µs ±10%    37.6µs ± 8%    -41.96%  (p=0.000 n=9+8)
Chacha20Poly1305/Open-256     151µs ±13%      89µs ±20%    -41.03%  (p=0.000 n=9+10)
Chacha20Poly1305/Seal-256     148µs ±19%      93µs ±35%    -37.15%  (p=0.000 n=10+10)
Chacha20Poly1305/Open-1024    456µs ±16%     260µs ±23%    -42.95%  (p=0.000 n=10+10)
Chacha20Poly1305/Seal-1024    469µs ±14%     254µs ±15%    -45.88%  (p=0.000 n=10+9)
Chacha20Poly1305/Open-8192   3.59ms ±23%    1.94ms ±15%    -45.86%  (p=0.000 n=10+10)
Chacha20Poly1305/Seal-8192   3.47ms ±20%    2.03ms ±22%    -41.60%  (p=0.000 n=9+10)
Chacha20Poly1305/Open-16384  7.01ms ± 9%    4.22ms ±22%    -39.89%  (p=0.000 n=9+10)
Chacha20Poly1305/Seal-16384  7.43ms ±19%    4.23ms ±11%    -43.04%  (p=0.000 n=10+9)

name                         old speed      new speed      delta
Chacha20Poly1305/Open-16      258kB/s ±46%   431kB/s ±32%   +67.05%  (p=0.000 n=10+10)
Chacha20Poly1305/Seal-16      246kB/s ±35%   527kB/s ±13%  +114.23%  (p=0.000 n=10+10)
Chacha20Poly1305/Open-64      927kB/s ±31%  1664kB/s ±22%   +79.50%  (p=0.000 n=10+10)
Chacha20Poly1305/Seal-64      993kB/s ±10%  1709kB/s ± 8%   +72.02%  (p=0.000 n=9+8)
Chacha20Poly1305/Open-256    1.70MB/s ±13%  2.90MB/s ±18%   +70.88%  (p=0.000 n=9+10)
Chacha20Poly1305/Seal-256    1.74MB/s ±17%  2.81MB/s ±28%   +61.16%  (p=0.000 n=10+10)
Chacha20Poly1305/Open-1024   2.26MB/s ±15%  3.99MB/s ±20%   +76.38%  (p=0.000 n=10+10)
Chacha20Poly1305/Seal-1024   2.20MB/s ±13%  3.92MB/s ±32%   +78.82%  (p=0.000 n=10+10)
Chacha20Poly1305/Open-8192   2.31MB/s ±19%  4.24MB/s ±14%   +83.72%  (p=0.000 n=10+10)
Chacha20Poly1305/Seal-8192   2.30MB/s ±29%  4.09MB/s ±19%   +77.66%  (p=0.000 n=10+10)
Chacha20Poly1305/Open-16384  2.34MB/s ±10%  3.93MB/s ±19%   +68.04%  (p=0.000 n=9+10)
Chacha20Poly1305/Seal-16384  2.23MB/s ±17%  3.79MB/s ±23%   +70.00%  (p=0.000 n=10+10)

Fixes #39139
1 parent 7bfe32f commit d438ffc

File tree

13 files changed

+122
-37
lines changed

13 files changed

+122
-37
lines changed

src/cmd/compile/internal/mips/ssa.go

+4-2
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,8 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
168168
ssa.OpMIPSMULD,
169169
ssa.OpMIPSDIVF,
170170
ssa.OpMIPSDIVD,
171-
ssa.OpMIPSMUL:
171+
ssa.OpMIPSMUL,
172+
ssa.OpMIPSROTR:
172173
p := s.Prog(v.Op.Asm())
173174
p.From.Type = obj.TYPE_REG
174175
p.From.Reg = v.Args[1].Reg()
@@ -201,7 +202,8 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
201202
ssa.OpMIPSSRLconst,
202203
ssa.OpMIPSSRAconst,
203204
ssa.OpMIPSSGTconst,
204-
ssa.OpMIPSSGTUconst:
205+
ssa.OpMIPSSGTUconst,
206+
ssa.OpMIPSROTRconst:
205207
p := s.Prog(v.Op.Asm())
206208
p.From.Type = obj.TYPE_CONST
207209
p.From.Offset = v.AuxInt

src/cmd/compile/internal/mips64/ssa.go

+4-2
Original file line numberDiff line numberDiff line change
@@ -170,7 +170,8 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
170170
ssa.OpMIPS64MULF,
171171
ssa.OpMIPS64MULD,
172172
ssa.OpMIPS64DIVF,
173-
ssa.OpMIPS64DIVD:
173+
ssa.OpMIPS64DIVD,
174+
ssa.OpMIPS64ROTR:
174175
p := s.Prog(v.Op.Asm())
175176
p.From.Type = obj.TYPE_REG
176177
p.From.Reg = v.Args[1].Reg()
@@ -195,7 +196,8 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
195196
ssa.OpMIPS64SRLVconst,
196197
ssa.OpMIPS64SRAVconst,
197198
ssa.OpMIPS64SGTconst,
198-
ssa.OpMIPS64SGTUconst:
199+
ssa.OpMIPS64SGTUconst,
200+
ssa.OpMIPS64ROTRconst:
199201
p := s.Prog(v.Op.Asm())
200202
p.From.Type = obj.TYPE_CONST
201203
p.From.Offset = v.AuxInt

src/cmd/compile/internal/ssa/gen/MIPS.rules

+2-1
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,8 @@
111111
// rotates
112112
(RotateLeft8 <t> x (MOVWconst [c])) => (Or8 (Lsh8x32 <t> x (MOVWconst [c&7])) (Rsh8Ux32 <t> x (MOVWconst [-c&7])))
113113
(RotateLeft16 <t> x (MOVWconst [c])) => (Or16 (Lsh16x32 <t> x (MOVWconst [c&15])) (Rsh16Ux32 <t> x (MOVWconst [-c&15])))
114-
(RotateLeft32 <t> x (MOVWconst [c])) => (Or32 (Lsh32x32 <t> x (MOVWconst [c&31])) (Rsh32Ux32 <t> x (MOVWconst [-c&31])))
114+
(RotateLeft32 x (MOVWconst [c])) => (ROTRconst x [-c&31])
115+
(RotateLeft32 x y) => (ROTR x (NEG <y.Type> y))
115116
(RotateLeft64 <t> x (MOVWconst [c])) => (Or64 (Lsh64x32 <t> x (MOVWconst [c&63])) (Rsh64Ux32 <t> x (MOVWconst [-c&63])))
116117

117118
// unary ops

src/cmd/compile/internal/ssa/gen/MIPS64.rules

+2-1
Original file line numberDiff line numberDiff line change
@@ -111,7 +111,8 @@
111111
// rotates
112112
(RotateLeft8 <t> x (MOVVconst [c])) => (Or8 (Lsh8x64 <t> x (MOVVconst [c&7])) (Rsh8Ux64 <t> x (MOVVconst [-c&7])))
113113
(RotateLeft16 <t> x (MOVVconst [c])) => (Or16 (Lsh16x64 <t> x (MOVVconst [c&15])) (Rsh16Ux64 <t> x (MOVVconst [-c&15])))
114-
(RotateLeft32 <t> x (MOVVconst [c])) => (Or32 (Lsh32x64 <t> x (MOVVconst [c&31])) (Rsh32Ux64 <t> x (MOVVconst [-c&31])))
114+
(RotateLeft32 x (MOVVconst [c])) => (ROTRconst x [int32(-c&31)])
115+
(RotateLeft32 x y) => (ROTR x (NEGV <y.Type> y))
115116
(RotateLeft64 <t> x (MOVVconst [c])) => (Or64 (Lsh64x64 <t> x (MOVVconst [c&63])) (Rsh64Ux64 <t> x (MOVVconst [-c&63])))
116117

117118
// unary ops

src/cmd/compile/internal/ssa/gen/MIPS64Ops.go

+2
Original file line numberDiff line numberDiff line change
@@ -208,6 +208,8 @@ func init() {
208208
{name: "SRLVconst", argLength: 1, reg: gp11, asm: "SRLV", aux: "Int64"}, // arg0 >> auxInt, unsigned
209209
{name: "SRAV", argLength: 2, reg: gp21, asm: "SRAV"}, // arg0 >> arg1, signed, shift amount is mod 64
210210
{name: "SRAVconst", argLength: 1, reg: gp11, asm: "SRAV", aux: "Int64"}, // arg0 >> auxInt, signed
211+
{name: "ROTR", argLength: 2, reg: gp21, asm: "ROTR"}, // arg0 right rotate by (arg1 mod 32) bits
212+
{name: "ROTRconst", argLength: 1, reg: gp11, asm: "ROTR", aux: "Int32"}, // arg0 right rotate by auxInt bits
211213

212214
// comparisons
213215
{name: "SGT", argLength: 2, reg: gp21, asm: "SGT", typ: "Bool"}, // 1 if arg0 > arg1 (signed), 0 otherwise

src/cmd/compile/internal/ssa/gen/MIPSOps.go

+3
Original file line numberDiff line numberDiff line change
@@ -192,6 +192,9 @@ func init() {
192192
{name: "SRA", argLength: 2, reg: gp21, asm: "SRA"}, // arg0 >> arg1, signed, shift amount is mod 32
193193
{name: "SRAconst", argLength: 1, reg: gp11, asm: "SRA", aux: "Int32"}, // arg0 >> auxInt, signed, shift amount must be 0 through 31 inclusive
194194

195+
{name: "ROTR", argLength: 2, reg: gp21, asm: "ROTR"}, // arg0 right rotate by (arg1 mod 32) bits
196+
{name: "ROTRconst", argLength: 1, reg: gp11, asm: "ROTR", aux: "Int32"}, // arg0 right rotate by auxInt bits
197+
195198
{name: "CLZ", argLength: 1, reg: gp11, asm: "CLZ"},
196199

197200
// comparisons

src/cmd/compile/internal/ssa/opGen.go

+60
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cmd/compile/internal/ssa/rewriteMIPS.go

+16-15
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cmd/compile/internal/ssa/rewriteMIPS64.go

+16-15
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cmd/compile/internal/ssagen/ssa.go

+1-1
Original file line numberDiff line numberDiff line change
@@ -4394,7 +4394,7 @@ func InitTables() {
43944394
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
43954395
return s.newValue2(ssa.OpRotateLeft32, types.Types[types.TUINT32], args[0], args[1])
43964396
},
4397-
sys.AMD64, sys.ARM, sys.ARM64, sys.S390X, sys.PPC64, sys.Wasm)
4397+
sys.AMD64, sys.ARM, sys.ARM64, sys.S390X, sys.PPC64, sys.Wasm, sys.MIPS, sys.MIPS64)
43984398
addF("math/bits", "RotateLeft64",
43994399
func(s *state, n *ir.CallExpr, args []*ssa.Value) *ssa.Value {
44004400
return s.newValue2(ssa.OpRotateLeft64, types.Types[types.TUINT64], args[0], args[1])

src/cmd/internal/obj/mips/a.out.go

+1
Original file line numberDiff line numberDiff line change
@@ -390,6 +390,7 @@ const (
390390
AREM
391391
AREMU
392392
ARFE
393+
AROTR
393394
ASC
394395
ASCV
395396
ASGT

src/cmd/internal/obj/mips/anames.go

+1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

src/cmd/internal/obj/mips/asm0.go

+10
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,11 @@ var optab = []Optab{
107107
{ASLLV, C_REG, C_REG, C_REG, 9, 4, 0, sys.MIPS64, 0},
108108
{ACLO, C_REG, C_NONE, C_REG, 9, 4, 0, 0, 0},
109109

110+
{AROTR, C_REG, C_NONE, C_REG, 9, 4, 0, 0, 0},
111+
{AROTR, C_REG, C_REG, C_REG, 9, 4, 0, 0, 0},
112+
{AROTR, C_SCON, C_REG, C_REG, 16, 4, 0, 0, 0},
113+
{AROTR, C_SCON, C_NONE, C_REG, 16, 4, 0, 0, 0},
114+
110115
{AADDF, C_FREG, C_NONE, C_FREG, 32, 4, 0, 0, 0},
111116
{AADDF, C_FREG, C_REG, C_FREG, 32, 4, 0, 0, 0},
112117
{ACMPEQF, C_FREG, C_REG, C_NONE, 32, 4, 0, 0, 0},
@@ -1079,6 +1084,7 @@ func buildop(ctxt *obj.Link) {
10791084
ANEGW,
10801085
ANEGV,
10811086
AWORD,
1087+
AROTR,
10821088
obj.ANOP,
10831089
obj.ATEXT,
10841090
obj.AUNDEF,
@@ -1730,6 +1736,8 @@ func (c *ctxt0) oprrr(a obj.As) uint32 {
17301736
return OP(0, 4)
17311737
case ASRL:
17321738
return OP(0, 6)
1739+
case AROTR:
1740+
return OP(0, 6) | (1 << 6)
17331741
case ASRA:
17341742
return OP(0, 7)
17351743
case ASLLV:
@@ -1914,6 +1922,8 @@ func (c *ctxt0) opirr(a obj.As) uint32 {
19141922
return OP(0, 0)
19151923
case ASRL:
19161924
return OP(0, 2)
1925+
case AROTR:
1926+
return OP(0, 2) | (1 << 21)
19171927
case ASRA:
19181928
return OP(0, 3)
19191929
case AADDV:

0 commit comments

Comments
 (0)