cmd/compile: add SHLX&SHRX without load

Change-Id: I79eb5e7d6bcb23f26d3a100e915efff6dae70391
Reviewed-on: https://go-review.googlesource.com/c/go/+/399061
Reviewed-by: Keith Randall <khr@golang.org>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
This commit is contained in:
Wayne Zuo 2022-04-09 14:40:40 +08:00 committed by Keith Randall
parent 517781b391
commit 66f03f79da
6 changed files with 2481 additions and 111 deletions

View file

@ -282,7 +282,9 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) {
p.To.Reg = v.Reg()
p.SetFrom3Reg(v.Args[1].Reg())
case ssa.OpAMD64SARXL, ssa.OpAMD64SARXQ:
case ssa.OpAMD64SARXL, ssa.OpAMD64SARXQ,
ssa.OpAMD64SHLXL, ssa.OpAMD64SHLXQ,
ssa.OpAMD64SHRXL, ssa.OpAMD64SHRXQ:
p := opregreg(s, v.Op.Asm(), v.Reg(), v.Args[1].Reg())
p.SetFrom3Reg(v.Args[0].Reg())

View file

@ -206,8 +206,10 @@
(Rsh16x(64|32|16|8) x y) && shiftIsBounded(v) => (SARW x y)
(Rsh8x(64|32|16|8) x y) && shiftIsBounded(v) => (SARB x y)
// Prefer SARX instruction because it has less register restriction on the shift input.
// Prefer SARX/SHLX/SHRX instructions because they have fewer register restrictions on the shift input.
(SAR(Q|L) x y) && buildcfg.GOAMD64 >= 3 => (SARX(Q|L) x y)
(SHL(Q|L) x y) && buildcfg.GOAMD64 >= 3 => (SHLX(Q|L) x y)
(SHR(Q|L) x y) && buildcfg.GOAMD64 >= 3 => (SHRX(Q|L) x y)
// Lowering integer comparisons
(Less(64|32|16|8) x y) => (SETL (CMP(Q|L|W|B) x y))
@ -593,6 +595,8 @@
// mutandis, for UGE and SETAE, and CC and SETCC.
((NE|EQ) (TESTL (SHLL (MOVLconst [1]) x) y)) => ((ULT|UGE) (BTL x y))
((NE|EQ) (TESTQ (SHLQ (MOVQconst [1]) x) y)) => ((ULT|UGE) (BTQ x y))
((NE|EQ) (TESTL (SHLXL (MOVLconst [1]) x) y)) => ((ULT|UGE) (BTL x y))
((NE|EQ) (TESTQ (SHLXQ (MOVQconst [1]) x) y)) => ((ULT|UGE) (BTQ x y))
((NE|EQ) (TESTLconst [c] x)) && isUint32PowerOfTwo(int64(c))
=> ((ULT|UGE) (BTLconst [int8(log32(c))] x))
((NE|EQ) (TESTQconst [c] x)) && isUint64PowerOfTwo(int64(c))
@ -601,6 +605,8 @@
=> ((ULT|UGE) (BTQconst [int8(log64(c))] x))
(SET(NE|EQ) (TESTL (SHLL (MOVLconst [1]) x) y)) => (SET(B|AE) (BTL x y))
(SET(NE|EQ) (TESTQ (SHLQ (MOVQconst [1]) x) y)) => (SET(B|AE) (BTQ x y))
(SET(NE|EQ) (TESTL (SHLXL (MOVLconst [1]) x) y)) => (SET(B|AE) (BTL x y))
(SET(NE|EQ) (TESTQ (SHLXQ (MOVQconst [1]) x) y)) => (SET(B|AE) (BTQ x y))
(SET(NE|EQ) (TESTLconst [c] x)) && isUint32PowerOfTwo(int64(c))
=> (SET(B|AE) (BTLconst [int8(log32(c))] x))
(SET(NE|EQ) (TESTQconst [c] x)) && isUint64PowerOfTwo(int64(c))
@ -612,6 +618,10 @@
=> (SET(B|AE)store [off] {sym} ptr (BTL x y) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTQ (SHLQ (MOVQconst [1]) x) y) mem)
=> (SET(B|AE)store [off] {sym} ptr (BTQ x y) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTL (SHLXL (MOVLconst [1]) x) y) mem)
=> (SET(B|AE)store [off] {sym} ptr (BTL x y) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTQ (SHLXQ (MOVQconst [1]) x) y) mem)
=> (SET(B|AE)store [off] {sym} ptr (BTQ x y) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTLconst [c] x) mem) && isUint32PowerOfTwo(int64(c))
=> (SET(B|AE)store [off] {sym} ptr (BTLconst [int8(log32(c))] x) mem)
(SET(NE|EQ)store [off] {sym} ptr (TESTQconst [c] x) mem) && isUint64PowerOfTwo(int64(c))
@ -624,9 +634,10 @@
(BT(Q|L)const [c] (SHRQconst [d] x)) && (c+d)<64 => (BTQconst [c+d] x)
(BT(Q|L)const [c] (SHLQconst [d] x)) && c>d => (BT(Q|L)const [c-d] x)
(BT(Q|L)const [0] s:(SHRQ x y)) => (BTQ y x)
(BT(Q|L)const [0] s:(SHRXQ x y)) => (BTQ y x)
(BTLconst [c] (SHRLconst [d] x)) && (c+d)<32 => (BTLconst [c+d] x)
(BTLconst [c] (SHLLconst [d] x)) && c>d => (BTLconst [c-d] x)
(BTLconst [0] s:(SHRL x y)) => (BTL y x)
(BTLconst [0] s:(SHR(L|XL) x y)) => (BTL y x)
// Rewrite a & 1 != 1 into a & 1 == 0.
// Among other things, this lets us turn (a>>b)&1 != 1 into a bit test.
@ -638,6 +649,8 @@
// Recognize bit setting (a |= 1<<b) and toggling (a ^= 1<<b)
(OR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTS(Q|L) x y)
(XOR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTC(Q|L) x y)
(OR(Q|L) (SHLX(Q|L) (MOV(Q|L)const [1]) y) x) => (BTS(Q|L) x y)
(XOR(Q|L) (SHLX(Q|L) (MOV(Q|L)const [1]) y) x) => (BTC(Q|L) x y)
// Convert ORconst into BTS, if the code gets smaller, with boundary being
// (ORL $40,AX is 3 bytes, ORL $80,AX is 6 bytes).
@ -653,6 +666,8 @@
// Recognize bit clearing: a &^= 1<<b
(AND(Q|L) (NOT(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y)) x) => (BTR(Q|L) x y)
(ANDN(Q|L) x (SHL(Q|L) (MOV(Q|L)const [1]) y)) => (BTR(Q|L) x y)
(AND(Q|L) (NOT(Q|L) (SHLX(Q|L) (MOV(Q|L)const [1]) y)) x) => (BTR(Q|L) x y)
(ANDN(Q|L) x (SHLX(Q|L) (MOV(Q|L)const [1]) y)) => (BTR(Q|L) x y)
(ANDQconst [c] x) && isUint64PowerOfTwo(int64(^c)) && uint64(^c) >= 128
=> (BTRQconst [int8(log32(^c))] x)
(ANDLconst [c] x) && isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128
@ -794,6 +809,8 @@
(SHLQ x (MOV(Q|L)const [c])) => (SHLQconst [int8(c&63)] x)
(SHLL x (MOV(Q|L)const [c])) => (SHLLconst [int8(c&31)] x)
(SHLXQ x (MOV(Q|L)const [c])) => (SHLQconst [int8(c&63)] x)
(SHLXL x (MOV(Q|L)const [c])) => (SHLLconst [int8(c&31)] x)
(SHRQ x (MOV(Q|L)const [c])) => (SHRQconst [int8(c&63)] x)
(SHRL x (MOV(Q|L)const [c])) => (SHRLconst [int8(c&31)] x)
@ -801,6 +818,8 @@
(SHRW _ (MOV(Q|L)const [c])) && c&31 >= 16 => (MOVLconst [0])
(SHRB x (MOV(Q|L)const [c])) && c&31 < 8 => (SHRBconst [int8(c&31)] x)
(SHRB _ (MOV(Q|L)const [c])) && c&31 >= 8 => (MOVLconst [0])
(SHRXQ x (MOV(Q|L)const [c])) => (SHRQconst [int8(c&63)] x)
(SHRXL x (MOV(Q|L)const [c])) => (SHRLconst [int8(c&31)] x)
(SARQ x (MOV(Q|L)const [c])) => (SARQconst [int8(c&63)] x)
(SARL x (MOV(Q|L)const [c])) => (SARLconst [int8(c&31)] x)
@ -810,25 +829,25 @@
(SARXL x (MOV(Q|L)const [c])) => (SARLconst [int8(c&31)] x)
// Operations which don't affect the low 6/5 bits of the shift amount are NOPs.
((SHLQ|SHRQ|SARQ|SARXQ) x (ADDQconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SARXQ) x y)
((SHLQ|SHRQ|SARQ|SARXQ) x (NEGQ <t> (ADDQconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SARXQ) x (NEGQ <t> y))
((SHLQ|SHRQ|SARQ|SARXQ) x (ANDQconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SARXQ) x y)
((SHLQ|SHRQ|SARQ|SARXQ) x (NEGQ <t> (ANDQconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SARXQ) x (NEGQ <t> y))
((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (ADDQconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x y)
((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGQ <t> (ADDQconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGQ <t> y))
((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (ANDQconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x y)
((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGQ <t> (ANDQconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGQ <t> y))
((SHLL|SHRL|SARL|SARXL) x (ADDQconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL|SARXL) x y)
((SHLL|SHRL|SARL|SARXL) x (NEGQ <t> (ADDQconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL|SARXL) x (NEGQ <t> y))
((SHLL|SHRL|SARL|SARXL) x (ANDQconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL|SARXL) x y)
((SHLL|SHRL|SARL|SARXL) x (NEGQ <t> (ANDQconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL|SARXL) x (NEGQ <t> y))
((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (ADDQconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x y)
((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGQ <t> (ADDQconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGQ <t> y))
((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (ANDQconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x y)
((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGQ <t> (ANDQconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGQ <t> y))
((SHLQ|SHRQ|SARQ|SARXQ) x (ADDLconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SARXQ) x y)
((SHLQ|SHRQ|SARQ|SARXQ) x (NEGL <t> (ADDLconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SARXQ) x (NEGL <t> y))
((SHLQ|SHRQ|SARQ|SARXQ) x (ANDLconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SARXQ) x y)
((SHLQ|SHRQ|SARQ|SARXQ) x (NEGL <t> (ANDLconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SARXQ) x (NEGL <t> y))
((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (ADDLconst [c] y)) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x y)
((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGL <t> (ADDLconst [c] y))) && c & 63 == 0 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGL <t> y))
((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (ANDLconst [c] y)) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x y)
((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGL <t> (ANDLconst [c] y))) && c & 63 == 63 => ((SHLQ|SHRQ|SARQ|SHLXQ|SHRXQ|SARXQ) x (NEGL <t> y))
((SHLL|SHRL|SARL|SARXL) x (ADDLconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL|SARXL) x y)
((SHLL|SHRL|SARL|SARXL) x (NEGL <t> (ADDLconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL|SARXL) x (NEGL <t> y))
((SHLL|SHRL|SARL|SARXL) x (ANDLconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL|SARXL) x y)
((SHLL|SHRL|SARL|SARXL) x (NEGL <t> (ANDLconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL|SARXL) x (NEGL <t> y))
((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (ADDLconst [c] y)) && c & 31 == 0 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x y)
((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGL <t> (ADDLconst [c] y))) && c & 31 == 0 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGL <t> y))
((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (ANDLconst [c] y)) && c & 31 == 31 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x y)
((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGL <t> (ANDLconst [c] y))) && c & 31 == 31 => ((SHLL|SHRL|SARL|SHLXL|SHRXL|SARXL) x (NEGL <t> y))
// Constant rotate instructions
((ADDQ|ORQ|XORQ) (SHLQconst x [c]) (SHRQconst x [d])) && d==64-c => (ROLQconst x [c])
@ -860,9 +879,13 @@
// it in order to strip it out.
(ORQ (SHLQ x y) (ANDQ (SHRQ x (NEG(Q|L) y)) (SBBQcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [63]) [-64])) [64])))) => (ROLQ x y)
(ORQ (SHRQ x y) (ANDQ (SHLQ x (NEG(Q|L) y)) (SBBQcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [63]) [-64])) [64])))) => (RORQ x y)
(ORQ (SHLXQ x y) (ANDQ (SHRXQ x (NEG(Q|L) y)) (SBBQcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [63]) [-64])) [64])))) => (ROLQ x y)
(ORQ (SHRXQ x y) (ANDQ (SHLXQ x (NEG(Q|L) y)) (SBBQcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [63]) [-64])) [64])))) => (RORQ x y)
(ORL (SHLL x y) (ANDL (SHRL x (NEG(Q|L) y)) (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [31]) [-32])) [32])))) => (ROLL x y)
(ORL (SHRL x y) (ANDL (SHLL x (NEG(Q|L) y)) (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [31]) [-32])) [32])))) => (RORL x y)
(ORL (SHLXL x y) (ANDL (SHRXL x (NEG(Q|L) y)) (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [31]) [-32])) [32])))) => (ROLL x y)
(ORL (SHRXL x y) (ANDL (SHLXL x (NEG(Q|L) y)) (SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [31]) [-32])) [32])))) => (RORL x y)
// Help with rotate detection
(CMPQconst (NEGQ (ADDQconst [-16] (ANDQconst [15] _))) [32]) => (FlagLT_ULT)
@ -877,6 +900,15 @@
(SHLL x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [15]) [-16]))))
&& v.Type.Size() == 2
=> (RORW x y)
(ORL (SHLXL x (AND(Q|L)const y [15]))
(ANDL (SHRW x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [15]) [-16])))
(SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [15]) [-16])) [16]))))
&& v.Type.Size() == 2
=> (ROLW x y)
(ORL (SHRW x (AND(Q|L)const y [15]))
(SHLXL x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [15]) [-16]))))
&& v.Type.Size() == 2
=> (RORW x y)
(ORL (SHLL x (AND(Q|L)const y [ 7]))
(ANDL (SHRB x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8])))
@ -887,6 +919,15 @@
(SHLL x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8]))))
&& v.Type.Size() == 1
=> (RORB x y)
(ORL (SHLXL x (AND(Q|L)const y [ 7]))
(ANDL (SHRB x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8])))
(SBBLcarrymask (CMP(Q|L)const (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8])) [ 8]))))
&& v.Type.Size() == 1
=> (ROLB x y)
(ORL (SHRB x (AND(Q|L)const y [ 7]))
(SHLXL x (NEG(Q|L) (ADD(Q|L)const (AND(Q|L)const y [ 7]) [ -8]))))
&& v.Type.Size() == 1
=> (RORB x y)
// rotate left negative = rotate right
(ROLQ x (NEG(Q|L) y)) => (RORQ x y)
@ -920,6 +961,7 @@
// Multi-register shifts
(ORQ (SH(R|L)Q lo bits) (SH(L|R)Q hi (NEGQ bits))) => (SH(R|L)DQ lo hi bits)
(ORQ (SH(R|L)XQ lo bits) (SH(L|R)XQ hi (NEGQ bits))) => (SH(R|L)DQ lo hi bits)
// Note: the word and byte shifts keep the low 5 bits (not the low 4 or 3 bits)
// because the x86 instructions are defined to use all 5 bits of the shift even
@ -2257,5 +2299,5 @@
=> @mergePoint(b,x0,x1) (MOVBEQload [i] {s} p1 mem)
(SARX(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && canMergeLoad(v, l) && clobber(l) => (SARX(Q|L)load [off] {sym} ptr x mem)
(SHL(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l) => (SHLX(Q|L)load [off] {sym} ptr x mem)
(SHR(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && buildcfg.GOAMD64 >= 3 && canMergeLoad(v, l) && clobber(l) => (SHRX(Q|L)load [off] {sym} ptr x mem)
(SHLX(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && canMergeLoad(v, l) && clobber(l) => (SHLX(Q|L)load [off] {sym} ptr x mem)
(SHRX(Q|L) l:(MOV(Q|L)load [off] {sym} ptr mem) x) && canMergeLoad(v, l) && clobber(l) => (SHRX(Q|L)load [off] {sym} ptr x mem)

View file

@ -955,6 +955,10 @@ func init() {
// CPUID feature: BMI2.
{name: "SARXQ", argLength: 2, reg: gp21, asm: "SARXQ"}, // signed arg0 >> arg1, shift amount is mod 64
{name: "SARXL", argLength: 2, reg: gp21, asm: "SARXL"}, // signed int32(arg0) >> arg1, shift amount is mod 32
{name: "SHLXQ", argLength: 2, reg: gp21, asm: "SHLXQ"}, // arg0 << arg1, shift amount is mod 64
{name: "SHLXL", argLength: 2, reg: gp21, asm: "SHLXL"}, // arg0 << arg1, shift amount is mod 32
{name: "SHRXQ", argLength: 2, reg: gp21, asm: "SHRXQ"}, // unsigned arg0 >> arg1, shift amount is mod 64
{name: "SHRXL", argLength: 2, reg: gp21, asm: "SHRXL"}, // unsigned uint32(arg0) >> arg1, shift amount is mod 32
{name: "SARXLload", argLength: 3, reg: gp21shxload, asm: "SARXL", aux: "SymOff", typ: "Uint32", faultOnNilArg0: true, symEffect: "Read"}, // signed *(arg0+auxint+aux) >> arg1, arg2=mem, shift amount is mod 32
{name: "SARXQload", argLength: 3, reg: gp21shxload, asm: "SARXQ", aux: "SymOff", typ: "Uint64", faultOnNilArg0: true, symEffect: "Read"}, // signed *(arg0+auxint+aux) >> arg1, arg2=mem, shift amount is mod 64

View file

@ -1064,6 +1064,10 @@ const (
OpAMD64MOVBEQstoreidx8
OpAMD64SARXQ
OpAMD64SARXL
OpAMD64SHLXQ
OpAMD64SHLXL
OpAMD64SHRXQ
OpAMD64SHRXL
OpAMD64SARXLload
OpAMD64SARXQload
OpAMD64SHLXLload
@ -14154,6 +14158,62 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "SHLXQ",
argLen: 2,
asm: x86.ASHLXQ,
reg: regInfo{
inputs: []inputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
outputs: []outputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
{
name: "SHLXL",
argLen: 2,
asm: x86.ASHLXL,
reg: regInfo{
inputs: []inputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
outputs: []outputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
{
name: "SHRXQ",
argLen: 2,
asm: x86.ASHRXQ,
reg: regInfo{
inputs: []inputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
outputs: []outputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
{
name: "SHRXL",
argLen: 2,
asm: x86.ASHRXL,
reg: regInfo{
inputs: []inputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
{1, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
outputs: []outputInfo{
{0, 49135}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R15
},
},
},
{
name: "SARXLload",
auxType: auxSymOff,

File diff suppressed because it is too large Load diff

View file

@ -72,7 +72,23 @@ func sarx32_load(x []int32, i int) int32 {
return s
}
func shlrx64(x []uint64, i int, s uint64) uint64 {
// shlrx64 shifts x right by y bits, then shifts that result left by y bits,
// and returns the final value. Both operands are plain uint64 values, so no
// memory load is involved in either shift.
//
// The annotation comments in the body name the instruction (SHRXQ, SHLXQ)
// associated with the amd64/v3 target. Presumably they are assembly-check
// directives that apply to the statement directly below each one, so keep
// every annotation immediately above its statement (TODO confirm against the
// codegen test conventions).
func shlrx64(x, y uint64) uint64 {
// amd64/v3:"SHRXQ"
s := x >> y
// amd64/v3:"SHLXQ"
s = s << y
return s
}
// shlrx32 is the 32-bit counterpart of the 64-bit register-only shift test:
// it shifts x right by y bits, then shifts that result left by y bits, and
// returns the final value. Both operands are plain uint32 values, so no
// memory load is involved in either shift.
//
// The annotation comments in the body name the instruction (SHRXL, SHLXL)
// associated with the amd64/v3 target. Presumably they are assembly-check
// directives that apply to the statement directly below each one, so keep
// every annotation immediately above its statement (TODO confirm against the
// codegen test conventions).
func shlrx32(x, y uint32) uint32 {
// amd64/v3:"SHRXL"
s := x >> y
// amd64/v3:"SHLXL"
s = s << y
return s
}
func shlrx64_load(x []uint64, i int, s uint64) uint64 {
// amd64/v3: `SHRXQ\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
s = x[i] >> i
// amd64/v3: `SHLXQ\t[A-Z]+[0-9]*, 8\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*8\), [A-Z]+[0-9]*`
@ -80,7 +96,7 @@ func shlrx64(x []uint64, i int, s uint64) uint64 {
return s
}
func shlrx32(x []uint32, i int, s uint32) uint32 {
func shlrx32_load(x []uint32, i int, s uint32) uint32 {
// amd64/v3: `SHRXL\t[A-Z]+[0-9]*, \([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`
s = x[i] >> i
// amd64/v3: `SHLXL\t[A-Z]+[0-9]*, 4\([A-Z]+[0-9]*\)\([A-Z]+[0-9]*\*4\), [A-Z]+[0-9]*`