cmd/compile/internal/ssa: add patterns for arm64 bitfield opcodes
Add patterns to match common idioms for EXTR, BFI, BFXIL, SBFIZ, SBFX,
UBFIZ and UBFX opcodes.

go1 benchmarks results on Amberwing:
name                   old time/op  new time/op  delta
FmtManyArgs            786ns ± 2%   714ns ± 1%   -9.20%  (p=0.000 n=10+10)
Gzip                   437ms ± 0%   402ms ± 0%   -7.99%  (p=0.000 n=10+10)
FmtFprintfIntInt       196ns ± 0%   182ns ± 0%   -7.28%  (p=0.000 n=10+9)
FmtFprintfPrefixedInt  207ns ± 0%   199ns ± 0%   -3.86%  (p=0.000 n=10+10)
FmtFprintfFloat        324ns ± 0%   316ns ± 0%   -2.47%  (p=0.000 n=10+8)
FmtFprintfInt          119ns ± 0%   117ns ± 0%   -1.68%  (p=0.000 n=10+9)
GobDecode              12.8ms ± 2%  12.6ms ± 1%  -1.62%  (p=0.002 n=10+10)
JSONDecode             94.4ms ± 1%  93.4ms ± 0%  -1.10%  (p=0.000 n=10+10)
RegexpMatchEasy0_32    247ns ± 0%   245ns ± 0%   -0.65%  (p=0.000 n=10+10)
RegexpMatchMedium_32   314ns ± 0%   312ns ± 0%   -0.64%  (p=0.000 n=10+10)
RegexpMatchEasy0_1K    541ns ± 0%   538ns ± 0%   -0.55%  (p=0.000 n=10+9)
TimeParse              450ns ± 1%   448ns ± 1%   -0.42%  (p=0.035 n=9+9)
RegexpMatchEasy1_32    244ns ± 0%   243ns ± 0%   -0.41%  (p=0.000 n=10+10)
GoParse                6.03ms ± 0%  6.00ms ± 0%  -0.40%  (p=0.002 n=10+10)
RegexpMatchEasy1_1K    779ns ± 0%   777ns ± 0%   -0.26%  (p=0.000 n=10+10)
RegexpMatchHard_32     2.75µs ± 0%  2.74µs ± 1%  -0.06%  (p=0.026 n=9+9)
BinaryTree17           11.7s ± 0%   11.6s ± 0%     ~     (p=0.089 n=10+10)
HTTPClientServer       89.1µs ± 1%  89.5µs ± 2%    ~     (p=0.436 n=10+10)
RegexpMatchHard_1K     78.9µs ± 0%  79.5µs ± 2%    ~     (p=0.469 n=10+10)
FmtFprintfEmpty        58.5ns ± 0%  58.5ns ± 0%    ~     (all equal)
GobEncode              12.0ms ± 1%  12.1ms ± 0%    ~     (p=0.075 n=10+10)
Revcomp                669ms ± 0%   668ms ± 0%     ~     (p=0.091 n=7+9)
Mandelbrot200          5.35ms ± 0%  5.36ms ± 0%  +0.07%  (p=0.000 n=9+9)
RegexpMatchMedium_1K   52.1µs ± 0%  52.1µs ± 0%  +0.10%  (p=0.000 n=9+9)
Fannkuch11             3.25s ± 0%   3.26s ± 0%   +0.36%  (p=0.000 n=9+10)
FmtFprintfString       114ns ± 1%   115ns ± 0%   +0.52%  (p=0.011 n=10+10)
JSONEncode             20.2ms ± 0%  20.3ms ± 0%  +0.65%  (p=0.000 n=10+10)
Template               91.3ms ± 0%  92.3ms ± 0%  +1.08%  (p=0.000 n=10+10)
TimeFormat             484ns ± 0%   495ns ± 1%   +2.30%  (p=0.000 n=9+10)

There are some opportunities to improve this change further by adding
patterns to match the "extended register" versions of ADD/SUB/CMP, but
I think that should be evaluated on its own. The regressions in Template
and TimeFormat would likely be recovered by this, as they seem to be due
to generating:

    ubfiz   x0, x0, #3, #8
    add     x1, x2, x0

instead of:

    add     x1, x2, x0, lsl #3

Change-Id: I5644a8d70ac7a98e784a377a2b76ab47a3415a4b
Reviewed-on: https://go-review.googlesource.com/88355
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
This commit is contained in:
parent
ded9a1b372
commit
e244a7a7d3
7 changed files with 1755 additions and 150 deletions
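For orientation before the diffs, here is one idiom these patterns target, copied from the new test/codegen/bitfield.go added at the end of this commit: a mask, shift and OR combination that now compiles to a single BFI instruction instead of separate AND, LSL and ORR.

    func bfi1(x, y uint64) uint64 {
        // arm64:"BFI\t[$]4, R[0-9]+, [$]12",-"LSL",-"LSR",-"AND"
        return ((x & 0xfff) << 4) | (y & 0xffffffffffff000f)
    }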
cmd/compile/internal/arm64/ssa.go

@@ -223,6 +223,15 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
        p.Reg = v.Args[0].Reg()
        p.To.Type = obj.TYPE_REG
        p.To.Reg = v.Reg()
    case ssa.OpARM64EXTRconst,
        ssa.OpARM64EXTRWconst:
        p := s.Prog(v.Op.Asm())
        p.From.Type = obj.TYPE_CONST
        p.From.Offset = v.AuxInt
        p.SetFrom3(obj.Addr{Type: obj.TYPE_REG, Reg: v.Args[0].Reg()})
        p.Reg = v.Args[1].Reg()
        p.To.Type = obj.TYPE_REG
        p.To.Reg = v.Reg()
    case ssa.OpARM64ADDshiftLL,
        ssa.OpARM64SUBshiftLL,
        ssa.OpARM64ANDshiftLL,
@@ -380,6 +389,30 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
        p.To.Type = obj.TYPE_MEM
        p.To.Reg = v.Args[0].Reg()
        gc.AddAux(&p.To, v)
    case ssa.OpARM64BFI,
        ssa.OpARM64BFXIL:
        r := v.Reg()
        if r != v.Args[0].Reg() {
            v.Fatalf("input[0] and output not in same register %s", v.LongString())
        }
        p := s.Prog(v.Op.Asm())
        p.From.Type = obj.TYPE_CONST
        p.From.Offset = v.AuxInt >> 8
        p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: v.AuxInt & 0xff})
        p.Reg = v.Args[1].Reg()
        p.To.Type = obj.TYPE_REG
        p.To.Reg = r
    case ssa.OpARM64SBFIZ,
        ssa.OpARM64SBFX,
        ssa.OpARM64UBFIZ,
        ssa.OpARM64UBFX:
        p := s.Prog(v.Op.Asm())
        p.From.Type = obj.TYPE_CONST
        p.From.Offset = v.AuxInt >> 8
        p.SetFrom3(obj.Addr{Type: obj.TYPE_CONST, Offset: v.AuxInt & 0xff})
        p.Reg = v.Args[0].Reg()
        p.To.Type = obj.TYPE_REG
        p.To.Reg = v.Reg()
    case ssa.OpARM64LoweredAtomicExchange64,
        ssa.OpARM64LoweredAtomicExchange32:
        // LDAXR (Rarg0), Rout
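To make the operand split in these cases concrete, here is a small illustrative sketch (not code from the commit); the auxInt layout lsb<<8|width comes from the arm64BFAuxInt helper added further down:

    package main

    import "fmt"

    func main() {
        // For a BFI inserting a 12-bit field at bit 4, the rewrite rules
        // pack auxInt = 4<<8 | 12; the cases above split it back out.
        auxInt := int64(4<<8 | 12)
        lsb, width := auxInt>>8, auxInt&0xff
        // Matches the operand order the new codegen tests expect:
        // BFI $4, R1, $12, R0.
        fmt.Printf("BFI $%d, R1, $%d, R0\n", lsb, width)
    }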
src/cmd/compile/internal/ssa/gen/ARM64.rules

@@ -939,6 +939,12 @@
(UMODW (MOVDconst [c]) (MOVDconst [d])) -> (MOVDconst [int64(uint32(c)%uint32(d))])
(ANDconst [c] (MOVDconst [d]))  -> (MOVDconst [c&d])
(ANDconst [c] (ANDconst [d] x)) -> (ANDconst [c&d] x)
(ANDconst [c] (MOVWUreg x)) -> (ANDconst [c&(1<<32-1)] x)
(ANDconst [c] (MOVHUreg x)) -> (ANDconst [c&(1<<16-1)] x)
(ANDconst [c] (MOVBUreg x)) -> (ANDconst [c&(1<<8-1)] x)
(MOVWUreg (ANDconst [c] x)) -> (ANDconst [c&(1<<32-1)] x)
(MOVHUreg (ANDconst [c] x)) -> (ANDconst [c&(1<<16-1)] x)
(MOVBUreg (ANDconst [c] x)) -> (ANDconst [c&(1<<8-1)] x)
(ORconst  [c] (MOVDconst [d]))  -> (MOVDconst [c|d])
(ORconst  [c] (ORconst [d] x))  -> (ORconst [c|d] x)
(XORconst [c] (MOVDconst [d]))  -> (MOVDconst [c^d])
@@ -1262,12 +1268,27 @@
( ORshiftRL [c] (SLLconst x [64-c]) x) -> (RORconst [ c] x)
(XORshiftRL [c] (SLLconst x [64-c]) x) -> (RORconst [ c] x)

(ADDshiftLL <t> [c] (SRLconst (MOVWUreg x) [32-c]) x) && c < 32 && t.Size() == 4 -> (RORWconst [32-c] x)
( ORshiftLL <t> [c] (SRLconst (MOVWUreg x) [32-c]) x) && c < 32 && t.Size() == 4 -> (RORWconst [32-c] x)
(XORshiftLL <t> [c] (SRLconst (MOVWUreg x) [32-c]) x) && c < 32 && t.Size() == 4 -> (RORWconst [32-c] x)
(ADDshiftRL <t> [c] (SLLconst x [32-c]) (MOVWUreg x)) && c < 32 && t.Size() == 4 -> (RORWconst [ c] x)
( ORshiftRL <t> [c] (SLLconst x [32-c]) (MOVWUreg x)) && c < 32 && t.Size() == 4 -> (RORWconst [ c] x)
(XORshiftRL <t> [c] (SLLconst x [32-c]) (MOVWUreg x)) && c < 32 && t.Size() == 4 -> (RORWconst [ c] x)
(ADDshiftLL <t> [c] (UBFX [bfc] x) x) && c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)
    -> (RORWconst [32-c] x)
( ORshiftLL <t> [c] (UBFX [bfc] x) x) && c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)
    -> (RORWconst [32-c] x)
(XORshiftLL <t> [c] (UBFX [bfc] x) x) && c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)
    -> (RORWconst [32-c] x)
(ADDshiftRL <t> [c] (SLLconst x [32-c]) (MOVWUreg x)) && c < 32 && t.Size() == 4 -> (RORWconst [c] x)
( ORshiftRL <t> [c] (SLLconst x [32-c]) (MOVWUreg x)) && c < 32 && t.Size() == 4 -> (RORWconst [c] x)
(XORshiftRL <t> [c] (SLLconst x [32-c]) (MOVWUreg x)) && c < 32 && t.Size() == 4 -> (RORWconst [c] x)

// Extract from reg pair
(ADDshiftLL [c] (SRLconst x [64-c]) x2) -> (EXTRconst [64-c] x2 x)
( ORshiftLL [c] (SRLconst x [64-c]) x2) -> (EXTRconst [64-c] x2 x)
(XORshiftLL [c] (SRLconst x [64-c]) x2) -> (EXTRconst [64-c] x2 x)

(ADDshiftLL <t> [c] (UBFX [bfc] x) x2) && c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)
    -> (EXTRWconst [32-c] x2 x)
( ORshiftLL <t> [c] (UBFX [bfc] x) x2) && c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)
    -> (EXTRWconst [32-c] x2 x)
(XORshiftLL <t> [c] (UBFX [bfc] x) x2) && c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)
    -> (EXTRWconst [32-c] x2 x)

// Generic rules rewrite certain AND to a pair of shifts.
// However, on ARM64 the bitmask can fit into an instruction.
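The EXTRconst/EXTRWconst rules above recognize two-register funnel shifts. One example, copied from the codegen tests added at the end of this commit; per the test's assertion this now emits a single EXTR $57 rather than separate shifts and an OR:

    func extr2(x, x2 uint64) uint64 {
        return x<<7 | x2>>57 // arm64:"EXTR\t[$]57,"
    }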
@@ -1275,6 +1296,106 @@
(SRLconst [c] (SLLconst [c] x)) && 0 < c && c < 64 -> (ANDconst [1<<uint(64-c)-1] x) // mask out high bits
(SLLconst [c] (SRLconst [c] x)) && 0 < c && c < 64 -> (ANDconst [^(1<<uint(c)-1)] x) // mask out low bits

// bitfield ops

// sbfiz
// (x << lc) >> rc
(SRAconst [rc] (SLLconst [lc] x)) && lc > rc -> (SBFIZ [arm64BFAuxInt(lc-rc, 64-lc)] x)
(MOVWreg (SLLconst [lc] x)) && lc < 32 -> (SBFIZ [arm64BFAuxInt(lc, 32-lc)] x)
(MOVHreg (SLLconst [lc] x)) && lc < 16 -> (SBFIZ [arm64BFAuxInt(lc, 16-lc)] x)
(MOVBreg (SLLconst [lc] x)) && lc < 8 -> (SBFIZ [arm64BFAuxInt(lc, 8-lc)] x)

// sbfx
// (x << lc) >> rc
(SRAconst [rc] (SLLconst [lc] x)) && lc <= rc -> (SBFX [arm64BFAuxInt(rc-lc, 64-rc)] x)
(SRAconst [rc] (MOVWreg x)) && rc < 32 -> (SBFX [arm64BFAuxInt(rc, 32-rc)] x)
(SRAconst [rc] (MOVHreg x)) && rc < 16 -> (SBFX [arm64BFAuxInt(rc, 16-rc)] x)
(SRAconst [rc] (MOVBreg x)) && rc < 8 -> (SBFX [arm64BFAuxInt(rc, 8-rc)] x)

// sbfiz/sbfx combinations: merge shifts into bitfield ops
(SRAconst [sc] (SBFIZ [bfc] x)) && sc < getARM64BFlsb(bfc)
    -> (SBFIZ [arm64BFAuxInt(getARM64BFlsb(bfc)-sc, getARM64BFwidth(bfc))] x)
(SRAconst [sc] (SBFIZ [bfc] x)) && sc >= getARM64BFlsb(bfc)
    && sc < getARM64BFlsb(bfc)+getARM64BFwidth(bfc)
    -> (SBFX [arm64BFAuxInt(sc-getARM64BFlsb(bfc), getARM64BFlsb(bfc)+getARM64BFwidth(bfc)-sc)] x)

// ubfiz
// (x & ac) << sc
(SLLconst [sc] (ANDconst [ac] x)) && isARM64BFMask(sc, ac, 0)
    -> (UBFIZ [arm64BFAuxInt(sc, arm64BFWidth(ac, 0))] x)
(SLLconst [sc] (MOVWUreg x)) && isARM64BFMask(sc, 1<<32-1, 0) -> (UBFIZ [arm64BFAuxInt(sc, 32)] x)
(SLLconst [sc] (MOVHUreg x)) && isARM64BFMask(sc, 1<<16-1, 0) -> (UBFIZ [arm64BFAuxInt(sc, 16)] x)
(SLLconst [sc] (MOVBUreg x)) && isARM64BFMask(sc, 1<<8-1, 0) -> (UBFIZ [arm64BFAuxInt(sc, 8)] x)
// (x << sc) & ac
(ANDconst [ac] (SLLconst [sc] x)) && isARM64BFMask(sc, ac, sc)
    -> (UBFIZ [arm64BFAuxInt(sc, arm64BFWidth(ac, sc))] x)
(MOVWUreg (SLLconst [sc] x)) && isARM64BFMask(sc, 1<<32-1, sc)
    -> (UBFIZ [arm64BFAuxInt(sc, arm64BFWidth(1<<32-1, sc))] x)
(MOVHUreg (SLLconst [sc] x)) && isARM64BFMask(sc, 1<<16-1, sc)
    -> (UBFIZ [arm64BFAuxInt(sc, arm64BFWidth(1<<16-1, sc))] x)
(MOVBUreg (SLLconst [sc] x)) && isARM64BFMask(sc, 1<<8-1, sc)
    -> (UBFIZ [arm64BFAuxInt(sc, arm64BFWidth(1<<8-1, sc))] x)
// (x << lc) >> rc
(SRLconst [rc] (SLLconst [lc] x)) && lc > rc -> (UBFIZ [arm64BFAuxInt(lc-rc, 64-lc)] x)

// ubfx
// (x >> sc) & ac
(ANDconst [ac] (SRLconst [sc] x)) && isARM64BFMask(sc, ac, 0)
    -> (UBFX [arm64BFAuxInt(sc, arm64BFWidth(ac, 0))] x)
(MOVWUreg (SRLconst [sc] x)) && isARM64BFMask(sc, 1<<32-1, 0) -> (UBFX [arm64BFAuxInt(sc, 32)] x)
(MOVHUreg (SRLconst [sc] x)) && isARM64BFMask(sc, 1<<16-1, 0) -> (UBFX [arm64BFAuxInt(sc, 16)] x)
(MOVBUreg (SRLconst [sc] x)) && isARM64BFMask(sc, 1<<8-1, 0) -> (UBFX [arm64BFAuxInt(sc, 8)] x)
// (x & ac) >> sc
(SRLconst [sc] (ANDconst [ac] x)) && isARM64BFMask(sc, ac, sc)
    -> (UBFX [arm64BFAuxInt(sc, arm64BFWidth(ac, sc))] x)
(SRLconst [sc] (MOVWUreg x)) && isARM64BFMask(sc, 1<<32-1, sc)
    -> (UBFX [arm64BFAuxInt(sc, arm64BFWidth(1<<32-1, sc))] x)
(SRLconst [sc] (MOVHUreg x)) && isARM64BFMask(sc, 1<<16-1, sc)
    -> (UBFX [arm64BFAuxInt(sc, arm64BFWidth(1<<16-1, sc))] x)
(SRLconst [sc] (MOVBUreg x)) && isARM64BFMask(sc, 1<<8-1, sc)
    -> (UBFX [arm64BFAuxInt(sc, arm64BFWidth(1<<8-1, sc))] x)
// (x << lc) >> rc
(SRLconst [rc] (SLLconst [lc] x)) && lc < rc -> (UBFX [arm64BFAuxInt(rc-lc, 64-rc)] x)

// ubfiz/ubfx combinations: merge shifts into bitfield ops
(SRLconst [sc] (UBFX [bfc] x)) && sc < getARM64BFwidth(bfc)
    -> (UBFX [arm64BFAuxInt(getARM64BFlsb(bfc)+sc, getARM64BFwidth(bfc)-sc)] x)
(UBFX [bfc] (SRLconst [sc] x)) && sc+getARM64BFwidth(bfc)+getARM64BFlsb(bfc) < 64
    -> (UBFX [arm64BFAuxInt(getARM64BFlsb(bfc)+sc, getARM64BFwidth(bfc))] x)
(SLLconst [sc] (UBFIZ [bfc] x)) && sc+getARM64BFwidth(bfc)+getARM64BFlsb(bfc) < 64
    -> (UBFIZ [arm64BFAuxInt(getARM64BFlsb(bfc)+sc, getARM64BFwidth(bfc))] x)
(UBFIZ [bfc] (SLLconst [sc] x)) && sc < getARM64BFwidth(bfc)
    -> (UBFIZ [arm64BFAuxInt(getARM64BFlsb(bfc)+sc, getARM64BFwidth(bfc)-sc)] x)
// ((x << c1) >> c2) >> c3
(SRLconst [sc] (UBFIZ [bfc] x)) && sc == getARM64BFlsb(bfc)
    -> (ANDconst [1<<uint(getARM64BFwidth(bfc))-1] x)
(SRLconst [sc] (UBFIZ [bfc] x)) && sc < getARM64BFlsb(bfc)
    -> (UBFIZ [arm64BFAuxInt(getARM64BFlsb(bfc)-sc, getARM64BFwidth(bfc))] x)
(SRLconst [sc] (UBFIZ [bfc] x)) && sc > getARM64BFlsb(bfc)
    && sc < getARM64BFlsb(bfc)+getARM64BFwidth(bfc)
    -> (UBFX [arm64BFAuxInt(sc-getARM64BFlsb(bfc), getARM64BFlsb(bfc)+getARM64BFwidth(bfc)-sc)] x)
// ((x << c1) << c2) >> c3
(UBFX [bfc] (SLLconst [sc] x)) && sc == getARM64BFlsb(bfc)
    -> (ANDconst [1<<uint(getARM64BFwidth(bfc))-1] x)
(UBFX [bfc] (SLLconst [sc] x)) && sc < getARM64BFlsb(bfc)
    -> (UBFX [arm64BFAuxInt(getARM64BFlsb(bfc)-sc, getARM64BFwidth(bfc))] x)
(UBFX [bfc] (SLLconst [sc] x)) && sc > getARM64BFlsb(bfc)
    && sc < getARM64BFlsb(bfc)+getARM64BFwidth(bfc)
    -> (UBFIZ [arm64BFAuxInt(sc-getARM64BFlsb(bfc), getARM64BFlsb(bfc)+getARM64BFwidth(bfc)-sc)] x)

// bfi
(OR (UBFIZ [bfc] x) (ANDconst [ac] y))
    && ac == ^((1<<uint(getARM64BFwidth(bfc))-1) << uint(getARM64BFlsb(bfc)))
    -> (BFI [bfc] y x)
(ORshiftRL [rc] (ANDconst [ac] x) (SLLconst [lc] y))
    && lc > rc && ac == ^((1<<uint(64-lc)-1) << uint64(lc-rc))
    -> (BFI [arm64BFAuxInt(lc-rc, 64-lc)] x y)
// bfxil
(OR (UBFX [bfc] x) (ANDconst [ac] y)) && ac == ^(1<<uint(getARM64BFwidth(bfc))-1)
    -> (BFXIL [bfc] y x)
(ORshiftLL [sc] (UBFX [bfc] x) (SRLconst [sc] y)) && sc == getARM64BFwidth(bfc)
    -> (BFXIL [bfc] y x)

// do combined loads
// little endian loads
// b[0] | b[1]<<8 -> load 16-bit
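Two representative idioms that the sbfiz and ubfx rule families above collapse, both copied from the codegen tests added at the end of this commit:

    func sbfiz1(x int64) int64 {
        // arm64:"SBFIZ\t[$]1, R[0-9]+, [$]60",-"LSL",-"ASR"
        return (x << 4) >> 3
    }

    func ubfx1(x uint64) uint64 {
        // arm64:"UBFX\t[$]25, R[0-9]+, [$]10",-"LSR",-"AND"
        return (x >> 25) & 1023
    }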
@@ -1510,12 +1631,12 @@
    && isSamePtr(ptr0, ptr1)
    && clobber(x)
    -> (MOVHstore [i-1] {s} ptr0 w mem)
(MOVBstore [i] {s} ptr0 (SRLconst [8] (MOVHUreg w)) x:(MOVBstore [i-1] {s} ptr1 w mem))
(MOVBstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(8, 8)] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
    && x.Uses == 1
    && isSamePtr(ptr0, ptr1)
    && clobber(x)
    -> (MOVHstore [i-1] {s} ptr0 w mem)
(MOVBstore [i] {s} ptr0 (SRLconst [8] (MOVWUreg w)) x:(MOVBstore [i-1] {s} ptr1 w mem))
(MOVBstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(8, 24)] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
    && x.Uses == 1
    && isSamePtr(ptr0, ptr1)
    && clobber(x)
@@ -1530,9 +1651,12 @@
    && isSamePtr(ptr0, ptr1)
    && clobber(x)
    -> (MOVHstore [i-1] {s} ptr0 w0 mem)
(MOVBstore [i] {s} ptr0 (SRLconst [j] (MOVWUreg w)) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] (MOVWUreg w)) mem))
(MOVBstore [i] {s} ptr0 (UBFX [bfc] w) x:(MOVBstore [i-1] {s} ptr1 w0:(UBFX [bfc2] w) mem))
    && x.Uses == 1
    && isSamePtr(ptr0, ptr1)
    && getARM64BFwidth(bfc) == 32 - getARM64BFlsb(bfc)
    && getARM64BFwidth(bfc2) == 32 - getARM64BFlsb(bfc2)
    && getARM64BFlsb(bfc2) == getARM64BFlsb(bfc) - 8
    && clobber(x)
    -> (MOVHstore [i-1] {s} ptr0 w0 mem)
(MOVBstore [i] {s} ptr0 (SRLconst [j] (MOVDreg w)) x:(MOVBstore [i-1] {s} ptr1 w0:(SRLconst [j-8] (MOVDreg w)) mem))
@@ -1545,7 +1669,7 @@
    && isSamePtr(ptr0, ptr1)
    && clobber(x)
    -> (MOVWstore [i-2] {s} ptr0 w mem)
(MOVHstore [i] {s} ptr0 (SRLconst [16] (MOVWUreg w)) x:(MOVHstore [i-2] {s} ptr1 w mem))
(MOVHstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(16, 16)] w) x:(MOVHstore [i-2] {s} ptr1 w mem))
    && x.Uses == 1
    && isSamePtr(ptr0, ptr1)
    && clobber(x)
@@ -1594,9 +1718,9 @@
    && clobber(x6)
    -> (MOVDstore [i-7] {s} ptr (REV <w.Type> w) mem)
(MOVBstore [i] {s} ptr w
    x0:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVWUreg w))
    x1:(MOVBstore [i-2] {s} ptr (SRLconst [16] (MOVWUreg w))
    x2:(MOVBstore [i-3] {s} ptr (SRLconst [24] (MOVWUreg w)) mem))))
    x0:(MOVBstore [i-1] {s} ptr (UBFX [arm64BFAuxInt(8, 24)] w)
    x1:(MOVBstore [i-2] {s} ptr (UBFX [arm64BFAuxInt(16, 16)] w)
    x2:(MOVBstore [i-3] {s} ptr (UBFX [arm64BFAuxInt(24, 8)] w) mem))))
    && x0.Uses == 1
    && x1.Uses == 1
    && x2.Uses == 1
@@ -1630,7 +1754,7 @@
    && x.Uses == 1
    && clobber(x)
    -> (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
(MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVHUreg w)) mem))
(MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (UBFX [arm64BFAuxInt(8, 8)] w) mem))
    && x.Uses == 1
    && clobber(x)
    -> (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
@@ -1638,7 +1762,7 @@
    && x.Uses == 1
    && clobber(x)
    -> (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
(MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (SRLconst [8] (MOVWUreg w)) mem))
(MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (UBFX [arm64BFAuxInt(8, 24)] w) mem))
    && x.Uses == 1
    && clobber(x)
    -> (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
src/cmd/compile/internal/ssa/gen/ARM64Ops.go

@@ -139,6 +139,7 @@ func init() {
        gp1flags  = regInfo{inputs: []regMask{gpg}}
        gp1flags1 = regInfo{inputs: []regMask{gpg}, outputs: []regMask{gp}}
        gp21      = regInfo{inputs: []regMask{gpg, gpg}, outputs: []regMask{gp}}
        gp21nog   = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
        gp2flags  = regInfo{inputs: []regMask{gpg, gpg}}
        gp2flags1 = regInfo{inputs: []regMask{gp, gp}, outputs: []regMask{gp}}
        gpload    = regInfo{inputs: []regMask{gpspsbg}, outputs: []regMask{gp}}
@@ -231,14 +232,16 @@ func init() {
        {name: "FNMSUBD", argLength: 3, reg: fp31, asm: "FNMSUBD"}, // -arg0 + (arg1 * arg2)

        // shifts
        {name: "SLL", argLength: 2, reg: gp21, asm: "LSL"},                      // arg0 << arg1, shift amount is mod 64
        {name: "SLLconst", argLength: 1, reg: gp11, asm: "LSL", aux: "Int64"},   // arg0 << auxInt
        {name: "SRL", argLength: 2, reg: gp21, asm: "LSR"},                      // arg0 >> arg1, unsigned, shift amount is mod 64
        {name: "SRLconst", argLength: 1, reg: gp11, asm: "LSR", aux: "Int64"},   // arg0 >> auxInt, unsigned
        {name: "SRA", argLength: 2, reg: gp21, asm: "ASR"},                      // arg0 >> arg1, signed, shift amount is mod 64
        {name: "SRAconst", argLength: 1, reg: gp11, asm: "ASR", aux: "Int64"},   // arg0 >> auxInt, signed
        {name: "RORconst", argLength: 1, reg: gp11, asm: "ROR", aux: "Int64"},   // arg0 right rotate by auxInt bits
        {name: "RORWconst", argLength: 1, reg: gp11, asm: "RORW", aux: "Int64"}, // uint32(arg0) right rotate by auxInt bits
        {name: "SLL", argLength: 2, reg: gp21, asm: "LSL"},                        // arg0 << arg1, shift amount is mod 64
        {name: "SLLconst", argLength: 1, reg: gp11, asm: "LSL", aux: "Int64"},     // arg0 << auxInt
        {name: "SRL", argLength: 2, reg: gp21, asm: "LSR"},                        // arg0 >> arg1, unsigned, shift amount is mod 64
        {name: "SRLconst", argLength: 1, reg: gp11, asm: "LSR", aux: "Int64"},     // arg0 >> auxInt, unsigned
        {name: "SRA", argLength: 2, reg: gp21, asm: "ASR"},                        // arg0 >> arg1, signed, shift amount is mod 64
        {name: "SRAconst", argLength: 1, reg: gp11, asm: "ASR", aux: "Int64"},     // arg0 >> auxInt, signed
        {name: "RORconst", argLength: 1, reg: gp11, asm: "ROR", aux: "Int64"},     // arg0 right rotate by auxInt bits
        {name: "RORWconst", argLength: 1, reg: gp11, asm: "RORW", aux: "Int64"},   // uint32(arg0) right rotate by auxInt bits
        {name: "EXTRconst", argLength: 2, reg: gp21, asm: "EXTR", aux: "Int64"},   // extract 64 bits from arg0:arg1 starting at lsb auxInt
        {name: "EXTRWconst", argLength: 2, reg: gp21, asm: "EXTRW", aux: "Int64"}, // extract 32 bits from arg0[31:0]:arg1[31:0] starting at lsb auxInt and zero top 32 bits

        // comparisons
        {name: "CMP", argLength: 2, reg: gp2flags, asm: "CMP", typ: "Flags"}, // arg0 compare to arg1
@@ -281,6 +284,21 @@ func init() {
        {name: "CMPshiftRL", argLength: 2, reg: gp2flags, asm: "CMP", aux: "Int64", typ: "Flags"}, // arg0 compare to arg1>>auxInt, unsigned shift
        {name: "CMPshiftRA", argLength: 2, reg: gp2flags, asm: "CMP", aux: "Int64", typ: "Flags"}, // arg0 compare to arg1>>auxInt, signed shift

        // bitfield ops
        // for all bitfield ops lsb is auxInt>>8, width is auxInt&0xff
        // insert low width bits of arg1 into the result starting at bit lsb, copy other bits from arg0
        {name: "BFI", argLength: 2, reg: gp21nog, asm: "BFI", aux: "Int64", resultInArg0: true},
        // extract width bits of arg1 starting at bit lsb and insert at low end of result, copy other bits from arg0
        {name: "BFXIL", argLength: 2, reg: gp21nog, asm: "BFXIL", aux: "Int64", resultInArg0: true},
        // insert low width bits of arg0 into the result starting at bit lsb, bits to the left of the inserted bit field are set to the high/sign bit of the inserted bit field, bits to the right are zeroed
        {name: "SBFIZ", argLength: 1, reg: gp11, asm: "SBFIZ", aux: "Int64"},
        // extract width bits of arg0 starting at bit lsb and insert at low end of result, remaining high bits are set to the high/sign bit of the extracted bitfield
        {name: "SBFX", argLength: 1, reg: gp11, asm: "SBFX", aux: "Int64"},
        // insert low width bits of arg0 into the result starting at bit lsb, bits to the left and right of the inserted bit field are zeroed
        {name: "UBFIZ", argLength: 1, reg: gp11, asm: "UBFIZ", aux: "Int64"},
        // extract width bits of arg0 starting at bit lsb and insert at low end of result, remaining high bits are zeroed
        {name: "UBFX", argLength: 1, reg: gp11, asm: "UBFX", aux: "Int64"},

        // moves
        {name: "MOVDconst", argLength: 0, reg: gp01, aux: "Int64", asm: "MOVD", typ: "UInt64", rematerializeable: true},      // 32 low bits of auxint
        {name: "FMOVSconst", argLength: 0, reg: fp01, aux: "Float64", asm: "FMOVS", typ: "Float32", rematerializeable: true}, // auxint as 64-bit float, convert to 32-bit float
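As a quick reference for the op semantics documented in the comments above, here is a pure-Go model of the four single-input bitfield ops (an illustrative sketch, not code from the commit):

    package main

    import "fmt"

    // lsb and width come from the packed auxInt (lsb = auxInt>>8,
    // width = auxInt&0xff), with lsb+width <= 64.
    func ubfx(x uint64, lsb, width uint) uint64 {
        return (x >> lsb) & (1<<width - 1) // extract, zero-extend
    }

    func ubfiz(x uint64, lsb, width uint) uint64 {
        return (x & (1<<width - 1)) << lsb // insert at lsb, zeros elsewhere
    }

    func sbfx(x int64, lsb, width uint) int64 {
        return x << (64 - lsb - width) >> (64 - width) // extract, sign-extend
    }

    func sbfiz(x int64, lsb, width uint) int64 {
        return x << (64 - width) >> (64 - width - lsb) // insert at lsb, sign bits above
    }

    func main() {
        fmt.Println(ubfx(0xabcd, 4, 8)) // extracts 0xbc (prints 188)
        fmt.Println(sbfx(0xf0, 4, 4))   // extracts 0xf, sign-extends (prints -1)
    }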
src/cmd/compile/internal/ssa/opGen.go

@@ -1030,6 +1030,8 @@ const (
    OpARM64SRAconst
    OpARM64RORconst
    OpARM64RORWconst
    OpARM64EXTRconst
    OpARM64EXTRWconst
    OpARM64CMP
    OpARM64CMPconst
    OpARM64CMPW
@@ -1067,6 +1069,12 @@ const (
    OpARM64CMPshiftLL
    OpARM64CMPshiftRL
    OpARM64CMPshiftRA
    OpARM64BFI
    OpARM64BFXIL
    OpARM64SBFIZ
    OpARM64SBFX
    OpARM64UBFIZ
    OpARM64UBFX
    OpARM64MOVDconst
    OpARM64FMOVSconst
    OpARM64FMOVDconst
@@ -13167,6 +13175,36 @@ var opcodeTable = [...]opInfo{
        },
    },
    {
        name:    "EXTRconst",
        auxType: auxInt64,
        argLen:  2,
        asm:     arm64.AEXTR,
        reg: regInfo{
            inputs: []inputInfo{
                {0, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
                {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
            },
            outputs: []outputInfo{
                {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
            },
        },
    },
    {
        name:    "EXTRWconst",
        auxType: auxInt64,
        argLen:  2,
        asm:     arm64.AEXTRW,
        reg: regInfo{
            inputs: []inputInfo{
                {0, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
                {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
            },
            outputs: []outputInfo{
                {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
            },
        },
    },
    {
        name:   "CMP",
        argLen: 2,
@@ -13673,6 +13711,94 @@ var opcodeTable = [...]opInfo{
        },
    },
    {
        name:         "BFI",
        auxType:      auxInt64,
        argLen:       2,
        resultInArg0: true,
        asm:          arm64.ABFI,
        reg: regInfo{
            inputs: []inputInfo{
                {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
                {1, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
            },
            outputs: []outputInfo{
                {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
            },
        },
    },
    {
        name:         "BFXIL",
        auxType:      auxInt64,
        argLen:       2,
        resultInArg0: true,
        asm:          arm64.ABFXIL,
        reg: regInfo{
            inputs: []inputInfo{
                {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
                {1, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
            },
            outputs: []outputInfo{
                {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
            },
        },
    },
    {
        name:    "SBFIZ",
        auxType: auxInt64,
        argLen:  1,
        asm:     arm64.ASBFIZ,
        reg: regInfo{
            inputs: []inputInfo{
                {0, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
            },
            outputs: []outputInfo{
                {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
            },
        },
    },
    {
        name:    "SBFX",
        auxType: auxInt64,
        argLen:  1,
        asm:     arm64.ASBFX,
        reg: regInfo{
            inputs: []inputInfo{
                {0, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
            },
            outputs: []outputInfo{
                {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
            },
        },
    },
    {
        name:    "UBFIZ",
        auxType: auxInt64,
        argLen:  1,
        asm:     arm64.AUBFIZ,
        reg: regInfo{
            inputs: []inputInfo{
                {0, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
            },
            outputs: []outputInfo{
                {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
            },
        },
    },
    {
        name:    "UBFX",
        auxType: auxInt64,
        argLen:  1,
        asm:     arm64.AUBFX,
        reg: regInfo{
            inputs: []inputInfo{
                {0, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
            },
            outputs: []outputInfo{
                {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
            },
        },
    },
    {
        name:    "MOVDconst",
        auxType: auxInt64,
src/cmd/compile/internal/ssa/rewrite.go

@@ -837,3 +837,39 @@ func isInlinableMemmoveSize(sz int64, c *Config) bool {
    }
    return false
}

// encodes the lsb and width for arm64 bitfield ops into the expected auxInt format.
func arm64BFAuxInt(lsb, width int64) int64 {
    if lsb < 0 || lsb > 63 {
        panic("ARM64 bit field lsb constant out of range")
    }
    if width < 1 || width > 64 {
        panic("ARM64 bit field width constant out of range")
    }
    return width | lsb<<8
}

// returns the lsb part of the auxInt field of arm64 bitfield ops.
func getARM64BFlsb(bfc int64) int64 {
    return int64(uint64(bfc) >> 8)
}

// returns the width part of the auxInt field of arm64 bitfield ops.
func getARM64BFwidth(bfc int64) int64 {
    return bfc & 0xff
}

// checks if mask >> rshift applied at lsb is a valid arm64 bitfield op mask.
func isARM64BFMask(lsb, mask, rshift int64) bool {
    shiftedMask := int64(uint64(mask) >> uint64(rshift))
    return shiftedMask != 0 && isPowerOfTwo(shiftedMask+1) && nto(shiftedMask)+lsb < 64
}

// returns the bitfield width of mask >> rshift for arm64 bitfield ops
func arm64BFWidth(mask, rshift int64) int64 {
    shiftedMask := int64(uint64(mask) >> uint64(rshift))
    if shiftedMask == 0 {
        panic("ARM64 BF mask is zero")
    }
    return nto(shiftedMask)
}
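A worked example of the packing above (illustration only, not part of the diff): arm64BFAuxInt(8, 24) returns 24 | 8<<8 = 0x818, and the getters recover the two fields.

    package main

    import "fmt"

    // Standalone copies of the helpers above, for demonstration.
    func arm64BFAuxInt(lsb, width int64) int64 { return width | lsb<<8 }
    func getARM64BFlsb(bfc int64) int64        { return int64(uint64(bfc) >> 8) }
    func getARM64BFwidth(bfc int64) int64      { return bfc & 0xff }

    func main() {
        bfc := arm64BFAuxInt(8, 24)
        // Prints: 0x818 lsb=8 width=24
        fmt.Printf("%#x lsb=%d width=%d\n", bfc, getARM64BFlsb(bfc), getARM64BFwidth(bfc))
    }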
File diff suppressed because it is too large (the regenerated src/cmd/compile/internal/ssa/rewriteARM64.go).

test/codegen/bitfield.go (new file, 224 lines)
@@ -0,0 +1,224 @@
// asmcheck

// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package codegen

// This file contains codegen tests related to bit field
// insertion/extraction simplifications/optimizations.

func extr1(x, x2 uint64) uint64 {
    return x<<7 + x2>>57 // arm64:"EXTR\t[$]57,"
}

func extr2(x, x2 uint64) uint64 {
    return x<<7 | x2>>57 // arm64:"EXTR\t[$]57,"
}

func extr3(x, x2 uint64) uint64 {
    return x<<7 ^ x2>>57 // arm64:"EXTR\t[$]57,"
}

func extr4(x, x2 uint32) uint32 {
    return x<<7 + x2>>25 // arm64:"EXTRW\t[$]25,"
}

func extr5(x, x2 uint32) uint32 {
    return x<<7 | x2>>25 // arm64:"EXTRW\t[$]25,"
}

func extr6(x, x2 uint32) uint32 {
    return x<<7 ^ x2>>25 // arm64:"EXTRW\t[$]25,"
}

// check 32-bit shift masking
func mask32(x uint32) uint32 {
    return (x << 29) >> 29 // arm64:"AND\t[$]7, R[0-9]+",-"LSR",-"LSL"
}

// check 16-bit shift masking
func mask16(x uint16) uint16 {
    return (x << 14) >> 14 // arm64:"AND\t[$]3, R[0-9]+",-"LSR",-"LSL"
}

// check 8-bit shift masking
func mask8(x uint8) uint8 {
    return (x << 7) >> 7 // arm64:"AND\t[$]1, R[0-9]+",-"LSR",-"LSL"
}

func maskshift(x uint64) uint64 {
    // arm64:"AND\t[$]4095, R[0-9]+",-"LSL",-"LSR",-"UBFIZ",-"UBFX"
    return ((x << 5) & (0xfff << 5)) >> 5
}

// bitfield ops
// bfi
func bfi1(x, y uint64) uint64 {
    // arm64:"BFI\t[$]4, R[0-9]+, [$]12",-"LSL",-"LSR",-"AND"
    return ((x & 0xfff) << 4) | (y & 0xffffffffffff000f)
}

func bfi2(x, y uint64) uint64 {
    // arm64:"BFI\t[$]12, R[0-9]+, [$]40",-"LSL",-"LSR",-"AND"
    return (x << 24 >> 12) | (y & 0xfff0000000000fff)
}

// bfxil
func bfxil1(x, y uint64) uint64 {
    // arm64:"BFXIL\t[$]5, R[0-9]+, [$]12",-"LSL",-"LSR",-"AND"
    return ((x >> 5) & 0xfff) | (y & 0xfffffffffffff000)
}

func bfxil2(x, y uint64) uint64 {
    // arm64:"BFXIL\t[$]12, R[0-9]+, [$]40",-"LSL",-"LSR",-"AND"
    return (x << 12 >> 24) | (y & 0xffffff0000000000)
}

// sbfiz
func sbfiz1(x int64) int64 {
    // arm64:"SBFIZ\t[$]1, R[0-9]+, [$]60",-"LSL",-"ASR"
    return (x << 4) >> 3
}

func sbfiz2(x int32) int64 {
    return int64(x << 3) // arm64:"SBFIZ\t[$]3, R[0-9]+, [$]29",-"LSL"
}

func sbfiz3(x int16) int64 {
    return int64(x << 3) // arm64:"SBFIZ\t[$]3, R[0-9]+, [$]13",-"LSL"
}

func sbfiz4(x int8) int64 {
    return int64(x << 3) // arm64:"SBFIZ\t[$]3, R[0-9]+, [$]5",-"LSL"
}

func sbfiz5(x int32) int32 {
    // arm64:"SBFIZ\t[$]1, R[0-9]+, [$]28",-"LSL",-"ASR"
    return (x << 4) >> 3
}

// sbfx
func sbfx1(x int64) int64 {
    return (x << 3) >> 4 // arm64:"SBFX\t[$]1, R[0-9]+, [$]60",-"LSL",-"ASR"
}

func sbfx2(x int64) int64 {
    return (x << 60) >> 60 // arm64:"SBFX\tZR, R[0-9]+, [$]4",-"LSL",-"ASR"
}

func sbfx3(x int32) int64 {
    return int64(x) >> 3 // arm64:"SBFX\t[$]3, R[0-9]+, [$]29",-"ASR"
}

func sbfx4(x int16) int64 {
    return int64(x) >> 3 // arm64:"SBFX\t[$]3, R[0-9]+, [$]13",-"ASR"
}

func sbfx5(x int8) int64 {
    return int64(x) >> 3 // arm64:"SBFX\t[$]3, R[0-9]+, [$]5",-"ASR"
}

func sbfx6(x int32) int32 {
    return (x << 3) >> 4 // arm64:"SBFX\t[$]1, R[0-9]+, [$]28",-"LSL",-"ASR"
}

// ubfiz
func ubfiz1(x uint64) uint64 {
    // arm64:"UBFIZ\t[$]3, R[0-9]+, [$]12",-"LSL",-"AND"
    return (x & 0xfff) << 3
}

func ubfiz2(x uint64) uint64 {
    // arm64:"UBFIZ\t[$]4, R[0-9]+, [$]12",-"LSL",-"AND"
    return (x << 4) & 0xfff0
}

func ubfiz3(x uint32) uint64 {
    return uint64(x+1) << 3 // arm64:"UBFIZ\t[$]3, R[0-9]+, [$]32",-"LSL"
}

func ubfiz4(x uint16) uint64 {
    return uint64(x+1) << 3 // arm64:"UBFIZ\t[$]3, R[0-9]+, [$]16",-"LSL"
}

func ubfiz5(x uint8) uint64 {
    return uint64(x+1) << 3 // arm64:"UBFIZ\t[$]3, R[0-9]+, [$]8",-"LSL"
}

func ubfiz6(x uint64) uint64 {
    // arm64:"UBFIZ\t[$]1, R[0-9]+, [$]60",-"LSL",-"LSR"
    return (x << 4) >> 3
}

func ubfiz7(x uint32) uint32 {
    // arm64:"UBFIZ\t[$]1, R[0-9]+, [$]28",-"LSL",-"LSR"
    return (x << 4) >> 3
}

func ubfiz8(x uint64) uint64 {
    // arm64:"UBFIZ\t[$]1, R[0-9]+, [$]20",-"LSL",-"LSR"
    return ((x & 0xfffff) << 4) >> 3
}

func ubfiz9(x uint64) uint64 {
    // arm64:"UBFIZ\t[$]5, R[0-9]+, [$]13",-"LSL",-"LSR",-"AND"
    return ((x << 3) & 0xffff) << 2
}

func ubfiz10(x uint64) uint64 {
    // arm64:"UBFIZ\t[$]7, R[0-9]+, [$]12",-"LSL",-"LSR",-"AND"
    return ((x << 5) & (0xfff << 5)) << 2
}

// ubfx
func ubfx1(x uint64) uint64 {
    // arm64:"UBFX\t[$]25, R[0-9]+, [$]10",-"LSR",-"AND"
    return (x >> 25) & 1023
}

func ubfx2(x uint64) uint64 {
    // arm64:"UBFX\t[$]4, R[0-9]+, [$]8",-"LSR",-"AND"
    return (x & 0x0ff0) >> 4
}

func ubfx3(x uint32) uint64 {
    return uint64(x >> 15) // arm64:"UBFX\t[$]15, R[0-9]+, [$]17",-"LSR"
}

func ubfx4(x uint16) uint64 {
    return uint64(x >> 9) // arm64:"UBFX\t[$]9, R[0-9]+, [$]7",-"LSR"
}

func ubfx5(x uint8) uint64 {
    return uint64(x >> 3) // arm64:"UBFX\t[$]3, R[0-9]+, [$]5",-"LSR"
}

func ubfx6(x uint64) uint64 {
    return (x << 1) >> 2 // arm64:"UBFX\t[$]1, R[0-9]+, [$]62",-"LSL",-"LSR"
}

func ubfx7(x uint32) uint32 {
    return (x << 1) >> 2 // arm64:"UBFX\t[$]1, R[0-9]+, [$]30",-"LSL",-"LSR"
}

func ubfx8(x uint64) uint64 {
    // arm64:"UBFX\t[$]1, R[0-9]+, [$]12",-"LSL",-"LSR",-"AND"
    return ((x << 1) >> 2) & 0xfff
}

func ubfx9(x uint64) uint64 {
    // arm64:"UBFX\t[$]4, R[0-9]+, [$]11",-"LSL",-"LSR",-"AND"
    return ((x >> 3) & 0xfff) >> 1
}

func ubfx10(x uint64) uint64 {
    // arm64:"UBFX\t[$]5, R[0-9]+, [$]56",-"LSL",-"LSR"
    return ((x >> 2) << 5) >> 8
}

func ubfx11(x uint64) uint64 {
    // arm64:"UBFX\t[$]1, R[0-9]+, [$]19",-"LSL",-"LSR"
    return ((x & 0xfffff) << 3) >> 4
}