cmd/compile: add an optimization rule for math/bits.ReverseBytes16 on arm

This CL adds two rules to turn patterns like (x<<8) | (x>>8) (where x is a
uint16, and "|" can also be "+" or "^") into a REV16 instruction on ARMv6 and
above. This optimization benefits math/bits.ReverseBytes16.
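
For context, math/bits.ReverseBytes16 is implemented as x>>8 | x<<8, which is
exactly the shape these rules match. A minimal sketch (the name revBytes16 is
illustrative, not from this CL):

func revBytes16(x uint16) uint16 {
	// On ARMv6 and above the compiler can now rewrite this shift/OR
	// pair (or the same shape with "+" or "^") into a single REV16.
	return x>>8 | x<<8
}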

Benchmarks on ARMv6:
name               old time/op  new time/op  delta
ReverseBytes-32    2.86ns ± 0%  2.86ns ± 0%   ~     (all equal)
ReverseBytes16-32  2.86ns ± 0%  2.86ns ± 0%   ~     (all equal)
ReverseBytes32-32  1.29ns ± 0%  1.29ns ± 0%   ~     (all equal)
ReverseBytes64-32  1.43ns ± 0%  1.43ns ± 0%   ~     (all equal)

Change-Id: I819e633c9a9d308f8e476fb0c82d73fb73dd019f
Reviewed-on: https://go-review.googlesource.com/c/go/+/159019
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Author: erifan01 (committed by Cherry Zhang)
Date: 2019-02-11 09:40:02 +00:00
Parent: a2ace8ec18   Commit: fee84cc905
9 changed files with 445 additions and 210 deletions

src/cmd/compile/internal/arm/ssa.go

@@ -659,6 +659,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
case ssa.OpARMMVN,
ssa.OpARMCLZ,
ssa.OpARMREV,
ssa.OpARMREV16,
ssa.OpARMRBIT,
ssa.OpARMSQRTD,
ssa.OpARMNEGF,

src/cmd/compile/internal/ssa/gen/ARM.rules

@@ -1216,6 +1216,12 @@
( ORshiftRL [c] (SLLconst x [32-c]) x) -> (SRRconst [ c] x)
(XORshiftRL [c] (SLLconst x [32-c]) x) -> (SRRconst [ c] x)
// ((x>>8) | (x<<8)) -> (REV16 x), the type of x is uint16, "|" can also be "^" or "+".
// The UBFX instruction is available on ARMv6T2 and above; REV16 is available on
// ARMv6 and above. So for ARMv6 we need to match SLLconst, SRLconst and ORshiftLL.
((ADDshiftLL|ORshiftLL|XORshiftLL) <typ.UInt16> [8] (BFXU <typ.UInt16> [armBFAuxInt(8, 8)] x) x) -> (REV16 x)
((ADDshiftLL|ORshiftLL|XORshiftLL) <typ.UInt16> [8] (SRLconst <typ.UInt16> [24] (SLLconst [16] x)) x) && objabi.GOARM>=6 -> (REV16 x)
// use indexed loads and stores
(MOVWload [0] {sym} (ADD ptr idx) mem) && sym == nil && !config.nacl -> (MOVWloadidx ptr idx mem)
(MOVWstore [0] {sym} (ADD ptr idx) val mem) && sym == nil && !config.nacl -> (MOVWstoreidx ptr idx val mem)
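
A note on the second (ARMv6) REV16 rule above: a logical x>>8 on a uint16 held
in a 32-bit register is materialized as a left shift by 16 (to zero-extend into
the high half) followed by a logical right shift by 24, which is why the pattern
is (SRLconst [24] (SLLconst [16] x)). An illustration in Go, inferred from the
rule shape rather than taken from this CL:

func srl8via32(r uint32) uint32 {
	// r holds a uint16 in its low 16 bits; the upper bits may be stale.
	// r<<16 moves the value to the top half and discards the stale bits,
	// and the logical >>24 then yields the zero-extended x>>8.
	return (r << 16) >> 24 // == uint32(uint16(r) >> 8)
}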

src/cmd/compile/internal/ssa/gen/ARM64.rules

@@ -1751,11 +1751,11 @@
( ORshiftRL [c] (SLLconst x [64-c]) x) -> (RORconst [ c] x)
(XORshiftRL [c] (SLLconst x [64-c]) x) -> (RORconst [ c] x)
(ADDshiftLL <t> [c] (UBFX [bfc] x) x) && c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)
(ADDshiftLL <t> [c] (UBFX [bfc] x) x) && c < 32 && t.Size() == 4 && bfc == armBFAuxInt(32-c, c)
-> (RORWconst [32-c] x)
( ORshiftLL <t> [c] (UBFX [bfc] x) x) && c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)
( ORshiftLL <t> [c] (UBFX [bfc] x) x) && c < 32 && t.Size() == 4 && bfc == armBFAuxInt(32-c, c)
-> (RORWconst [32-c] x)
(XORshiftLL <t> [c] (UBFX [bfc] x) x) && c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)
(XORshiftLL <t> [c] (UBFX [bfc] x) x) && c < 32 && t.Size() == 4 && bfc == armBFAuxInt(32-c, c)
-> (RORWconst [32-c] x)
(ADDshiftRL <t> [c] (SLLconst x [32-c]) (MOVWUreg x)) && c < 32 && t.Size() == 4 -> (RORWconst [c] x)
( ORshiftRL <t> [c] (SLLconst x [32-c]) (MOVWUreg x)) && c < 32 && t.Size() == 4 -> (RORWconst [c] x)
@@ -1794,18 +1794,18 @@
-> (RORW x y)
// ((x>>8) | (x<<8)) -> (REV16W x), the type of x is uint16, "|" can also be "^" or "+".
((ADDshiftLL|ORshiftLL|XORshiftLL) <typ.UInt16> [8] (UBFX <typ.UInt16> [arm64BFAuxInt(8, 8)] x) x) -> (REV16W x)
((ADDshiftLL|ORshiftLL|XORshiftLL) <typ.UInt16> [8] (UBFX <typ.UInt16> [armBFAuxInt(8, 8)] x) x) -> (REV16W x)
// Extract from reg pair
(ADDshiftLL [c] (SRLconst x [64-c]) x2) -> (EXTRconst [64-c] x2 x)
( ORshiftLL [c] (SRLconst x [64-c]) x2) -> (EXTRconst [64-c] x2 x)
(XORshiftLL [c] (SRLconst x [64-c]) x2) -> (EXTRconst [64-c] x2 x)
(ADDshiftLL <t> [c] (UBFX [bfc] x) x2) && c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)
(ADDshiftLL <t> [c] (UBFX [bfc] x) x2) && c < 32 && t.Size() == 4 && bfc == armBFAuxInt(32-c, c)
-> (EXTRWconst [32-c] x2 x)
( ORshiftLL <t> [c] (UBFX [bfc] x) x2) && c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)
( ORshiftLL <t> [c] (UBFX [bfc] x) x2) && c < 32 && t.Size() == 4 && bfc == armBFAuxInt(32-c, c)
-> (EXTRWconst [32-c] x2 x)
(XORshiftLL <t> [c] (UBFX [bfc] x) x2) && c < 32 && t.Size() == 4 && bfc == arm64BFAuxInt(32-c, c)
(XORshiftLL <t> [c] (UBFX [bfc] x) x2) && c < 32 && t.Size() == 4 && bfc == armBFAuxInt(32-c, c)
-> (EXTRWconst [32-c] x2 x)
// Generic rules rewrite certain AND to a pair of shifts.
@@ -1821,88 +1821,88 @@
// sbfiz
// (x << lc) >> rc
(SRAconst [rc] (SLLconst [lc] x)) && lc > rc -> (SBFIZ [arm64BFAuxInt(lc-rc, 64-lc)] x)
(MOVWreg (SLLconst [lc] x)) && lc < 32 -> (SBFIZ [arm64BFAuxInt(lc, 32-lc)] x)
(MOVHreg (SLLconst [lc] x)) && lc < 16 -> (SBFIZ [arm64BFAuxInt(lc, 16-lc)] x)
(MOVBreg (SLLconst [lc] x)) && lc < 8 -> (SBFIZ [arm64BFAuxInt(lc, 8-lc)] x)
(SRAconst [rc] (SLLconst [lc] x)) && lc > rc -> (SBFIZ [armBFAuxInt(lc-rc, 64-lc)] x)
(MOVWreg (SLLconst [lc] x)) && lc < 32 -> (SBFIZ [armBFAuxInt(lc, 32-lc)] x)
(MOVHreg (SLLconst [lc] x)) && lc < 16 -> (SBFIZ [armBFAuxInt(lc, 16-lc)] x)
(MOVBreg (SLLconst [lc] x)) && lc < 8 -> (SBFIZ [armBFAuxInt(lc, 8-lc)] x)
// sbfx
// (x << lc) >> rc
(SRAconst [rc] (SLLconst [lc] x)) && lc <= rc -> (SBFX [arm64BFAuxInt(rc-lc, 64-rc)] x)
(SRAconst [rc] (MOVWreg x)) && rc < 32 -> (SBFX [arm64BFAuxInt(rc, 32-rc)] x)
(SRAconst [rc] (MOVHreg x)) && rc < 16 -> (SBFX [arm64BFAuxInt(rc, 16-rc)] x)
(SRAconst [rc] (MOVBreg x)) && rc < 8 -> (SBFX [arm64BFAuxInt(rc, 8-rc)] x)
(SRAconst [rc] (SLLconst [lc] x)) && lc <= rc -> (SBFX [armBFAuxInt(rc-lc, 64-rc)] x)
(SRAconst [rc] (MOVWreg x)) && rc < 32 -> (SBFX [armBFAuxInt(rc, 32-rc)] x)
(SRAconst [rc] (MOVHreg x)) && rc < 16 -> (SBFX [armBFAuxInt(rc, 16-rc)] x)
(SRAconst [rc] (MOVBreg x)) && rc < 8 -> (SBFX [armBFAuxInt(rc, 8-rc)] x)
// sbfiz/sbfx combinations: merge shifts into bitfield ops
(SRAconst [sc] (SBFIZ [bfc] x)) && sc < getARM64BFlsb(bfc)
-> (SBFIZ [arm64BFAuxInt(getARM64BFlsb(bfc)-sc, getARM64BFwidth(bfc))] x)
-> (SBFIZ [armBFAuxInt(getARM64BFlsb(bfc)-sc, getARM64BFwidth(bfc))] x)
(SRAconst [sc] (SBFIZ [bfc] x)) && sc >= getARM64BFlsb(bfc)
&& sc < getARM64BFlsb(bfc)+getARM64BFwidth(bfc)
-> (SBFX [arm64BFAuxInt(sc-getARM64BFlsb(bfc), getARM64BFlsb(bfc)+getARM64BFwidth(bfc)-sc)] x)
-> (SBFX [armBFAuxInt(sc-getARM64BFlsb(bfc), getARM64BFlsb(bfc)+getARM64BFwidth(bfc)-sc)] x)
// ubfiz
// (x & ac) << sc
(SLLconst [sc] (ANDconst [ac] x)) && isARM64BFMask(sc, ac, 0)
-> (UBFIZ [arm64BFAuxInt(sc, arm64BFWidth(ac, 0))] x)
(SLLconst [sc] (MOVWUreg x)) && isARM64BFMask(sc, 1<<32-1, 0) -> (UBFIZ [arm64BFAuxInt(sc, 32)] x)
(SLLconst [sc] (MOVHUreg x)) && isARM64BFMask(sc, 1<<16-1, 0) -> (UBFIZ [arm64BFAuxInt(sc, 16)] x)
(SLLconst [sc] (MOVBUreg x)) && isARM64BFMask(sc, 1<<8-1, 0) -> (UBFIZ [arm64BFAuxInt(sc, 8)] x)
-> (UBFIZ [armBFAuxInt(sc, arm64BFWidth(ac, 0))] x)
(SLLconst [sc] (MOVWUreg x)) && isARM64BFMask(sc, 1<<32-1, 0) -> (UBFIZ [armBFAuxInt(sc, 32)] x)
(SLLconst [sc] (MOVHUreg x)) && isARM64BFMask(sc, 1<<16-1, 0) -> (UBFIZ [armBFAuxInt(sc, 16)] x)
(SLLconst [sc] (MOVBUreg x)) && isARM64BFMask(sc, 1<<8-1, 0) -> (UBFIZ [armBFAuxInt(sc, 8)] x)
// (x << sc) & ac
(ANDconst [ac] (SLLconst [sc] x)) && isARM64BFMask(sc, ac, sc)
-> (UBFIZ [arm64BFAuxInt(sc, arm64BFWidth(ac, sc))] x)
-> (UBFIZ [armBFAuxInt(sc, arm64BFWidth(ac, sc))] x)
(MOVWUreg (SLLconst [sc] x)) && isARM64BFMask(sc, 1<<32-1, sc)
-> (UBFIZ [arm64BFAuxInt(sc, arm64BFWidth(1<<32-1, sc))] x)
-> (UBFIZ [armBFAuxInt(sc, arm64BFWidth(1<<32-1, sc))] x)
(MOVHUreg (SLLconst [sc] x)) && isARM64BFMask(sc, 1<<16-1, sc)
-> (UBFIZ [arm64BFAuxInt(sc, arm64BFWidth(1<<16-1, sc))] x)
-> (UBFIZ [armBFAuxInt(sc, arm64BFWidth(1<<16-1, sc))] x)
(MOVBUreg (SLLconst [sc] x)) && isARM64BFMask(sc, 1<<8-1, sc)
-> (UBFIZ [arm64BFAuxInt(sc, arm64BFWidth(1<<8-1, sc))] x)
-> (UBFIZ [armBFAuxInt(sc, arm64BFWidth(1<<8-1, sc))] x)
// (x << lc) >> rc
(SRLconst [rc] (SLLconst [lc] x)) && lc > rc -> (UBFIZ [arm64BFAuxInt(lc-rc, 64-lc)] x)
(SRLconst [rc] (SLLconst [lc] x)) && lc > rc -> (UBFIZ [armBFAuxInt(lc-rc, 64-lc)] x)
// ubfx
// (x >> sc) & ac
(ANDconst [ac] (SRLconst [sc] x)) && isARM64BFMask(sc, ac, 0)
-> (UBFX [arm64BFAuxInt(sc, arm64BFWidth(ac, 0))] x)
(MOVWUreg (SRLconst [sc] x)) && isARM64BFMask(sc, 1<<32-1, 0) -> (UBFX [arm64BFAuxInt(sc, 32)] x)
(MOVHUreg (SRLconst [sc] x)) && isARM64BFMask(sc, 1<<16-1, 0) -> (UBFX [arm64BFAuxInt(sc, 16)] x)
(MOVBUreg (SRLconst [sc] x)) && isARM64BFMask(sc, 1<<8-1, 0) -> (UBFX [arm64BFAuxInt(sc, 8)] x)
-> (UBFX [armBFAuxInt(sc, arm64BFWidth(ac, 0))] x)
(MOVWUreg (SRLconst [sc] x)) && isARM64BFMask(sc, 1<<32-1, 0) -> (UBFX [armBFAuxInt(sc, 32)] x)
(MOVHUreg (SRLconst [sc] x)) && isARM64BFMask(sc, 1<<16-1, 0) -> (UBFX [armBFAuxInt(sc, 16)] x)
(MOVBUreg (SRLconst [sc] x)) && isARM64BFMask(sc, 1<<8-1, 0) -> (UBFX [armBFAuxInt(sc, 8)] x)
// (x & ac) >> sc
(SRLconst [sc] (ANDconst [ac] x)) && isARM64BFMask(sc, ac, sc)
-> (UBFX [arm64BFAuxInt(sc, arm64BFWidth(ac, sc))] x)
-> (UBFX [armBFAuxInt(sc, arm64BFWidth(ac, sc))] x)
(SRLconst [sc] (MOVWUreg x)) && isARM64BFMask(sc, 1<<32-1, sc)
-> (UBFX [arm64BFAuxInt(sc, arm64BFWidth(1<<32-1, sc))] x)
-> (UBFX [armBFAuxInt(sc, arm64BFWidth(1<<32-1, sc))] x)
(SRLconst [sc] (MOVHUreg x)) && isARM64BFMask(sc, 1<<16-1, sc)
-> (UBFX [arm64BFAuxInt(sc, arm64BFWidth(1<<16-1, sc))] x)
-> (UBFX [armBFAuxInt(sc, arm64BFWidth(1<<16-1, sc))] x)
(SRLconst [sc] (MOVBUreg x)) && isARM64BFMask(sc, 1<<8-1, sc)
-> (UBFX [arm64BFAuxInt(sc, arm64BFWidth(1<<8-1, sc))] x)
-> (UBFX [armBFAuxInt(sc, arm64BFWidth(1<<8-1, sc))] x)
// (x << lc) >> rc
(SRLconst [rc] (SLLconst [lc] x)) && lc < rc -> (UBFX [arm64BFAuxInt(rc-lc, 64-rc)] x)
(SRLconst [rc] (SLLconst [lc] x)) && lc < rc -> (UBFX [armBFAuxInt(rc-lc, 64-rc)] x)
// ubfiz/ubfx combinations: merge shifts into bitfield ops
(SRLconst [sc] (UBFX [bfc] x)) && sc < getARM64BFwidth(bfc)
-> (UBFX [arm64BFAuxInt(getARM64BFlsb(bfc)+sc, getARM64BFwidth(bfc)-sc)] x)
-> (UBFX [armBFAuxInt(getARM64BFlsb(bfc)+sc, getARM64BFwidth(bfc)-sc)] x)
(UBFX [bfc] (SRLconst [sc] x)) && sc+getARM64BFwidth(bfc)+getARM64BFlsb(bfc) < 64
-> (UBFX [arm64BFAuxInt(getARM64BFlsb(bfc)+sc, getARM64BFwidth(bfc))] x)
-> (UBFX [armBFAuxInt(getARM64BFlsb(bfc)+sc, getARM64BFwidth(bfc))] x)
(SLLconst [sc] (UBFIZ [bfc] x)) && sc+getARM64BFwidth(bfc)+getARM64BFlsb(bfc) < 64
-> (UBFIZ [arm64BFAuxInt(getARM64BFlsb(bfc)+sc, getARM64BFwidth(bfc))] x)
-> (UBFIZ [armBFAuxInt(getARM64BFlsb(bfc)+sc, getARM64BFwidth(bfc))] x)
(UBFIZ [bfc] (SLLconst [sc] x)) && sc < getARM64BFwidth(bfc)
-> (UBFIZ [arm64BFAuxInt(getARM64BFlsb(bfc)+sc, getARM64BFwidth(bfc)-sc)] x)
-> (UBFIZ [armBFAuxInt(getARM64BFlsb(bfc)+sc, getARM64BFwidth(bfc)-sc)] x)
// ((x << c1) >> c2) >> c3
(SRLconst [sc] (UBFIZ [bfc] x)) && sc == getARM64BFlsb(bfc)
-> (ANDconst [1<<uint(getARM64BFwidth(bfc))-1] x)
(SRLconst [sc] (UBFIZ [bfc] x)) && sc < getARM64BFlsb(bfc)
-> (UBFIZ [arm64BFAuxInt(getARM64BFlsb(bfc)-sc, getARM64BFwidth(bfc))] x)
-> (UBFIZ [armBFAuxInt(getARM64BFlsb(bfc)-sc, getARM64BFwidth(bfc))] x)
(SRLconst [sc] (UBFIZ [bfc] x)) && sc > getARM64BFlsb(bfc)
&& sc < getARM64BFlsb(bfc)+getARM64BFwidth(bfc)
-> (UBFX [arm64BFAuxInt(sc-getARM64BFlsb(bfc), getARM64BFlsb(bfc)+getARM64BFwidth(bfc)-sc)] x)
-> (UBFX [armBFAuxInt(sc-getARM64BFlsb(bfc), getARM64BFlsb(bfc)+getARM64BFwidth(bfc)-sc)] x)
// ((x << c1) << c2) >> c3
(UBFX [bfc] (SLLconst [sc] x)) && sc == getARM64BFlsb(bfc)
-> (ANDconst [1<<uint(getARM64BFwidth(bfc))-1] x)
(UBFX [bfc] (SLLconst [sc] x)) && sc < getARM64BFlsb(bfc)
-> (UBFX [arm64BFAuxInt(getARM64BFlsb(bfc)-sc, getARM64BFwidth(bfc))] x)
-> (UBFX [armBFAuxInt(getARM64BFlsb(bfc)-sc, getARM64BFwidth(bfc))] x)
(UBFX [bfc] (SLLconst [sc] x)) && sc > getARM64BFlsb(bfc)
&& sc < getARM64BFlsb(bfc)+getARM64BFwidth(bfc)
-> (UBFIZ [arm64BFAuxInt(sc-getARM64BFlsb(bfc), getARM64BFlsb(bfc)+getARM64BFwidth(bfc)-sc)] x)
-> (UBFIZ [armBFAuxInt(sc-getARM64BFlsb(bfc), getARM64BFlsb(bfc)+getARM64BFwidth(bfc)-sc)] x)
// bfi
(OR (UBFIZ [bfc] x) (ANDconst [ac] y))
@@ -1910,7 +1910,7 @@
-> (BFI [bfc] y x)
(ORshiftRL [rc] (ANDconst [ac] x) (SLLconst [lc] y))
&& lc > rc && ac == ^((1<<uint(64-lc)-1) << uint64(lc-rc))
-> (BFI [arm64BFAuxInt(lc-rc, 64-lc)] x y)
-> (BFI [armBFAuxInt(lc-rc, 64-lc)] x y)
// bfxil
(OR (UBFX [bfc] x) (ANDconst [ac] y)) && ac == ^(1<<uint(getARM64BFwidth(bfc))-1)
-> (BFXIL [bfc] y x)
@@ -2560,23 +2560,23 @@
&& x.Uses == 1
&& clobber(x)
-> (MOVHstoreidx ptr idx w mem)
(MOVBstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(8, 8)] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
(MOVBstore [i] {s} ptr0 (UBFX [armBFAuxInt(8, 8)] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
&& x.Uses == 1
&& isSamePtr(ptr0, ptr1)
&& clobber(x)
-> (MOVHstore [i-1] {s} ptr0 w mem)
(MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [arm64BFAuxInt(8, 8)] w) x:(MOVBstoreidx ptr1 idx1 w mem))
(MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [armBFAuxInt(8, 8)] w) x:(MOVBstoreidx ptr1 idx1 w mem))
&& x.Uses == 1
&& s == nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& clobber(x)
-> (MOVHstoreidx ptr1 idx1 w mem)
(MOVBstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(8, 24)] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
(MOVBstore [i] {s} ptr0 (UBFX [armBFAuxInt(8, 24)] w) x:(MOVBstore [i-1] {s} ptr1 w mem))
&& x.Uses == 1
&& isSamePtr(ptr0, ptr1)
&& clobber(x)
-> (MOVHstore [i-1] {s} ptr0 w mem)
(MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [arm64BFAuxInt(8, 24)] w) x:(MOVBstoreidx ptr1 idx1 w mem))
(MOVBstore [1] {s} (ADD ptr0 idx0) (UBFX [armBFAuxInt(8, 24)] w) x:(MOVBstoreidx ptr1 idx1 w mem))
&& x.Uses == 1
&& s == nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
@@ -2653,18 +2653,18 @@
&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
&& clobber(x)
-> (MOVWstoreidx ptr1 (SLLconst <idx1.Type> [1] idx1) w mem)
(MOVHstore [i] {s} ptr0 (UBFX [arm64BFAuxInt(16, 16)] w) x:(MOVHstore [i-2] {s} ptr1 w mem))
(MOVHstore [i] {s} ptr0 (UBFX [armBFAuxInt(16, 16)] w) x:(MOVHstore [i-2] {s} ptr1 w mem))
&& x.Uses == 1
&& isSamePtr(ptr0, ptr1)
&& clobber(x)
-> (MOVWstore [i-2] {s} ptr0 w mem)
(MOVHstore [2] {s} (ADD ptr0 idx0) (UBFX [arm64BFAuxInt(16, 16)] w) x:(MOVHstoreidx ptr1 idx1 w mem))
(MOVHstore [2] {s} (ADD ptr0 idx0) (UBFX [armBFAuxInt(16, 16)] w) x:(MOVHstoreidx ptr1 idx1 w mem))
&& x.Uses == 1
&& s == nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& clobber(x)
-> (MOVWstoreidx ptr1 idx1 w mem)
(MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (UBFX [arm64BFAuxInt(16, 16)] w) x:(MOVHstoreidx2 ptr1 idx1 w mem))
(MOVHstore [2] {s} (ADDshiftLL [1] ptr0 idx0) (UBFX [armBFAuxInt(16, 16)] w) x:(MOVHstoreidx2 ptr1 idx1 w mem))
&& x.Uses == 1
&& s == nil
&& isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1)
@@ -2792,9 +2792,9 @@
&& clobber(x6)
-> (MOVDstoreidx ptr0 idx0 (REV <w.Type> w) mem)
(MOVBstore [i] {s} ptr w
x0:(MOVBstore [i-1] {s} ptr (UBFX [arm64BFAuxInt(8, 24)] w)
x1:(MOVBstore [i-2] {s} ptr (UBFX [arm64BFAuxInt(16, 16)] w)
x2:(MOVBstore [i-3] {s} ptr (UBFX [arm64BFAuxInt(24, 8)] w) mem))))
x0:(MOVBstore [i-1] {s} ptr (UBFX [armBFAuxInt(8, 24)] w)
x1:(MOVBstore [i-2] {s} ptr (UBFX [armBFAuxInt(16, 16)] w)
x2:(MOVBstore [i-3] {s} ptr (UBFX [armBFAuxInt(24, 8)] w) mem))))
&& x0.Uses == 1
&& x1.Uses == 1
&& x2.Uses == 1
@@ -2803,9 +2803,9 @@
&& clobber(x2)
-> (MOVWstore [i-3] {s} ptr (REVW <w.Type> w) mem)
(MOVBstore [3] {s} p w
x0:(MOVBstore [2] {s} p (UBFX [arm64BFAuxInt(8, 24)] w)
x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (UBFX [arm64BFAuxInt(16, 16)] w)
x2:(MOVBstoreidx ptr0 idx0 (UBFX [arm64BFAuxInt(24, 8)] w) mem))))
x0:(MOVBstore [2] {s} p (UBFX [armBFAuxInt(8, 24)] w)
x1:(MOVBstore [1] {s} p1:(ADD ptr1 idx1) (UBFX [armBFAuxInt(16, 16)] w)
x2:(MOVBstoreidx ptr0 idx0 (UBFX [armBFAuxInt(24, 8)] w) mem))))
&& x0.Uses == 1
&& x1.Uses == 1
&& x2.Uses == 1
@@ -2817,9 +2817,9 @@
&& clobber(x2)
-> (MOVWstoreidx ptr0 idx0 (REVW <w.Type> w) mem)
(MOVBstoreidx ptr (ADDconst [3] idx) w
x0:(MOVBstoreidx ptr (ADDconst [2] idx) (UBFX [arm64BFAuxInt(8, 24)] w)
x1:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [arm64BFAuxInt(16, 16)] w)
x2:(MOVBstoreidx ptr idx (UBFX [arm64BFAuxInt(24, 8)] w) mem))))
x0:(MOVBstoreidx ptr (ADDconst [2] idx) (UBFX [armBFAuxInt(8, 24)] w)
x1:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [armBFAuxInt(16, 16)] w)
x2:(MOVBstoreidx ptr idx (UBFX [armBFAuxInt(24, 8)] w) mem))))
&& x0.Uses == 1
&& x1.Uses == 1
&& x2.Uses == 1
@@ -2828,9 +2828,9 @@
&& clobber(x2)
-> (MOVWstoreidx ptr idx (REVW <w.Type> w) mem)
(MOVBstoreidx ptr idx w
x0:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [arm64BFAuxInt(8, 24)] w)
x1:(MOVBstoreidx ptr (ADDconst [2] idx) (UBFX [arm64BFAuxInt(16, 16)] w)
x2:(MOVBstoreidx ptr (ADDconst [3] idx) (UBFX [arm64BFAuxInt(24, 8)] w) mem))))
x0:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [armBFAuxInt(8, 24)] w)
x1:(MOVBstoreidx ptr (ADDconst [2] idx) (UBFX [armBFAuxInt(16, 16)] w)
x2:(MOVBstoreidx ptr (ADDconst [3] idx) (UBFX [armBFAuxInt(24, 8)] w) mem))))
&& x0.Uses == 1
&& x1.Uses == 1
&& x2.Uses == 1
@@ -2898,21 +2898,21 @@
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& clobber(x)
-> (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
(MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (UBFX [arm64BFAuxInt(8, 8)] w) mem))
(MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (UBFX [armBFAuxInt(8, 8)] w) mem))
&& x.Uses == 1
&& clobber(x)
-> (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
(MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (UBFX [arm64BFAuxInt(8, 8)] w) mem))
(MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (UBFX [armBFAuxInt(8, 8)] w) mem))
&& x.Uses == 1
&& s == nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& clobber(x)
-> (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
(MOVBstoreidx ptr (ADDconst [1] idx) w x:(MOVBstoreidx ptr idx (UBFX [arm64BFAuxInt(8, 8)] w) mem))
(MOVBstoreidx ptr (ADDconst [1] idx) w x:(MOVBstoreidx ptr idx (UBFX [armBFAuxInt(8, 8)] w) mem))
&& x.Uses == 1
&& clobber(x)
-> (MOVHstoreidx ptr idx (REV16W <w.Type> w) mem)
(MOVBstoreidx ptr idx w x:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [arm64BFAuxInt(8, 8)] w) mem))
(MOVBstoreidx ptr idx w x:(MOVBstoreidx ptr (ADDconst [1] idx) (UBFX [armBFAuxInt(8, 8)] w) mem))
&& x.Uses == 1
&& clobber(x)
-> (MOVHstoreidx ptr idx w mem)
@@ -2926,11 +2926,11 @@
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))
&& clobber(x)
-> (MOVHstoreidx ptr0 idx0 (REV16W <w.Type> w) mem)
(MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (UBFX [arm64BFAuxInt(8, 24)] w) mem))
(MOVBstore [i] {s} ptr w x:(MOVBstore [i-1] {s} ptr (UBFX [armBFAuxInt(8, 24)] w) mem))
&& x.Uses == 1
&& clobber(x)
-> (MOVHstore [i-1] {s} ptr (REV16W <w.Type> w) mem)
(MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (UBFX [arm64BFAuxInt(8, 24)] w) mem))
(MOVBstore [1] {s} (ADD ptr1 idx1) w x:(MOVBstoreidx ptr0 idx0 (UBFX [armBFAuxInt(8, 24)] w) mem))
&& x.Uses == 1
&& s == nil
&& (isSamePtr(ptr0, ptr1) && isSamePtr(idx0, idx1) || isSamePtr(ptr0, idx1) && isSamePtr(idx0, ptr1))

src/cmd/compile/internal/ssa/gen/ARMOps.go

@@ -207,9 +207,10 @@ func init() {
{name: "NEGD", argLength: 1, reg: fp11, asm: "NEGD"}, // -arg0, float64
{name: "SQRTD", argLength: 1, reg: fp11, asm: "SQRTD"}, // sqrt(arg0), float64
{name: "CLZ", argLength: 1, reg: gp11, asm: "CLZ"}, // count leading zero
{name: "REV", argLength: 1, reg: gp11, asm: "REV"}, // reverse byte order
{name: "RBIT", argLength: 1, reg: gp11, asm: "RBIT"}, // reverse bit order
{name: "CLZ", argLength: 1, reg: gp11, asm: "CLZ"}, // count leading zero
{name: "REV", argLength: 1, reg: gp11, asm: "REV"}, // reverse byte order
{name: "REV16", argLength: 1, reg: gp11, asm: "REV16"}, // reverse byte order in 16-bit halfwords
{name: "RBIT", argLength: 1, reg: gp11, asm: "RBIT"}, // reverse bit order
// shifts
{name: "SLL", argLength: 2, reg: gp21, asm: "SLL"}, // arg0 << arg1, shift amount is mod 256

src/cmd/compile/internal/ssa/opGen.go

@@ -907,6 +907,7 @@ const (
OpARMSQRTD
OpARMCLZ
OpARMREV
OpARMREV16
OpARMRBIT
OpARMSLL
OpARMSLLconst
@@ -12036,6 +12037,19 @@ var opcodeTable = [...]opInfo{
},
},
},
{
name: "REV16",
argLen: 1,
asm: arm.AREV16,
reg: regInfo{
inputs: []inputInfo{
{0, 22527}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 g R12 R14
},
outputs: []outputInfo{
{0, 21503}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14
},
},
},
{
name: "RBIT",
argLen: 1,

src/cmd/compile/internal/ssa/rewrite.go

@@ -1037,13 +1037,13 @@ func isInlinableMemmove(dst, src *Value, sz int64, c *Config) bool {
return false
}
// encodes the lsb and width for arm64 bitfield ops into the expected auxInt format.
func arm64BFAuxInt(lsb, width int64) int64 {
// encodes the lsb and width for arm(64) bitfield ops into the expected auxInt format.
func armBFAuxInt(lsb, width int64) int64 {
if lsb < 0 || lsb > 63 {
panic("ARM64 bit field lsb constant out of range")
panic("ARM(64) bit field lsb constant out of range")
}
if width < 1 || width > 64 {
panic("ARM64 bit field width constant out of range")
panic("ARM(64) bit field width constant out of range")
}
return width | lsb<<8
}
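
As the last line of the hunk shows, the renamed helper packs the bitfield spec
as width in the low byte and lsb starting at bit 8. The matching decoders are
getARM64BFlsb and getARM64BFwidth, used throughout the rules above; their
bodies are not shown in this diff, but from the encoder they must behave like
this sketch (the names bfLSB and bfWidth are illustrative):

func bfLSB(bfc int64) int64   { return bfc >> 8 }   // lsb was stored at bit 8
func bfWidth(bfc int64) int64 { return bfc & 0xff } // width (1..64) fits in the low byte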

src/cmd/compile/internal/ssa/rewriteARM.go

@@ -2933,6 +2933,8 @@ func rewriteValueARM_OpARMADDconst_0(v *Value) bool {
func rewriteValueARM_OpARMADDshiftLL_0(v *Value) bool {
b := v.Block
_ = b
typ := &b.Func.Config.Types
_ = typ
// match: (ADDshiftLL (MOVWconst [c]) x [d])
// cond:
// result: (ADDconst [c] (SLLconst <x.Type> x [d]))
@@ -2992,6 +2994,74 @@
v.AddArg(x)
return true
}
// match: (ADDshiftLL <typ.UInt16> [8] (BFXU <typ.UInt16> [armBFAuxInt(8, 8)] x) x)
// cond:
// result: (REV16 x)
for {
if v.Type != typ.UInt16 {
break
}
if v.AuxInt != 8 {
break
}
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpARMBFXU {
break
}
if v_0.Type != typ.UInt16 {
break
}
if v_0.AuxInt != armBFAuxInt(8, 8) {
break
}
x := v_0.Args[0]
if x != v.Args[1] {
break
}
v.reset(OpARMREV16)
v.AddArg(x)
return true
}
// match: (ADDshiftLL <typ.UInt16> [8] (SRLconst <typ.UInt16> [24] (SLLconst [16] x)) x)
// cond: objabi.GOARM>=6
// result: (REV16 x)
for {
if v.Type != typ.UInt16 {
break
}
if v.AuxInt != 8 {
break
}
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpARMSRLconst {
break
}
if v_0.Type != typ.UInt16 {
break
}
if v_0.AuxInt != 24 {
break
}
v_0_0 := v_0.Args[0]
if v_0_0.Op != OpARMSLLconst {
break
}
if v_0_0.AuxInt != 16 {
break
}
x := v_0_0.Args[0]
if x != v.Args[1] {
break
}
if !(objabi.GOARM >= 6) {
break
}
v.reset(OpARMREV16)
v.AddArg(x)
return true
}
return false
}
func rewriteValueARM_OpARMADDshiftLLreg_0(v *Value) bool {
@@ -11952,6 +12022,8 @@ func rewriteValueARM_OpARMORconst_0(v *Value) bool {
func rewriteValueARM_OpARMORshiftLL_0(v *Value) bool {
b := v.Block
_ = b
typ := &b.Func.Config.Types
_ = typ
// match: (ORshiftLL (MOVWconst [c]) x [d])
// cond:
// result: (ORconst [c] (SLLconst <x.Type> x [d]))
@@ -12011,6 +12083,74 @@
v.AddArg(x)
return true
}
// match: (ORshiftLL <typ.UInt16> [8] (BFXU <typ.UInt16> [armBFAuxInt(8, 8)] x) x)
// cond:
// result: (REV16 x)
for {
if v.Type != typ.UInt16 {
break
}
if v.AuxInt != 8 {
break
}
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpARMBFXU {
break
}
if v_0.Type != typ.UInt16 {
break
}
if v_0.AuxInt != armBFAuxInt(8, 8) {
break
}
x := v_0.Args[0]
if x != v.Args[1] {
break
}
v.reset(OpARMREV16)
v.AddArg(x)
return true
}
// match: (ORshiftLL <typ.UInt16> [8] (SRLconst <typ.UInt16> [24] (SLLconst [16] x)) x)
// cond: objabi.GOARM>=6
// result: (REV16 x)
for {
if v.Type != typ.UInt16 {
break
}
if v.AuxInt != 8 {
break
}
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpARMSRLconst {
break
}
if v_0.Type != typ.UInt16 {
break
}
if v_0.AuxInt != 24 {
break
}
v_0_0 := v_0.Args[0]
if v_0_0.Op != OpARMSLLconst {
break
}
if v_0_0.AuxInt != 16 {
break
}
x := v_0_0.Args[0]
if x != v.Args[1] {
break
}
if !(objabi.GOARM >= 6) {
break
}
v.reset(OpARMREV16)
v.AddArg(x)
return true
}
// match: (ORshiftLL x y:(SLLconst x [c]) [d])
// cond: c==d
// result: y
@@ -17230,6 +17370,8 @@ func rewriteValueARM_OpARMXORconst_0(v *Value) bool {
func rewriteValueARM_OpARMXORshiftLL_0(v *Value) bool {
b := v.Block
_ = b
typ := &b.Func.Config.Types
_ = typ
// match: (XORshiftLL (MOVWconst [c]) x [d])
// cond:
// result: (XORconst [c] (SLLconst <x.Type> x [d]))
@@ -17289,6 +17431,74 @@
v.AddArg(x)
return true
}
// match: (XORshiftLL <typ.UInt16> [8] (BFXU <typ.UInt16> [armBFAuxInt(8, 8)] x) x)
// cond:
// result: (REV16 x)
for {
if v.Type != typ.UInt16 {
break
}
if v.AuxInt != 8 {
break
}
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpARMBFXU {
break
}
if v_0.Type != typ.UInt16 {
break
}
if v_0.AuxInt != armBFAuxInt(8, 8) {
break
}
x := v_0.Args[0]
if x != v.Args[1] {
break
}
v.reset(OpARMREV16)
v.AddArg(x)
return true
}
// match: (XORshiftLL <typ.UInt16> [8] (SRLconst <typ.UInt16> [24] (SLLconst [16] x)) x)
// cond: objabi.GOARM>=6
// result: (REV16 x)
for {
if v.Type != typ.UInt16 {
break
}
if v.AuxInt != 8 {
break
}
_ = v.Args[1]
v_0 := v.Args[0]
if v_0.Op != OpARMSRLconst {
break
}
if v_0.Type != typ.UInt16 {
break
}
if v_0.AuxInt != 24 {
break
}
v_0_0 := v_0.Args[0]
if v_0_0.Op != OpARMSLLconst {
break
}
if v_0_0.AuxInt != 16 {
break
}
x := v_0_0.Args[0]
if x != v.Args[1] {
break
}
if !(objabi.GOARM >= 6) {
break
}
v.reset(OpARMREV16)
v.AddArg(x)
return true
}
// match: (XORshiftLL x (SLLconst x [c]) [d])
// cond: c==d
// result: (MOVWconst [0])

src/cmd/compile/internal/ssa/rewriteARM64.go (diff suppressed because it is too large)

test/codegen/mathbits.go

@@ -171,6 +171,9 @@ func ReverseBytes32(n uint32) uint32 {
func ReverseBytes16(n uint16) uint16 {
// amd64:"ROLW"
// arm64:"REV16W",-"UBFX",-"ORR"
// arm/5:"SLL","SRL","ORR"
// arm/6:"REV16"
// arm/7:"REV16"
return bits.ReverseBytes16(n)
}