mirror of
https://github.com/golang/go
synced 2024-10-04 15:09:59 +00:00
cmd/compile: added some intrinsics to SSA back end
One intrinsic was needed to help get the very best performance out of a future GC; as long as that one was being added, I also added Bswap since that is sometimes a handy thing to have. I had intended to fill out the bit-scan intrinsic family, but the mismatch between the "scan forward" instruction and "count leading zeroes" was large enough to cause me to leave it out -- it poses a dilemma that I'd rather dodge right now. These intrinsics are not exposed for general use. That's a separate issue requiring an API proposal change ( https://github.com/golang/proposal ) All intrinsics are tested, both that they are substituted on the appropriate architecture, and that they produce the expected result. Change-Id: I5848037cfd97de4f75bdc33bdd89bba00af4a8ee Reviewed-on: https://go-review.googlesource.com/20564 Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: David Chase <drchase@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
This commit is contained in:
parent
2e90192b0e
commit
8eec2bbfbc
|
@ -36,26 +36,44 @@ var progtable = [x86.ALAST & obj.AMask]obj.ProgInfo{
|
|||
|
||||
// NOP is an internal no-op that also stands
|
||||
// for USED and SET annotations, not the Intel opcode.
|
||||
obj.ANOP: {Flags: gc.LeftRead | gc.RightWrite},
|
||||
x86.AADCL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry},
|
||||
x86.AADCQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry},
|
||||
x86.AADCW & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry},
|
||||
x86.AADDB & obj.AMask: {Flags: gc.SizeB | gc.LeftRead | RightRdwr | gc.SetCarry},
|
||||
x86.AADDL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry},
|
||||
x86.AADDW & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry},
|
||||
x86.AADDQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry},
|
||||
x86.AADDSD & obj.AMask: {Flags: gc.SizeD | gc.LeftRead | RightRdwr},
|
||||
x86.AADDSS & obj.AMask: {Flags: gc.SizeF | gc.LeftRead | RightRdwr},
|
||||
x86.AANDB & obj.AMask: {Flags: gc.SizeB | gc.LeftRead | RightRdwr | gc.SetCarry},
|
||||
x86.AANDL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry},
|
||||
x86.AANDQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry},
|
||||
x86.AANDW & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry},
|
||||
obj.ACALL & obj.AMask: {Flags: gc.RightAddr | gc.Call | gc.KillCarry},
|
||||
x86.ACDQ & obj.AMask: {Flags: gc.OK, Reguse: AX, Regset: AX | DX},
|
||||
x86.ACQO & obj.AMask: {Flags: gc.OK, Reguse: AX, Regset: AX | DX},
|
||||
x86.ACWD & obj.AMask: {Flags: gc.OK, Reguse: AX, Regset: AX | DX},
|
||||
x86.ACLD & obj.AMask: {Flags: gc.OK},
|
||||
x86.ASTD & obj.AMask: {Flags: gc.OK},
|
||||
obj.ANOP: {Flags: gc.LeftRead | gc.RightWrite},
|
||||
x86.AADCL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry},
|
||||
x86.AADCQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry},
|
||||
x86.AADCW & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry | gc.UseCarry},
|
||||
x86.AADDB & obj.AMask: {Flags: gc.SizeB | gc.LeftRead | RightRdwr | gc.SetCarry},
|
||||
x86.AADDL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry},
|
||||
x86.AADDW & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry},
|
||||
x86.AADDQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry},
|
||||
x86.AADDSD & obj.AMask: {Flags: gc.SizeD | gc.LeftRead | RightRdwr},
|
||||
x86.AADDSS & obj.AMask: {Flags: gc.SizeF | gc.LeftRead | RightRdwr},
|
||||
x86.AANDB & obj.AMask: {Flags: gc.SizeB | gc.LeftRead | RightRdwr | gc.SetCarry},
|
||||
x86.AANDL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.SetCarry},
|
||||
x86.AANDQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.SetCarry},
|
||||
x86.AANDW & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.SetCarry},
|
||||
|
||||
x86.ABSFL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.RightWrite | gc.SetCarry},
|
||||
x86.ABSFQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.RightWrite | gc.SetCarry},
|
||||
x86.ABSFW & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | gc.RightWrite | gc.SetCarry},
|
||||
x86.ABSRL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.RightWrite | gc.SetCarry},
|
||||
x86.ABSRQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.RightWrite | gc.SetCarry},
|
||||
x86.ABSRW & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | gc.RightWrite | gc.SetCarry},
|
||||
x86.ABSWAPL & obj.AMask: {Flags: gc.SizeL | RightRdwr},
|
||||
x86.ABSWAPQ & obj.AMask: {Flags: gc.SizeQ | RightRdwr},
|
||||
|
||||
obj.ACALL & obj.AMask: {Flags: gc.RightAddr | gc.Call | gc.KillCarry},
|
||||
x86.ACDQ & obj.AMask: {Flags: gc.OK, Reguse: AX, Regset: AX | DX},
|
||||
x86.ACQO & obj.AMask: {Flags: gc.OK, Reguse: AX, Regset: AX | DX},
|
||||
x86.ACWD & obj.AMask: {Flags: gc.OK, Reguse: AX, Regset: AX | DX},
|
||||
x86.ACLD & obj.AMask: {Flags: gc.OK},
|
||||
x86.ASTD & obj.AMask: {Flags: gc.OK},
|
||||
|
||||
x86.ACMOVLEQ & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.UseCarry},
|
||||
x86.ACMOVLNE & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | RightRdwr | gc.UseCarry},
|
||||
x86.ACMOVQEQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.UseCarry},
|
||||
x86.ACMOVQNE & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | RightRdwr | gc.UseCarry},
|
||||
x86.ACMOVWEQ & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.UseCarry},
|
||||
x86.ACMOVWNE & obj.AMask: {Flags: gc.SizeW | gc.LeftRead | RightRdwr | gc.UseCarry},
|
||||
|
||||
x86.ACMPB & obj.AMask: {Flags: gc.SizeB | gc.LeftRead | gc.RightRead | gc.SetCarry},
|
||||
x86.ACMPL & obj.AMask: {Flags: gc.SizeL | gc.LeftRead | gc.RightRead | gc.SetCarry},
|
||||
x86.ACMPQ & obj.AMask: {Flags: gc.SizeQ | gc.LeftRead | gc.RightRead | gc.SetCarry},
|
||||
|
|
|
@ -477,6 +477,33 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
|
|||
p.From.Offset = v.AuxInt2Int64()
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = r
|
||||
|
||||
case ssa.OpAMD64CMOVQEQconst, ssa.OpAMD64CMOVLEQconst, ssa.OpAMD64CMOVWEQconst,
|
||||
ssa.OpAMD64CMOVQNEconst, ssa.OpAMD64CMOVLNEconst, ssa.OpAMD64CMOVWNEconst:
|
||||
r := gc.SSARegNum(v)
|
||||
x := gc.SSARegNum(v.Args[0])
|
||||
// Arg0 is in/out, move in to out if not already same
|
||||
if r != x {
|
||||
p := gc.Prog(moveByType(v.Type))
|
||||
p.From.Type = obj.TYPE_REG
|
||||
p.From.Reg = x
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = r
|
||||
}
|
||||
|
||||
// Constant into AX, after arg0 movement in case arg0 is in AX
|
||||
p := gc.Prog(moveByType(v.Type))
|
||||
p.From.Type = obj.TYPE_CONST
|
||||
p.From.Offset = v.AuxInt2Int64()
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = x86.REG_AX
|
||||
|
||||
p = gc.Prog(v.Op.Asm())
|
||||
p.From.Type = obj.TYPE_REG
|
||||
p.From.Reg = x86.REG_AX
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = r
|
||||
|
||||
case ssa.OpAMD64MULQconst, ssa.OpAMD64MULLconst, ssa.OpAMD64MULWconst, ssa.OpAMD64MULBconst:
|
||||
r := gc.SSARegNum(v)
|
||||
x := gc.SSARegNum(v.Args[0])
|
||||
|
@ -955,6 +982,7 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
|
|||
gc.Maxarg = v.AuxInt
|
||||
}
|
||||
case ssa.OpAMD64NEGQ, ssa.OpAMD64NEGL, ssa.OpAMD64NEGW, ssa.OpAMD64NEGB,
|
||||
ssa.OpAMD64BSWAPQ, ssa.OpAMD64BSWAPL,
|
||||
ssa.OpAMD64NOTQ, ssa.OpAMD64NOTL, ssa.OpAMD64NOTW, ssa.OpAMD64NOTB:
|
||||
x := gc.SSARegNum(v.Args[0])
|
||||
r := gc.SSARegNum(v)
|
||||
|
@ -968,7 +996,9 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
|
|||
p := gc.Prog(v.Op.Asm())
|
||||
p.To.Type = obj.TYPE_REG
|
||||
p.To.Reg = r
|
||||
case ssa.OpAMD64SQRTSD:
|
||||
case ssa.OpAMD64BSFQ, ssa.OpAMD64BSFL, ssa.OpAMD64BSFW,
|
||||
ssa.OpAMD64BSRQ, ssa.OpAMD64BSRL, ssa.OpAMD64BSRW,
|
||||
ssa.OpAMD64SQRTSD:
|
||||
p := gc.Prog(v.Op.Asm())
|
||||
p.From.Type = obj.TYPE_REG
|
||||
p.From.Reg = gc.SSARegNum(v.Args[0])
|
||||
|
@ -1008,9 +1038,9 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
|
|||
opregreg(x86.AANDL, gc.SSARegNum(v), x86.REG_AX)
|
||||
|
||||
case ssa.OpAMD64InvertFlags:
|
||||
v.Fatalf("InvertFlags should never make it to codegen %v", v)
|
||||
v.Fatalf("InvertFlags should never make it to codegen %v", v.LongString())
|
||||
case ssa.OpAMD64FlagEQ, ssa.OpAMD64FlagLT_ULT, ssa.OpAMD64FlagLT_UGT, ssa.OpAMD64FlagGT_ULT, ssa.OpAMD64FlagGT_UGT:
|
||||
v.Fatalf("Flag* ops should never make it to codegen %v", v)
|
||||
v.Fatalf("Flag* ops should never make it to codegen %v", v.LongString())
|
||||
case ssa.OpAMD64REPSTOSQ:
|
||||
gc.Prog(x86.AREP)
|
||||
gc.Prog(x86.ASTOSQ)
|
||||
|
|
|
@ -55,8 +55,8 @@ func (v Val) Ctype() Ctype {
|
|||
}
|
||||
|
||||
type Pkg struct {
|
||||
Name string // package name
|
||||
Path string // string literal used in import statement
|
||||
Name string // package name, e.g. "sys"
|
||||
Path string // string literal used in import statement, e.g. "runtime/internal/sys"
|
||||
Pathsym *Sym
|
||||
Prefix string // escaped path for use in symbol table
|
||||
Imported bool // export data of this package was parsed
|
||||
|
@ -469,6 +469,9 @@ const (
|
|||
|
||||
// Set, use, or kill of carry bit.
|
||||
// Kill means we never look at the carry bit after this kind of instruction.
|
||||
// Originally for understanding ADC, RCR, and so on, but now also
|
||||
// tracks set, use, and kill of the zero and overflow bits as well.
|
||||
// TODO rename to {Set,Use,Kill}Flags
|
||||
SetCarry = 1 << 24
|
||||
UseCarry = 1 << 25
|
||||
KillCarry = 1 << 26
|
||||
|
|
|
@ -453,7 +453,7 @@ func inlnode(n *Node) *Node {
|
|||
if Debug['m'] > 3 {
|
||||
fmt.Printf("%v:call to func %v\n", n.Line(), Nconv(n.Left, FmtSign))
|
||||
}
|
||||
if n.Left.Func != nil && len(n.Left.Func.Inl.Slice()) != 0 { // normal case
|
||||
if n.Left.Func != nil && len(n.Left.Func.Inl.Slice()) != 0 && !isIntrinsicCall1(n) { // normal case
|
||||
n = mkinlcall(n, n.Left, n.Isddd)
|
||||
} else if n.Left.Op == ONAME && n.Left.Left != nil && n.Left.Left.Op == OTYPE && n.Left.Right != nil && n.Left.Right.Op == ONAME { // methods called as functions
|
||||
if n.Left.Sym.Def != nil {
|
||||
|
|
|
@ -2052,7 +2052,13 @@ func (s *state) expr(n *Node) *ssa.Value {
|
|||
p, l, c := s.slice(n.Left.Type, v, i, j, k)
|
||||
return s.newValue3(ssa.OpSliceMake, n.Type, p, l, c)
|
||||
|
||||
case OCALLFUNC, OCALLINTER, OCALLMETH:
|
||||
case OCALLFUNC:
|
||||
if isIntrinsicCall1(n) {
|
||||
return s.intrinsicCall1(n)
|
||||
}
|
||||
fallthrough
|
||||
|
||||
case OCALLINTER, OCALLMETH:
|
||||
a := s.call(n, callNormal)
|
||||
return s.newValue2(ssa.OpLoad, n.Type, a, s.mem())
|
||||
|
||||
|
@ -2373,6 +2379,75 @@ const (
|
|||
callGo
|
||||
)
|
||||
|
||||
// isSSAIntrinsic1 returns true if n is a call to a recognized 1-arg intrinsic
|
||||
// that can be handled by the SSA backend.
|
||||
// SSA uses this, but so does the front end to see if should not
|
||||
// inline a function because it is a candidate for intrinsic
|
||||
// substitution.
|
||||
func isSSAIntrinsic1(s *Sym) bool {
|
||||
// The test below is not quite accurate -- in the event that
|
||||
// a function is disabled on a per-function basis, for example
|
||||
// because of hash-keyed binary failure search, SSA might be
|
||||
// disabled for that function but it would not be noted here,
|
||||
// and thus an inlining would not occur (in practice, inlining
|
||||
// so far has only been noticed for Bswap32 and the 16-bit count
|
||||
// leading/trailing instructions, but heuristics might change
|
||||
// in the future or on different architectures).
|
||||
if !ssaEnabled || ssa.IntrinsicsDisable || Thearch.Thechar != '6' {
|
||||
return false
|
||||
}
|
||||
if s != nil && s.Pkg != nil && s.Pkg.Path == "runtime/internal/sys" {
|
||||
switch s.Name {
|
||||
case
|
||||
"Ctz64", "Ctz32", "Ctz16",
|
||||
"Bswap64", "Bswap32":
|
||||
return true
|
||||
}
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
func isIntrinsicCall1(n *Node) bool {
|
||||
if n == nil || n.Left == nil {
|
||||
return false
|
||||
}
|
||||
return isSSAIntrinsic1(n.Left.Sym)
|
||||
}
|
||||
|
||||
// intrinsicFirstArg extracts arg from n.List and eval
|
||||
func (s *state) intrinsicFirstArg(n *Node) *ssa.Value {
|
||||
x := n.List.First()
|
||||
if x.Op == OAS {
|
||||
x = x.Right
|
||||
}
|
||||
return s.expr(x)
|
||||
}
|
||||
|
||||
// intrinsicCall1 converts a call to a recognized 1-arg intrinsic
|
||||
// into the intrinsic
|
||||
func (s *state) intrinsicCall1(n *Node) *ssa.Value {
|
||||
var result *ssa.Value
|
||||
switch n.Left.Sym.Name {
|
||||
case "Ctz64":
|
||||
result = s.newValue1(ssa.OpCtz64, Types[TUINT64], s.intrinsicFirstArg(n))
|
||||
case "Ctz32":
|
||||
result = s.newValue1(ssa.OpCtz32, Types[TUINT32], s.intrinsicFirstArg(n))
|
||||
case "Ctz16":
|
||||
result = s.newValue1(ssa.OpCtz16, Types[TUINT16], s.intrinsicFirstArg(n))
|
||||
case "Bswap64":
|
||||
result = s.newValue1(ssa.OpBswap64, Types[TUINT64], s.intrinsicFirstArg(n))
|
||||
case "Bswap32":
|
||||
result = s.newValue1(ssa.OpBswap32, Types[TUINT32], s.intrinsicFirstArg(n))
|
||||
}
|
||||
if result == nil {
|
||||
Fatalf("Unknown special call: %v", n.Left.Sym)
|
||||
}
|
||||
if ssa.IntrinsicsDebug > 0 {
|
||||
Warnl(n.Lineno, "intrinsic substitution for %v with %s", n.Left.Sym.Name, result.LongString())
|
||||
}
|
||||
return result
|
||||
}
|
||||
|
||||
// Calls the function n using the specified call type.
|
||||
// Returns the address of the return value (or nil if none).
|
||||
func (s *state) call(n *Node, k callKind) *ssa.Value {
|
||||
|
|
|
@ -120,6 +120,10 @@ type pass struct {
|
|||
// Run consistency checker between each phase
|
||||
var checkEnabled = false
|
||||
|
||||
// Controls for intrinsic substitution, set through the "intrinsics"
// pseudo-phase handled by PhaseOption.
var (
	// IntrinsicsDebug, when positive, makes the compiler report each
	// intrinsic substitution it performs.
	IntrinsicsDebug int

	// IntrinsicsDisable turns intrinsic substitution off entirely.
	IntrinsicsDisable bool
)
|
||||
|
||||
// PhaseOption sets the specified flag in the specified ssa phase,
|
||||
// returning empty string if this was successful or a string explaining
|
||||
// the error if it was not.
|
||||
|
@ -157,6 +161,20 @@ func PhaseOption(phase, flag string, val int) string {
|
|||
}
|
||||
}
|
||||
|
||||
if phase == "intrinsics" {
|
||||
switch flag {
|
||||
case "on":
|
||||
IntrinsicsDisable = val == 0
|
||||
case "off":
|
||||
IntrinsicsDisable = val != 0
|
||||
case "debug":
|
||||
IntrinsicsDebug = val
|
||||
default:
|
||||
return fmt.Sprintf("Did not find a flag matching %s in -d=ssa/%s debug option", flag, phase)
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
underphase := strings.Replace(phase, "_", " ", -1)
|
||||
var re *regexp.Regexp
|
||||
if phase[0] == '~' {
|
||||
|
|
|
@ -92,6 +92,38 @@
|
|||
(Com16 x) -> (NOTW x)
|
||||
(Com8 x) -> (NOTB x)
|
||||
|
||||
// The (CMPQconst x [0]) below is redundant because BSF already sets the Z flag for a zero input; TODO: find a way to remove it.
|
||||
(Ctz64 <t> x) -> (CMOVQEQconst (BSFQ <t> x) (CMPQconst x [0]) [64])
|
||||
(Ctz32 <t> x) -> (CMOVLEQconst (BSFL <t> x) (CMPLconst x [0]) [32])
|
||||
(Ctz16 <t> x) -> (CMOVWEQconst (BSFW <t> x) (CMPWconst x [0]) [16])
|
||||
|
||||
(CMOVQEQconst x (InvertFlags y) [c]) -> (CMOVQNEconst x y [c])
|
||||
(CMOVLEQconst x (InvertFlags y) [c]) -> (CMOVLNEconst x y [c])
|
||||
(CMOVWEQconst x (InvertFlags y) [c]) -> (CMOVWNEconst x y [c])
|
||||
|
||||
(CMOVQEQconst _ (FlagEQ) [c]) -> (Const64 [c])
|
||||
(CMOVLEQconst _ (FlagEQ) [c]) -> (Const32 [c])
|
||||
(CMOVWEQconst _ (FlagEQ) [c]) -> (Const16 [c])
|
||||
|
||||
(CMOVQEQconst x (FlagLT_ULT)) -> x
|
||||
(CMOVLEQconst x (FlagLT_ULT)) -> x
|
||||
(CMOVWEQconst x (FlagLT_ULT)) -> x
|
||||
|
||||
(CMOVQEQconst x (FlagLT_UGT)) -> x
|
||||
(CMOVLEQconst x (FlagLT_UGT)) -> x
|
||||
(CMOVWEQconst x (FlagLT_UGT)) -> x
|
||||
|
||||
(CMOVQEQconst x (FlagGT_ULT)) -> x
|
||||
(CMOVLEQconst x (FlagGT_ULT)) -> x
|
||||
(CMOVWEQconst x (FlagGT_ULT)) -> x
|
||||
|
||||
(CMOVQEQconst x (FlagGT_UGT)) -> x
|
||||
(CMOVLEQconst x (FlagGT_UGT)) -> x
|
||||
(CMOVWEQconst x (FlagGT_UGT)) -> x
|
||||
|
||||
(Bswap64 x) -> (BSWAPQ x)
|
||||
(Bswap32 x) -> (BSWAPL x)
|
||||
|
||||
(Sqrt x) -> (SQRTSD x)
|
||||
|
||||
// Note: we always extend to 64 bits even though some ops don't need that many result bits.
|
||||
|
|
|
@ -103,9 +103,13 @@ func init() {
|
|||
gp11mod = regInfo{inputs: []regMask{ax, gpsp &^ dx}, outputs: []regMask{dx},
|
||||
clobbers: ax | flags}
|
||||
|
||||
gp2flags = regInfo{inputs: []regMask{gpsp, gpsp}, outputs: flagsonly}
|
||||
gp1flags = regInfo{inputs: []regMask{gpsp}, outputs: flagsonly}
|
||||
flagsgp = regInfo{inputs: flagsonly, outputs: gponly}
|
||||
gp2flags = regInfo{inputs: []regMask{gpsp, gpsp}, outputs: flagsonly}
|
||||
gp1flags = regInfo{inputs: []regMask{gpsp}, outputs: flagsonly}
|
||||
flagsgp = regInfo{inputs: flagsonly, outputs: gponly}
|
||||
|
||||
// for CMOVconst -- uses AX to hold constant temporary. AX input is moved before temp.
|
||||
gp1flagsgp = regInfo{inputs: []regMask{gp, flags}, clobbers: ax | flags, outputs: []regMask{gp &^ ax}}
|
||||
|
||||
readflags = regInfo{inputs: flagsonly, outputs: gponly}
|
||||
flagsgpax = regInfo{inputs: flagsonly, clobbers: ax | flags, outputs: []regMask{gp &^ ax}}
|
||||
|
||||
|
@ -307,6 +311,25 @@ func init() {
|
|||
{name: "NOTW", argLength: 1, reg: gp11, asm: "NOTL", resultInArg0: true}, // ^arg0
|
||||
{name: "NOTB", argLength: 1, reg: gp11, asm: "NOTL", resultInArg0: true}, // ^arg0
|
||||
|
||||
{name: "BSFQ", argLength: 1, reg: gp11, asm: "BSFQ"}, // arg0 # of low-order zeroes ; undef if zero
|
||||
{name: "BSFL", argLength: 1, reg: gp11, asm: "BSFL"}, // arg0 # of low-order zeroes ; undef if zero
|
||||
{name: "BSFW", argLength: 1, reg: gp11, asm: "BSFW"}, // arg0 # of low-order zeroes ; undef if zero
|
||||
|
||||
{name: "BSRQ", argLength: 1, reg: gp11, asm: "BSRQ"}, // bit index of the highest-order set bit of arg0 ; undef if zero
|
||||
{name: "BSRL", argLength: 1, reg: gp11, asm: "BSRL"}, // bit index of the highest-order set bit of arg0 ; undef if zero
|
||||
{name: "BSRW", argLength: 1, reg: gp11, asm: "BSRW"}, // bit index of the highest-order set bit of arg0 ; undef if zero
|
||||
|
||||
// Note ASM for ops moves whole register
|
||||
{name: "CMOVQEQconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVQEQ", typ: "UInt64", aux: "Int64", resultInArg0: true}, // replace arg0 w/ constant if Z set
|
||||
{name: "CMOVLEQconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLEQ", typ: "UInt32", aux: "Int32", resultInArg0: true}, // replace arg0 w/ constant if Z set
|
||||
{name: "CMOVWEQconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLEQ", typ: "UInt16", aux: "Int16", resultInArg0: true}, // replace arg0 w/ constant if Z set
|
||||
{name: "CMOVQNEconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVQNE", typ: "UInt64", aux: "Int64", resultInArg0: true}, // replace arg0 w/ constant if Z not set
|
||||
{name: "CMOVLNEconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLNE", typ: "UInt32", aux: "Int32", resultInArg0: true}, // replace arg0 w/ constant if Z not set
|
||||
{name: "CMOVWNEconst", argLength: 2, reg: gp1flagsgp, asm: "CMOVLNE", typ: "UInt16", aux: "Int16", resultInArg0: true}, // replace arg0 w/ constant if Z not set
|
||||
|
||||
{name: "BSWAPQ", argLength: 1, reg: gp11, asm: "BSWAPQ", resultInArg0: true}, // arg0 swap bytes
|
||||
{name: "BSWAPL", argLength: 1, reg: gp11, asm: "BSWAPL", resultInArg0: true}, // arg0 swap bytes
|
||||
|
||||
{name: "SQRTSD", argLength: 1, reg: fp11, asm: "SQRTSD"}, // sqrt(arg0)
|
||||
|
||||
{name: "SBBQcarrymask", argLength: 1, reg: flagsgp, asm: "SBBQ"}, // (int64)(-1) if carry is set, 0 if carry is clear.
|
||||
|
|
|
@ -237,6 +237,17 @@ var genericOps = []opData{
|
|||
{name: "Com32", argLength: 1},
|
||||
{name: "Com64", argLength: 1},
|
||||
|
||||
{name: "Ctz16", argLength: 1}, // Count trailing (low order) zeroes (returns 0-16)
|
||||
{name: "Ctz32", argLength: 1}, // Count trailing zeroes (returns 0-32)
|
||||
{name: "Ctz64", argLength: 1}, // Count trailing zeroes (returns 0-64)
|
||||
|
||||
{name: "Clz16", argLength: 1}, // Count leading (high order) zeroes (returns 0-16)
|
||||
{name: "Clz32", argLength: 1}, // Count leading zeroes (returns 0-32)
|
||||
{name: "Clz64", argLength: 1}, // Count leading zeroes (returns 0-64)
|
||||
|
||||
{name: "Bswap32", argLength: 1}, // Swap bytes
|
||||
{name: "Bswap64", argLength: 1}, // Swap bytes
|
||||
|
||||
{name: "Sqrt", argLength: 1}, // sqrt(arg0), float64 only
|
||||
|
||||
// Data movement, max argument length for Phi is indefinite so just pick
|
||||
|
|
|
@ -237,6 +237,20 @@ const (
|
|||
OpAMD64NOTL
|
||||
OpAMD64NOTW
|
||||
OpAMD64NOTB
|
||||
OpAMD64BSFQ
|
||||
OpAMD64BSFL
|
||||
OpAMD64BSFW
|
||||
OpAMD64BSRQ
|
||||
OpAMD64BSRL
|
||||
OpAMD64BSRW
|
||||
OpAMD64CMOVQEQconst
|
||||
OpAMD64CMOVLEQconst
|
||||
OpAMD64CMOVWEQconst
|
||||
OpAMD64CMOVQNEconst
|
||||
OpAMD64CMOVLNEconst
|
||||
OpAMD64CMOVWNEconst
|
||||
OpAMD64BSWAPQ
|
||||
OpAMD64BSWAPL
|
||||
OpAMD64SQRTSD
|
||||
OpAMD64SBBQcarrymask
|
||||
OpAMD64SBBLcarrymask
|
||||
|
@ -521,6 +535,14 @@ const (
|
|||
OpCom16
|
||||
OpCom32
|
||||
OpCom64
|
||||
OpCtz16
|
||||
OpCtz32
|
||||
OpCtz64
|
||||
OpClz16
|
||||
OpClz32
|
||||
OpClz64
|
||||
OpBswap32
|
||||
OpBswap64
|
||||
OpSqrt
|
||||
OpPhi
|
||||
OpCopy
|
||||
|
@ -2803,6 +2825,222 @@ var opcodeTable = [...]opInfo{
|
|||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "BSFQ",
|
||||
argLen: 1,
|
||||
asm: x86.ABSFQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
clobbers: 8589934592, // FLAGS
|
||||
outputs: []regMask{
|
||||
65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "BSFL",
|
||||
argLen: 1,
|
||||
asm: x86.ABSFL,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
clobbers: 8589934592, // FLAGS
|
||||
outputs: []regMask{
|
||||
65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "BSFW",
|
||||
argLen: 1,
|
||||
asm: x86.ABSFW,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
clobbers: 8589934592, // FLAGS
|
||||
outputs: []regMask{
|
||||
65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "BSRQ",
|
||||
argLen: 1,
|
||||
asm: x86.ABSRQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
clobbers: 8589934592, // FLAGS
|
||||
outputs: []regMask{
|
||||
65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "BSRL",
|
||||
argLen: 1,
|
||||
asm: x86.ABSRL,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
clobbers: 8589934592, // FLAGS
|
||||
outputs: []regMask{
|
||||
65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "BSRW",
|
||||
argLen: 1,
|
||||
asm: x86.ABSRW,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
clobbers: 8589934592, // FLAGS
|
||||
outputs: []regMask{
|
||||
65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "CMOVQEQconst",
|
||||
auxType: auxInt64,
|
||||
argLen: 2,
|
||||
resultInArg0: true,
|
||||
asm: x86.ACMOVQEQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 8589934592}, // FLAGS
|
||||
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
clobbers: 8589934593, // AX FLAGS
|
||||
outputs: []regMask{
|
||||
65518, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "CMOVLEQconst",
|
||||
auxType: auxInt32,
|
||||
argLen: 2,
|
||||
resultInArg0: true,
|
||||
asm: x86.ACMOVLEQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 8589934592}, // FLAGS
|
||||
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
clobbers: 8589934593, // AX FLAGS
|
||||
outputs: []regMask{
|
||||
65518, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "CMOVWEQconst",
|
||||
auxType: auxInt16,
|
||||
argLen: 2,
|
||||
resultInArg0: true,
|
||||
asm: x86.ACMOVLEQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 8589934592}, // FLAGS
|
||||
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
clobbers: 8589934593, // AX FLAGS
|
||||
outputs: []regMask{
|
||||
65518, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "CMOVQNEconst",
|
||||
auxType: auxInt64,
|
||||
argLen: 2,
|
||||
resultInArg0: true,
|
||||
asm: x86.ACMOVQNE,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 8589934592}, // FLAGS
|
||||
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
clobbers: 8589934593, // AX FLAGS
|
||||
outputs: []regMask{
|
||||
65518, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "CMOVLNEconst",
|
||||
auxType: auxInt32,
|
||||
argLen: 2,
|
||||
resultInArg0: true,
|
||||
asm: x86.ACMOVLNE,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 8589934592}, // FLAGS
|
||||
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
clobbers: 8589934593, // AX FLAGS
|
||||
outputs: []regMask{
|
||||
65518, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "CMOVWNEconst",
|
||||
auxType: auxInt16,
|
||||
argLen: 2,
|
||||
resultInArg0: true,
|
||||
asm: x86.ACMOVLNE,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{1, 8589934592}, // FLAGS
|
||||
{0, 65519}, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
clobbers: 8589934593, // AX FLAGS
|
||||
outputs: []regMask{
|
||||
65518, // CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "BSWAPQ",
|
||||
argLen: 1,
|
||||
resultInArg0: true,
|
||||
asm: x86.ABSWAPQ,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
clobbers: 8589934592, // FLAGS
|
||||
outputs: []regMask{
|
||||
65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "BSWAPL",
|
||||
argLen: 1,
|
||||
resultInArg0: true,
|
||||
asm: x86.ABSWAPL,
|
||||
reg: regInfo{
|
||||
inputs: []inputInfo{
|
||||
{0, 65535}, // AX CX DX BX SP BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
clobbers: 8589934592, // FLAGS
|
||||
outputs: []regMask{
|
||||
65519, // AX CX DX BX BP SI DI R8 R9 R10 R11 R12 R13 R14 R15
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
name: "SQRTSD",
|
||||
argLen: 1,
|
||||
|
@ -4981,6 +5219,46 @@ var opcodeTable = [...]opInfo{
|
|||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Ctz16",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Ctz32",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Ctz64",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Clz16",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Clz32",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Clz64",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Bswap32",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Bswap64",
|
||||
argLen: 1,
|
||||
generic: true,
|
||||
},
|
||||
{
|
||||
name: "Sqrt",
|
||||
argLen: 1,
|
||||
|
|
|
@ -66,6 +66,16 @@ func rewriteValueAMD64(v *Value, config *Config) bool {
|
|||
return rewriteValueAMD64_OpAnd8(v, config)
|
||||
case OpAvg64u:
|
||||
return rewriteValueAMD64_OpAvg64u(v, config)
|
||||
case OpBswap32:
|
||||
return rewriteValueAMD64_OpBswap32(v, config)
|
||||
case OpBswap64:
|
||||
return rewriteValueAMD64_OpBswap64(v, config)
|
||||
case OpAMD64CMOVLEQconst:
|
||||
return rewriteValueAMD64_OpAMD64CMOVLEQconst(v, config)
|
||||
case OpAMD64CMOVQEQconst:
|
||||
return rewriteValueAMD64_OpAMD64CMOVQEQconst(v, config)
|
||||
case OpAMD64CMOVWEQconst:
|
||||
return rewriteValueAMD64_OpAMD64CMOVWEQconst(v, config)
|
||||
case OpAMD64CMPB:
|
||||
return rewriteValueAMD64_OpAMD64CMPB(v, config)
|
||||
case OpAMD64CMPBconst:
|
||||
|
@ -110,6 +120,12 @@ func rewriteValueAMD64(v *Value, config *Config) bool {
|
|||
return rewriteValueAMD64_OpConstNil(v, config)
|
||||
case OpConvert:
|
||||
return rewriteValueAMD64_OpConvert(v, config)
|
||||
case OpCtz16:
|
||||
return rewriteValueAMD64_OpCtz16(v, config)
|
||||
case OpCtz32:
|
||||
return rewriteValueAMD64_OpCtz32(v, config)
|
||||
case OpCtz64:
|
||||
return rewriteValueAMD64_OpCtz64(v, config)
|
||||
case OpCvt32Fto32:
|
||||
return rewriteValueAMD64_OpCvt32Fto32(v, config)
|
||||
case OpCvt32Fto64:
|
||||
|
@ -2119,6 +2135,307 @@ func rewriteValueAMD64_OpAvg64u(v *Value, config *Config) bool {
|
|||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpBswap32(v *Value, config *Config) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
// match: (Bswap32 x)
|
||||
// cond:
|
||||
// result: (BSWAPL x)
|
||||
for {
|
||||
x := v.Args[0]
|
||||
v.reset(OpAMD64BSWAPL)
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpBswap64(v *Value, config *Config) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
// match: (Bswap64 x)
|
||||
// cond:
|
||||
// result: (BSWAPQ x)
|
||||
for {
|
||||
x := v.Args[0]
|
||||
v.reset(OpAMD64BSWAPQ)
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64CMOVLEQconst(v *Value, config *Config) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
// match: (CMOVLEQconst x (InvertFlags y) [c])
|
||||
// cond:
|
||||
// result: (CMOVLNEconst x y [c])
|
||||
for {
|
||||
x := v.Args[0]
|
||||
v_1 := v.Args[1]
|
||||
if v_1.Op != OpAMD64InvertFlags {
|
||||
break
|
||||
}
|
||||
y := v_1.Args[0]
|
||||
c := v.AuxInt
|
||||
v.reset(OpAMD64CMOVLNEconst)
|
||||
v.AddArg(x)
|
||||
v.AddArg(y)
|
||||
v.AuxInt = c
|
||||
return true
|
||||
}
|
||||
// match: (CMOVLEQconst _ (FlagEQ) [c])
|
||||
// cond:
|
||||
// result: (Const32 [c])
|
||||
for {
|
||||
v_1 := v.Args[1]
|
||||
if v_1.Op != OpAMD64FlagEQ {
|
||||
break
|
||||
}
|
||||
c := v.AuxInt
|
||||
v.reset(OpConst32)
|
||||
v.AuxInt = c
|
||||
return true
|
||||
}
|
||||
// match: (CMOVLEQconst x (FlagLT_ULT))
|
||||
// cond:
|
||||
// result: x
|
||||
for {
|
||||
x := v.Args[0]
|
||||
v_1 := v.Args[1]
|
||||
if v_1.Op != OpAMD64FlagLT_ULT {
|
||||
break
|
||||
}
|
||||
v.reset(OpCopy)
|
||||
v.Type = x.Type
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
// match: (CMOVLEQconst x (FlagLT_UGT))
|
||||
// cond:
|
||||
// result: x
|
||||
for {
|
||||
x := v.Args[0]
|
||||
v_1 := v.Args[1]
|
||||
if v_1.Op != OpAMD64FlagLT_UGT {
|
||||
break
|
||||
}
|
||||
v.reset(OpCopy)
|
||||
v.Type = x.Type
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
// match: (CMOVLEQconst x (FlagGT_ULT))
|
||||
// cond:
|
||||
// result: x
|
||||
for {
|
||||
x := v.Args[0]
|
||||
v_1 := v.Args[1]
|
||||
if v_1.Op != OpAMD64FlagGT_ULT {
|
||||
break
|
||||
}
|
||||
v.reset(OpCopy)
|
||||
v.Type = x.Type
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
// match: (CMOVLEQconst x (FlagGT_UGT))
|
||||
// cond:
|
||||
// result: x
|
||||
for {
|
||||
x := v.Args[0]
|
||||
v_1 := v.Args[1]
|
||||
if v_1.Op != OpAMD64FlagGT_UGT {
|
||||
break
|
||||
}
|
||||
v.reset(OpCopy)
|
||||
v.Type = x.Type
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64CMOVQEQconst(v *Value, config *Config) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
// match: (CMOVQEQconst x (InvertFlags y) [c])
|
||||
// cond:
|
||||
// result: (CMOVQNEconst x y [c])
|
||||
for {
|
||||
x := v.Args[0]
|
||||
v_1 := v.Args[1]
|
||||
if v_1.Op != OpAMD64InvertFlags {
|
||||
break
|
||||
}
|
||||
y := v_1.Args[0]
|
||||
c := v.AuxInt
|
||||
v.reset(OpAMD64CMOVQNEconst)
|
||||
v.AddArg(x)
|
||||
v.AddArg(y)
|
||||
v.AuxInt = c
|
||||
return true
|
||||
}
|
||||
// match: (CMOVQEQconst _ (FlagEQ) [c])
|
||||
// cond:
|
||||
// result: (Const64 [c])
|
||||
for {
|
||||
v_1 := v.Args[1]
|
||||
if v_1.Op != OpAMD64FlagEQ {
|
||||
break
|
||||
}
|
||||
c := v.AuxInt
|
||||
v.reset(OpConst64)
|
||||
v.AuxInt = c
|
||||
return true
|
||||
}
|
||||
// match: (CMOVQEQconst x (FlagLT_ULT))
|
||||
// cond:
|
||||
// result: x
|
||||
for {
|
||||
x := v.Args[0]
|
||||
v_1 := v.Args[1]
|
||||
if v_1.Op != OpAMD64FlagLT_ULT {
|
||||
break
|
||||
}
|
||||
v.reset(OpCopy)
|
||||
v.Type = x.Type
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
// match: (CMOVQEQconst x (FlagLT_UGT))
|
||||
// cond:
|
||||
// result: x
|
||||
for {
|
||||
x := v.Args[0]
|
||||
v_1 := v.Args[1]
|
||||
if v_1.Op != OpAMD64FlagLT_UGT {
|
||||
break
|
||||
}
|
||||
v.reset(OpCopy)
|
||||
v.Type = x.Type
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
// match: (CMOVQEQconst x (FlagGT_ULT))
|
||||
// cond:
|
||||
// result: x
|
||||
for {
|
||||
x := v.Args[0]
|
||||
v_1 := v.Args[1]
|
||||
if v_1.Op != OpAMD64FlagGT_ULT {
|
||||
break
|
||||
}
|
||||
v.reset(OpCopy)
|
||||
v.Type = x.Type
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
// match: (CMOVQEQconst x (FlagGT_UGT))
|
||||
// cond:
|
||||
// result: x
|
||||
for {
|
||||
x := v.Args[0]
|
||||
v_1 := v.Args[1]
|
||||
if v_1.Op != OpAMD64FlagGT_UGT {
|
||||
break
|
||||
}
|
||||
v.reset(OpCopy)
|
||||
v.Type = x.Type
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64CMOVWEQconst(v *Value, config *Config) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
// match: (CMOVWEQconst x (InvertFlags y) [c])
|
||||
// cond:
|
||||
// result: (CMOVWNEconst x y [c])
|
||||
for {
|
||||
x := v.Args[0]
|
||||
v_1 := v.Args[1]
|
||||
if v_1.Op != OpAMD64InvertFlags {
|
||||
break
|
||||
}
|
||||
y := v_1.Args[0]
|
||||
c := v.AuxInt
|
||||
v.reset(OpAMD64CMOVWNEconst)
|
||||
v.AddArg(x)
|
||||
v.AddArg(y)
|
||||
v.AuxInt = c
|
||||
return true
|
||||
}
|
||||
// match: (CMOVWEQconst _ (FlagEQ) [c])
|
||||
// cond:
|
||||
// result: (Const16 [c])
|
||||
for {
|
||||
v_1 := v.Args[1]
|
||||
if v_1.Op != OpAMD64FlagEQ {
|
||||
break
|
||||
}
|
||||
c := v.AuxInt
|
||||
v.reset(OpConst16)
|
||||
v.AuxInt = c
|
||||
return true
|
||||
}
|
||||
// match: (CMOVWEQconst x (FlagLT_ULT))
|
||||
// cond:
|
||||
// result: x
|
||||
for {
|
||||
x := v.Args[0]
|
||||
v_1 := v.Args[1]
|
||||
if v_1.Op != OpAMD64FlagLT_ULT {
|
||||
break
|
||||
}
|
||||
v.reset(OpCopy)
|
||||
v.Type = x.Type
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
// match: (CMOVWEQconst x (FlagLT_UGT))
|
||||
// cond:
|
||||
// result: x
|
||||
for {
|
||||
x := v.Args[0]
|
||||
v_1 := v.Args[1]
|
||||
if v_1.Op != OpAMD64FlagLT_UGT {
|
||||
break
|
||||
}
|
||||
v.reset(OpCopy)
|
||||
v.Type = x.Type
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
// match: (CMOVWEQconst x (FlagGT_ULT))
|
||||
// cond:
|
||||
// result: x
|
||||
for {
|
||||
x := v.Args[0]
|
||||
v_1 := v.Args[1]
|
||||
if v_1.Op != OpAMD64FlagGT_ULT {
|
||||
break
|
||||
}
|
||||
v.reset(OpCopy)
|
||||
v.Type = x.Type
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
// match: (CMOVWEQconst x (FlagGT_UGT))
|
||||
// cond:
|
||||
// result: x
|
||||
for {
|
||||
x := v.Args[0]
|
||||
v_1 := v.Args[1]
|
||||
if v_1.Op != OpAMD64FlagGT_UGT {
|
||||
break
|
||||
}
|
||||
v.reset(OpCopy)
|
||||
v.Type = x.Type
|
||||
v.AddArg(x)
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpAMD64CMPB(v *Value, config *Config) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
|
@ -3026,6 +3343,72 @@ func rewriteValueAMD64_OpConvert(v *Value, config *Config) bool {
|
|||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpCtz16(v *Value, config *Config) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
// match: (Ctz16 <t> x)
|
||||
// cond:
|
||||
// result: (CMOVWEQconst (BSFW <t> x) (CMPWconst x [0]) [16])
|
||||
for {
|
||||
t := v.Type
|
||||
x := v.Args[0]
|
||||
v.reset(OpAMD64CMOVWEQconst)
|
||||
v0 := b.NewValue0(v.Line, OpAMD64BSFW, t)
|
||||
v0.AddArg(x)
|
||||
v.AddArg(v0)
|
||||
v1 := b.NewValue0(v.Line, OpAMD64CMPWconst, TypeFlags)
|
||||
v1.AddArg(x)
|
||||
v1.AuxInt = 0
|
||||
v.AddArg(v1)
|
||||
v.AuxInt = 16
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpCtz32(v *Value, config *Config) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
// match: (Ctz32 <t> x)
|
||||
// cond:
|
||||
// result: (CMOVLEQconst (BSFL <t> x) (CMPLconst x [0]) [32])
|
||||
for {
|
||||
t := v.Type
|
||||
x := v.Args[0]
|
||||
v.reset(OpAMD64CMOVLEQconst)
|
||||
v0 := b.NewValue0(v.Line, OpAMD64BSFL, t)
|
||||
v0.AddArg(x)
|
||||
v.AddArg(v0)
|
||||
v1 := b.NewValue0(v.Line, OpAMD64CMPLconst, TypeFlags)
|
||||
v1.AddArg(x)
|
||||
v1.AuxInt = 0
|
||||
v.AddArg(v1)
|
||||
v.AuxInt = 32
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpCtz64(v *Value, config *Config) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
// match: (Ctz64 <t> x)
|
||||
// cond:
|
||||
// result: (CMOVQEQconst (BSFQ <t> x) (CMPQconst x [0]) [64])
|
||||
for {
|
||||
t := v.Type
|
||||
x := v.Args[0]
|
||||
v.reset(OpAMD64CMOVQEQconst)
|
||||
v0 := b.NewValue0(v.Line, OpAMD64BSFQ, t)
|
||||
v0.AddArg(x)
|
||||
v.AddArg(v0)
|
||||
v1 := b.NewValue0(v.Line, OpAMD64CMPQconst, TypeFlags)
|
||||
v1.AddArg(x)
|
||||
v1.AuxInt = 0
|
||||
v.AddArg(v1)
|
||||
v.AuxInt = 64
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
func rewriteValueAMD64_OpCvt32Fto32(v *Value, config *Config) bool {
|
||||
b := v.Block
|
||||
_ = b
|
||||
|
|
105
src/runtime/internal/sys/intrinsics.go
Normal file
105
src/runtime/internal/sys/intrinsics.go
Normal file
|
@ -0,0 +1,105 @@
|
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package sys
|
||||
|
||||
// Ctz64 counts trailing (low-order) zeroes,
|
||||
// and if all are zero, then 64.
|
||||
func Ctz64(x uint64) uint64 {
|
||||
if x&0xffffffff == 0 {
|
||||
return 32 + uint64(Ctz32(uint32(x>>32)))
|
||||
}
|
||||
return uint64(Ctz32(uint32(x)))
|
||||
|
||||
}
|
||||
|
||||
// Ctz32 counts trailing (low-order) zeroes,
|
||||
// and if all are zero, then 32.
|
||||
func Ctz32(x uint32) uint32 {
|
||||
if x&0xffff == 0 {
|
||||
return 16 + uint32(Ctz16(uint16(x>>16)))
|
||||
}
|
||||
return uint32(Ctz16(uint16(x)))
|
||||
}
|
||||
|
||||
// Ctz16 counts trailing (low-order) zeroes,
|
||||
// and if all are zero, then 16.
|
||||
func Ctz16(x uint16) uint16 {
|
||||
if x&0xff == 0 {
|
||||
return 8 + uint16(Ctz8(uint8(x>>8)))
|
||||
}
|
||||
return uint16(Ctz8(uint8(x)))
|
||||
}
|
||||
|
||||
// Ctz8 returns the number of trailing (low-order) zero bits in x.
// The result is 8 when x is zero.
func Ctz8(x uint8) uint8 {
	return ctzVals[x]
}

// ctzVals maps every byte value to its trailing-zero count; entry 0 is 8.
// Each row below covers 16 consecutive byte values.
var ctzVals = [256]uint8{
	8, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, // 0x00
	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, // 0x10
	5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, // 0x20
	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, // 0x30
	6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, // 0x40
	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, // 0x50
	5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, // 0x60
	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, // 0x70
	7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, // 0x80
	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, // 0x90
	5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, // 0xa0
	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, // 0xb0
	6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, // 0xc0
	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, // 0xd0
	5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, // 0xe0
	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, // 0xf0
}
|
||||
|
||||
// Bswap64 returns x with the order of its eight bytes reversed, e.g.
// 0x0102030405060708 -> 0x0807060504030201.
func Bswap64(x uint64) uint64 {
	const (
		oddBytes = 0xff00ff00ff00ff00 // upper byte of every 16-bit pair
		oddWords = 0xffff0000ffff0000 // upper 16 bits of every 32-bit pair
		highHalf = 0xffffffff00000000 // upper 32 bits
	)
	// Swap neighbours at doubling granularity: bytes, then 16-bit words,
	// then the two 32-bit halves.
	x = (x&oddBytes)>>8 | (x&^oddBytes)<<8
	x = (x&oddWords)>>16 | (x&^oddWords)<<16
	return (x&highHalf)>>32 | (x&^highHalf)<<32
}
|
||||
|
||||
// Bswap32 returns x with the order of its four bytes reversed, e.g.
// 0x01020304 -> 0x04030201.
func Bswap32(x uint32) uint32 {
	const (
		oddBytes = 0xff00ff00 // upper byte of every 16-bit pair
		highHalf = 0xffff0000 // upper 16 bits
	)
	// Swap adjacent bytes first, then the two 16-bit halves.
	x = (x&oddBytes)>>8 | (x&^oddBytes)<<8
	return (x&highHalf)>>16 | (x&^highHalf)<<16
}
|
109
test/intrinsic.dir/main.go
Normal file
109
test/intrinsic.dir/main.go
Normal file
|
@ -0,0 +1,109 @@
|
|||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
T "runtime/internal/sys"
|
||||
)
|
||||
|
||||
// A and B are paired byte-swap test vectors: B[i] holds the bytes of A[i]
// in reverse order.
var (
	A = []uint64{0x0102030405060708, 0x1122334455667788}
	B = []uint64{0x0807060504030201, 0x8877665544332211}
)
|
||||
|
||||
// errors counts the failures reported through logf.
var errors int

// logf records one failure: it bumps the errors counter, prints the
// formatted message, and panics once more than 100 failures have been
// reported so that a badly broken build does not flood the output.
func logf(f string, args ...interface{}) {
	errors++
	fmt.Printf(f, args...)
	if errors <= 100 {
		return
	}
	// 100 is enough spewage
	panic("100 errors is plenty is enough")
}
|
||||
|
||||
func test(i, x uint64) {
|
||||
t := T.Ctz64(x) // ERROR "intrinsic substitution for Ctz64"
|
||||
if i != t {
|
||||
logf("Ctz64(0x%x) expected %d but got %d\n", x, i, t)
|
||||
}
|
||||
x = -x
|
||||
t = T.Ctz64(x) // ERROR "intrinsic substitution for Ctz64"
|
||||
if i != t {
|
||||
logf("Ctz64(0x%x) expected %d but got %d\n", x, i, t)
|
||||
}
|
||||
|
||||
if i <= 32 {
|
||||
x32 := uint32(x)
|
||||
t32 := T.Ctz32(x32) // ERROR "intrinsic substitution for Ctz32"
|
||||
if uint32(i) != t32 {
|
||||
logf("Ctz32(0x%x) expected %d but got %d\n", x32, i, t32)
|
||||
}
|
||||
x32 = -x32
|
||||
t32 = T.Ctz32(x32) // ERROR "intrinsic substitution for Ctz32"
|
||||
if uint32(i) != t32 {
|
||||
logf("Ctz32(0x%x) expected %d but got %d\n", x32, i, t32)
|
||||
}
|
||||
}
|
||||
if i <= 16 {
|
||||
x16 := uint16(x)
|
||||
t16 := T.Ctz16(x16) // ERROR "intrinsic substitution for Ctz16"
|
||||
if uint16(i) != t16 {
|
||||
logf("Ctz16(0x%x) expected %d but got %d\n", x16, i, t16)
|
||||
}
|
||||
x16 = -x16
|
||||
t16 = T.Ctz16(x16) // ERROR "intrinsic substitution for Ctz16"
|
||||
if uint16(i) != t16 {
|
||||
logf("Ctz16(0x%x) expected %d but got %d\n", x16, i, t16)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func main() {
|
||||
// Test Bswap first because the other test relies on it
|
||||
// working correctly (to implement bit reversal).
|
||||
for i := range A {
|
||||
x := A[i]
|
||||
y := B[i]
|
||||
X := T.Bswap64(x) // ERROR "intrinsic substitution for Bswap64"
|
||||
Y := T.Bswap64(y) // ERROR "intrinsic substitution for Bswap64"
|
||||
if y != X {
|
||||
logf("Bswap64(0x%08x) expected 0x%08x but got 0x%08x\n", x, y, X)
|
||||
}
|
||||
if x != Y {
|
||||
logf("Bswap64(0x%08x) expected 0x%08x but got 0x%08x\n", y, x, Y)
|
||||
}
|
||||
|
||||
x32 := uint32(X)
|
||||
y32 := uint32(Y >> 32)
|
||||
|
||||
X32 := T.Bswap32(x32) // ERROR "intrinsic substitution for Bswap32"
|
||||
Y32 := T.Bswap32(y32) // ERROR "intrinsic substitution for Bswap32"
|
||||
if y32 != X32 {
|
||||
logf("Bswap32(0x%08x) expected 0x%08x but got 0x%08x\n", x32, y32, X32)
|
||||
}
|
||||
if x32 != Y32 {
|
||||
logf("Bswap32(0x%08x) expected 0x%08x but got 0x%08x\n", y32, x32, Y32)
|
||||
}
|
||||
}
|
||||
|
||||
// Zero is a special case, be sure it is done right.
|
||||
if T.Ctz16(0) != 16 { // ERROR "intrinsic substitution for Ctz16"
|
||||
logf("ctz16(0) != 16")
|
||||
}
|
||||
if T.Ctz32(0) != 32 { // ERROR "intrinsic substitution for Ctz32"
|
||||
logf("ctz32(0) != 32")
|
||||
}
|
||||
if T.Ctz64(0) != 64 { // ERROR "intrinsic substitution for Ctz64"
|
||||
logf("ctz64(0) != 64")
|
||||
}
|
||||
|
||||
for i := uint64(0); i <= 64; i++ {
|
||||
for j := uint64(1); j <= 255; j += 2 {
|
||||
for k := uint64(1); k <= 65537; k += 128 {
|
||||
x := (j * k) << i
|
||||
test(i, x)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
8
test/intrinsic.go
Normal file
8
test/intrinsic.go
Normal file
|
@ -0,0 +1,8 @@
|
|||
// errorcheckandrundir -0 -d=ssa/intrinsics/debug
|
||||
// +build !ppc64,!ppc64le,amd64
|
||||
|
||||
// Copyright 2016 The Go Authors. All rights reserved.
|
||||
// Use of this source code is governed by a BSD-style
|
||||
// license that can be found in the LICENSE file.
|
||||
|
||||
package ignored
|
26
test/run.go
26
test/run.go
|
@ -34,6 +34,7 @@ import (
|
|||
|
||||
var (
|
||||
verbose = flag.Bool("v", false, "verbose. if set, parallelism is set to 1.")
|
||||
keep = flag.Bool("k", false, "keep. keep temporary directory.")
|
||||
numParallel = flag.Int("n", runtime.NumCPU(), "number of parallel tests to run")
|
||||
summary = flag.Bool("summary", false, "show summary of results")
|
||||
showSkips = flag.Bool("show_skips", false, "show skipped tests")
|
||||
|
@ -201,8 +202,9 @@ func compileFile(runcmd runCmd, longname string) (out []byte, err error) {
|
|||
return runcmd(cmd...)
|
||||
}
|
||||
|
||||
func compileInDir(runcmd runCmd, dir string, names ...string) (out []byte, err error) {
|
||||
func compileInDir(runcmd runCmd, dir string, flags []string, names ...string) (out []byte, err error) {
|
||||
cmd := []string{"go", "tool", "compile", "-e", "-D", ".", "-I", "."}
|
||||
cmd = append(cmd, flags...)
|
||||
if *linkshared {
|
||||
cmd = append(cmd, "-dynlink", "-installsuffix=dynlink")
|
||||
}
|
||||
|
@ -477,6 +479,9 @@ func (t *test) run() {
|
|||
fallthrough
|
||||
case "compile", "compiledir", "build", "run", "runoutput", "rundir":
|
||||
t.action = action
|
||||
case "errorcheckandrundir":
|
||||
wantError = false // should be no error if also will run
|
||||
fallthrough
|
||||
case "errorcheck", "errorcheckdir", "errorcheckoutput":
|
||||
t.action = action
|
||||
wantError = true
|
||||
|
@ -501,7 +506,9 @@ func (t *test) run() {
|
|||
}
|
||||
|
||||
t.makeTempDir()
|
||||
defer os.RemoveAll(t.tempDir)
|
||||
if !*keep {
|
||||
defer os.RemoveAll(t.tempDir)
|
||||
}
|
||||
|
||||
err = ioutil.WriteFile(filepath.Join(t.tempDir, t.gofile), srcBytes, 0644)
|
||||
check(err)
|
||||
|
@ -577,13 +584,13 @@ func (t *test) run() {
|
|||
return
|
||||
}
|
||||
for _, gofiles := range pkgs {
|
||||
_, t.err = compileInDir(runcmd, longdir, gofiles...)
|
||||
_, t.err = compileInDir(runcmd, longdir, flags, gofiles...)
|
||||
if t.err != nil {
|
||||
return
|
||||
}
|
||||
}
|
||||
|
||||
case "errorcheckdir":
|
||||
case "errorcheckdir", "errorcheckandrundir":
|
||||
// errorcheck all files in lexicographic order
|
||||
// useful for finding importing errors
|
||||
longdir := filepath.Join(cwd, t.goDirName())
|
||||
|
@ -593,7 +600,7 @@ func (t *test) run() {
|
|||
return
|
||||
}
|
||||
for i, gofiles := range pkgs {
|
||||
out, err := compileInDir(runcmd, longdir, gofiles...)
|
||||
out, err := compileInDir(runcmd, longdir, flags, gofiles...)
|
||||
if i == len(pkgs)-1 {
|
||||
if wantError && err == nil {
|
||||
t.err = fmt.Errorf("compilation succeeded unexpectedly\n%s", out)
|
||||
|
@ -615,6 +622,10 @@ func (t *test) run() {
|
|||
break
|
||||
}
|
||||
}
|
||||
if action == "errorcheckdir" {
|
||||
return
|
||||
}
|
||||
fallthrough
|
||||
|
||||
case "rundir":
|
||||
// Compile all files in the directory in lexicographic order.
|
||||
|
@ -626,7 +637,7 @@ func (t *test) run() {
|
|||
return
|
||||
}
|
||||
for i, gofiles := range pkgs {
|
||||
_, err := compileInDir(runcmd, longdir, gofiles...)
|
||||
_, err := compileInDir(runcmd, longdir, flags, gofiles...)
|
||||
if err != nil {
|
||||
t.err = err
|
||||
return
|
||||
|
@ -774,6 +785,9 @@ func (t *test) makeTempDir() {
|
|||
var err error
|
||||
t.tempDir, err = ioutil.TempDir("", "")
|
||||
check(err)
|
||||
if *keep {
|
||||
log.Printf("Temporary directory is %s", t.tempDir)
|
||||
}
|
||||
}
|
||||
|
||||
func (t *test) expectedOutput() string {
|
||||
|
|
Loading…
Reference in a new issue