diff --git a/src/cmd/compile/internal/arm64/ssa.go b/src/cmd/compile/internal/arm64/ssa.go
index 5c695ef84c..22b28a9308 100644
--- a/src/cmd/compile/internal/arm64/ssa.go
+++ b/src/cmd/compile/internal/arm64/ssa.go
@@ -581,6 +581,24 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		p2.From.Reg = arm64.REGTMP
 		p2.To.Type = obj.TYPE_BRANCH
 		gc.Patch(p2, p)
+	case ssa.OpARM64LoweredAtomicExchange64Variant,
+		ssa.OpARM64LoweredAtomicExchange32Variant:
+		swap := arm64.ASWPALD
+		if v.Op == ssa.OpARM64LoweredAtomicExchange32Variant {
+			swap = arm64.ASWPALW
+		}
+		r0 := v.Args[0].Reg()
+		r1 := v.Args[1].Reg()
+		out := v.Reg0()
+
+		// SWPALD	Rarg1, (Rarg0), Rout
+		p := s.Prog(swap)
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = r1
+		p.To.Type = obj.TYPE_MEM
+		p.To.Reg = r0
+		p.RegTo2 = out
+
 	case ssa.OpARM64LoweredAtomicAdd64,
 		ssa.OpARM64LoweredAtomicAdd32:
 		// LDAXR	(Rarg0), Rout
@@ -687,6 +705,56 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		p5.To.Type = obj.TYPE_REG
 		p5.To.Reg = out
 		gc.Patch(p2, p5)
+	case ssa.OpARM64LoweredAtomicCas64Variant,
+		ssa.OpARM64LoweredAtomicCas32Variant:
+		// Rarg0: ptr
+		// Rarg1: old
+		// Rarg2: new
+		// MOV  	Rarg1, Rtmp
+		// CASAL	Rtmp, (Rarg0), Rarg2
+		// CMP  	Rarg1, Rtmp
+		// CSET 	EQ, Rout
+		cas := arm64.ACASALD
+		cmp := arm64.ACMP
+		mov := arm64.AMOVD
+		if v.Op == ssa.OpARM64LoweredAtomicCas32Variant {
+			cas = arm64.ACASALW
+			cmp = arm64.ACMPW
+			mov = arm64.AMOVW
+		}
+		r0 := v.Args[0].Reg()
+		r1 := v.Args[1].Reg()
+		r2 := v.Args[2].Reg()
+		out := v.Reg0()
+
+		// MOV  	Rarg1, Rtmp
+		p := s.Prog(mov)
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = r1
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = arm64.REGTMP
+
+		// CASAL	Rtmp, (Rarg0), Rarg2
+		p1 := s.Prog(cas)
+		p1.From.Type = obj.TYPE_REG
+		p1.From.Reg = arm64.REGTMP
+		p1.To.Type = obj.TYPE_MEM
+		p1.To.Reg = r0
+		p1.RegTo2 = r2
+
+		// CMP  	Rarg1, Rtmp
+		p2 := s.Prog(cmp)
+		p2.From.Type = obj.TYPE_REG
+		p2.From.Reg = r1
+		p2.Reg = arm64.REGTMP
+
+		// CSET 	EQ, Rout
+		p3 := s.Prog(arm64.ACSET)
+		p3.From.Type = obj.TYPE_REG
+		p3.From.Reg = arm64.COND_EQ
+		p3.To.Type = obj.TYPE_REG
+		p3.To.Reg = out
+
 	case ssa.OpARM64LoweredAtomicAnd8,
 		ssa.OpARM64LoweredAtomicAnd32,
 		ssa.OpARM64LoweredAtomicOr8,
@@ -725,6 +793,63 @@ func ssaGenValue(s *gc.SSAGenState, v *ssa.Value) {
 		p3.From.Reg = arm64.REGTMP
 		p3.To.Type = obj.TYPE_BRANCH
 		gc.Patch(p3, p)
+	case ssa.OpARM64LoweredAtomicAnd8Variant,
+		ssa.OpARM64LoweredAtomicAnd32Variant:
+		atomic_clear := arm64.ALDCLRALW
+		if v.Op == ssa.OpARM64LoweredAtomicAnd8Variant {
+			atomic_clear = arm64.ALDCLRALB
+		}
+		r0 := v.Args[0].Reg()
+		r1 := v.Args[1].Reg()
+		out := v.Reg0()
+
+		// MVN	Rarg1, Rtemp
+		p := s.Prog(arm64.AMVN)
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = r1
+		p.To.Type = obj.TYPE_REG
+		p.To.Reg = arm64.REGTMP
+
+		// LDCLRALW	Rtemp, (Rarg0), Rout
+		p1 := s.Prog(atomic_clear)
+		p1.From.Type = obj.TYPE_REG
+		p1.From.Reg = arm64.REGTMP
+		p1.To.Type = obj.TYPE_MEM
+		p1.To.Reg = r0
+		p1.RegTo2 = out
+
+		// AND	Rarg1, Rout
+		p2 := s.Prog(arm64.AAND)
+		p2.From.Type = obj.TYPE_REG
+		p2.From.Reg = r1
+		p2.To.Type = obj.TYPE_REG
+		p2.To.Reg = out
+
+	case ssa.OpARM64LoweredAtomicOr8Variant,
+		ssa.OpARM64LoweredAtomicOr32Variant:
+		atomic_or := arm64.ALDORALW
+		if v.Op == ssa.OpARM64LoweredAtomicOr8Variant {
+			atomic_or = arm64.ALDORALB
+		}
+		r0 := v.Args[0].Reg()
+		r1 := v.Args[1].Reg()
+		out := v.Reg0()
+
+		// LDORALW	Rarg1, (Rarg0), Rout
+		p := s.Prog(atomic_or)
+		p.From.Type = obj.TYPE_REG
+		p.From.Reg = r1
+		p.To.Type = obj.TYPE_MEM
+		p.To.Reg = r0
+		p.RegTo2 = out
+
+		// ORR	Rarg1, Rout
+		p2 := s.Prog(arm64.AORR)
+		p2.From.Type = obj.TYPE_REG
+		p2.From.Reg = r1
+		p2.To.Type = obj.TYPE_REG
+		p2.To.Reg = out
+
 	case ssa.OpARM64MOVBreg,
 		ssa.OpARM64MOVBUreg,
 		ssa.OpARM64MOVHreg,
diff --git a/src/cmd/compile/internal/gc/ssa.go b/src/cmd/compile/internal/gc/ssa.go
index 67484904a9..0b38e70cd2 100644
--- a/src/cmd/compile/internal/gc/ssa.go
+++ b/src/cmd/compile/internal/gc/ssa.go
@@ -3458,14 +3458,64 @@ func init() {
 			s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
 			return s.newValue1(ssa.OpSelect0, types.Types[TUINT32], v)
 		},
-		sys.AMD64, sys.ARM64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
+		sys.AMD64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
 	addF("runtime/internal/atomic", "Xchg64",
 		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
 			v := s.newValue3(ssa.OpAtomicExchange64, types.NewTuple(types.Types[TUINT64], types.TypeMem), args[0], args[1], s.mem())
 			s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
 			return s.newValue1(ssa.OpSelect0, types.Types[TUINT64], v)
 		},
-		sys.AMD64, sys.ARM64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
+		sys.AMD64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
+
+	type atomicOpEmitter func(s *state, n *Node, args []*ssa.Value, op ssa.Op, typ types.EType)
+
+	makeAtomicGuardedIntrinsicARM64 := func(op0, op1 ssa.Op, typ, rtyp types.EType, emit atomicOpEmitter) intrinsicBuilder {
+
+		return func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
+			// The target's atomic instruction support is detected dynamically at run time.
+			addr := s.entryNewValue1A(ssa.OpAddr, types.Types[TBOOL].PtrTo(), arm64HasATOMICS, s.sb)
+			v := s.load(types.Types[TBOOL], addr)
+			b := s.endBlock()
+			b.Kind = ssa.BlockIf
+			b.SetControl(v)
+			bTrue := s.f.NewBlock(ssa.BlockPlain)
+			bFalse := s.f.NewBlock(ssa.BlockPlain)
+			bEnd := s.f.NewBlock(ssa.BlockPlain)
+			b.AddEdgeTo(bTrue)
+			b.AddEdgeTo(bFalse)
+			b.Likely = ssa.BranchLikely
+
+			// We have atomic instructions - use them directly.
+			s.startBlock(bTrue)
+			emit(s, n, args, op1, typ)
+			s.endBlock().AddEdgeTo(bEnd)
+
+			// Use the original instruction sequence.
+			s.startBlock(bFalse)
+			emit(s, n, args, op0, typ)
+			s.endBlock().AddEdgeTo(bEnd)
+
+			// Merge results.
+			s.startBlock(bEnd)
+			if rtyp == TNIL {
+				return nil
+			} else {
+				return s.variable(n, types.Types[rtyp])
+			}
+		}
+	}
+
+	atomicXchgXaddEmitterARM64 := func(s *state, n *Node, args []*ssa.Value, op ssa.Op, typ types.EType) {
+		v := s.newValue3(op, types.NewTuple(types.Types[typ], types.TypeMem), args[0], args[1], s.mem())
+		s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
+		s.vars[n] = s.newValue1(ssa.OpSelect0, types.Types[typ], v)
+	}
+	addF("runtime/internal/atomic", "Xchg",
+		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicExchange32, ssa.OpAtomicExchange32Variant, TUINT32, TUINT32, atomicXchgXaddEmitterARM64),
+		sys.ARM64)
+	addF("runtime/internal/atomic", "Xchg64",
+		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicExchange64, ssa.OpAtomicExchange64Variant, TUINT64, TUINT64, atomicXchgXaddEmitterARM64),
+		sys.ARM64)

 	addF("runtime/internal/atomic", "Xadd",
 		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
@@ -3482,46 +3532,11 @@ func init() {
 		},
 		sys.AMD64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)

-	makeXaddARM64 := func(op0 ssa.Op, op1 ssa.Op, ty types.EType) func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
-		return func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
-			// Target Atomic feature is identified by dynamic detection
-			addr := s.entryNewValue1A(ssa.OpAddr, types.Types[TBOOL].PtrTo(), arm64HasATOMICS, s.sb)
-			v := s.load(types.Types[TBOOL], addr)
-			b := s.endBlock()
-			b.Kind = ssa.BlockIf
-			b.SetControl(v)
-			bTrue := s.f.NewBlock(ssa.BlockPlain)
-			bFalse := s.f.NewBlock(ssa.BlockPlain)
-			bEnd := s.f.NewBlock(ssa.BlockPlain)
-			b.AddEdgeTo(bTrue)
-			b.AddEdgeTo(bFalse)
-			b.Likely = ssa.BranchUnlikely // most machines don't have Atomics nowadays
-
-			// We have atomic instructions - use it directly.
-			s.startBlock(bTrue)
-			v0 := s.newValue3(op1, types.NewTuple(types.Types[ty], types.TypeMem), args[0], args[1], s.mem())
-			s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v0)
-			s.vars[n] = s.newValue1(ssa.OpSelect0, types.Types[ty], v0)
-			s.endBlock().AddEdgeTo(bEnd)
-
-			// Use original instruction sequence.
-			s.startBlock(bFalse)
-			v1 := s.newValue3(op0, types.NewTuple(types.Types[ty], types.TypeMem), args[0], args[1], s.mem())
-			s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v1)
-			s.vars[n] = s.newValue1(ssa.OpSelect0, types.Types[ty], v1)
-			s.endBlock().AddEdgeTo(bEnd)
-
-			// Merge results.
-			s.startBlock(bEnd)
-			return s.variable(n, types.Types[ty])
-		}
-	}
-
 	addF("runtime/internal/atomic", "Xadd",
-		makeXaddARM64(ssa.OpAtomicAdd32, ssa.OpAtomicAdd32Variant, TUINT32),
+		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAdd32, ssa.OpAtomicAdd32Variant, TUINT32, TUINT32, atomicXchgXaddEmitterARM64),
 		sys.ARM64)
 	addF("runtime/internal/atomic", "Xadd64",
-		makeXaddARM64(ssa.OpAtomicAdd64, ssa.OpAtomicAdd64Variant, TUINT64),
+		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAdd64, ssa.OpAtomicAdd64Variant, TUINT64, TUINT64, atomicXchgXaddEmitterARM64),
 		sys.ARM64)

 	addF("runtime/internal/atomic", "Cas",
@@ -3530,14 +3545,14 @@ func init() {
 			s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
 			return s.newValue1(ssa.OpSelect0, types.Types[TBOOL], v)
 		},
-		sys.AMD64, sys.ARM64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
+		sys.AMD64, sys.MIPS, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
 	addF("runtime/internal/atomic", "Cas64",
 		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
 			v := s.newValue4(ssa.OpAtomicCompareAndSwap64, types.NewTuple(types.Types[TBOOL], types.TypeMem), args[0], args[1], args[2], s.mem())
 			s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
 			return s.newValue1(ssa.OpSelect0, types.Types[TBOOL], v)
 		},
-		sys.AMD64, sys.ARM64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
+		sys.AMD64, sys.MIPS64, sys.PPC64, sys.RISCV64, sys.S390X)
 	addF("runtime/internal/atomic", "CasRel",
 		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
 			v := s.newValue4(ssa.OpAtomicCompareAndSwap32, types.NewTuple(types.Types[TBOOL], types.TypeMem), args[0], args[1], args[2], s.mem())
@@ -3546,18 +3561,31 @@ func init() {
 		},
 		sys.PPC64)

+	atomicCasEmitterARM64 := func(s *state, n *Node, args []*ssa.Value, op ssa.Op, typ types.EType) {
+		v := s.newValue4(op, types.NewTuple(types.Types[TBOOL], types.TypeMem), args[0], args[1], args[2], s.mem())
+		s.vars[&memVar] = s.newValue1(ssa.OpSelect1, types.TypeMem, v)
+		s.vars[n] = s.newValue1(ssa.OpSelect0, types.Types[typ], v)
+	}
+
+	addF("runtime/internal/atomic", "Cas",
+		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicCompareAndSwap32, ssa.OpAtomicCompareAndSwap32Variant, TUINT32, TBOOL, atomicCasEmitterARM64),
+		sys.ARM64)
+	addF("runtime/internal/atomic", "Cas64",
+		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicCompareAndSwap64, ssa.OpAtomicCompareAndSwap64Variant, TUINT64, TBOOL, atomicCasEmitterARM64),
+		sys.ARM64)
+
 	addF("runtime/internal/atomic", "And8",
 		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
 			s.vars[&memVar] = s.newValue3(ssa.OpAtomicAnd8, types.TypeMem, args[0], args[1], s.mem())
 			return nil
 		},
-		sys.AMD64, sys.ARM64, sys.MIPS, sys.PPC64, sys.S390X)
+		sys.AMD64, sys.MIPS, sys.PPC64, sys.S390X)
 	addF("runtime/internal/atomic", "And",
 		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
 			s.vars[&memVar] = s.newValue3(ssa.OpAtomicAnd32, types.TypeMem, args[0], args[1], s.mem())
 			return nil
 		},
-		sys.AMD64, sys.ARM64, sys.MIPS, sys.PPC64, sys.S390X)
+		sys.AMD64, sys.MIPS, sys.PPC64, sys.S390X)
 	addF("runtime/internal/atomic", "Or8",
 		func(s *state, n *Node, args []*ssa.Value) *ssa.Value {
 			s.vars[&memVar] = s.newValue3(ssa.OpAtomicOr8, types.TypeMem, args[0], args[1], s.mem())
@@ -3569,7 +3597,24 @@ func init() {
 			s.vars[&memVar] = s.newValue3(ssa.OpAtomicOr32, types.TypeMem, args[0], args[1], s.mem())
 			return nil
 		},
-		sys.AMD64, sys.ARM64, sys.MIPS, sys.PPC64, sys.S390X)
+		sys.AMD64, sys.MIPS, sys.PPC64, sys.S390X)
+
+	atomicAndOrEmitterARM64 := func(s *state, n *Node, args []*ssa.Value, op ssa.Op, typ types.EType) {
+		s.vars[&memVar] = s.newValue3(op, types.TypeMem, args[0], args[1], s.mem())
+	}
+
+	addF("runtime/internal/atomic", "And8",
+		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAnd8, ssa.OpAtomicAnd8Variant, TNIL, TNIL, atomicAndOrEmitterARM64),
+		sys.ARM64)
+	addF("runtime/internal/atomic", "And",
+		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicAnd32, ssa.OpAtomicAnd32Variant, TNIL, TNIL, atomicAndOrEmitterARM64),
+		sys.ARM64)
+	addF("runtime/internal/atomic", "Or8",
+		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicOr8, ssa.OpAtomicOr8Variant, TNIL, TNIL, atomicAndOrEmitterARM64),
+		sys.ARM64)
+	addF("runtime/internal/atomic", "Or",
+		makeAtomicGuardedIntrinsicARM64(ssa.OpAtomicOr32, ssa.OpAtomicOr32Variant, TNIL, TNIL, atomicAndOrEmitterARM64),
+		sys.ARM64)

 	alias("runtime/internal/atomic", "Loadint64", "runtime/internal/atomic", "Load64", all...)
 	alias("runtime/internal/atomic", "Xaddint64", "runtime/internal/atomic", "Xadd64", all...)
diff --git a/src/cmd/compile/internal/ssa/gen/ARM64.rules b/src/cmd/compile/internal/ssa/gen/ARM64.rules
index 7e014fe0a8..9edc0c94b0 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM64.rules
+++ b/src/cmd/compile/internal/ssa/gen/ARM64.rules
@@ -543,17 +543,24 @@
 (AtomicStore64 ...) => (STLR ...)
 (AtomicStorePtrNoWB ...) => (STLR ...)

-(AtomicExchange(32|64) ...) => (LoweredAtomicExchange(32|64) ...)
-(AtomicAdd(32|64) ...) => (LoweredAtomicAdd(32|64) ...)
+(AtomicExchange(32|64) ...) => (LoweredAtomicExchange(32|64) ...)
+(AtomicAdd(32|64) ...) => (LoweredAtomicAdd(32|64) ...)
 (AtomicCompareAndSwap(32|64) ...) => (LoweredAtomicCas(32|64) ...)

+(AtomicAdd(32|64)Variant ...) => (LoweredAtomicAdd(32|64)Variant ...)
+(AtomicExchange(32|64)Variant ...) => (LoweredAtomicExchange(32|64)Variant ...)
+(AtomicCompareAndSwap(32|64)Variant ...) => (LoweredAtomicCas(32|64)Variant ...)
+
 // Currently the updated value is not used, but we need a register to temporarily hold it.
 (AtomicAnd8 ptr val mem) => (Select1 (LoweredAtomicAnd8 ptr val mem))
 (AtomicAnd32 ptr val mem) => (Select1 (LoweredAtomicAnd32 ptr val mem))
 (AtomicOr8 ptr val mem) => (Select1 (LoweredAtomicOr8 ptr val mem))
 (AtomicOr32 ptr val mem) => (Select1 (LoweredAtomicOr32 ptr val mem))

-(AtomicAdd(32|64)Variant ...) => (LoweredAtomicAdd(32|64)Variant ...)
+(AtomicAnd8Variant ptr val mem) => (Select1 (LoweredAtomicAnd8Variant ptr val mem))
+(AtomicAnd32Variant ptr val mem) => (Select1 (LoweredAtomicAnd32Variant ptr val mem))
+(AtomicOr8Variant ptr val mem) => (Select1 (LoweredAtomicOr8Variant ptr val mem))
+(AtomicOr32Variant ptr val mem) => (Select1 (LoweredAtomicOr32Variant ptr val mem))

 // Write barrier.
 (WB ...) => (LoweredWB ...)
diff --git a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
index fe9edbf933..87db2b7c9d 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
@@ -621,6 +621,12 @@ func init() {
 		{name: "LoweredAtomicExchange64", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
 		{name: "LoweredAtomicExchange32", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},

+		// atomic exchange variant.
+		// store arg1 to arg0. arg2=mem. returns <old content of *arg0, memory>. auxint must be zero.
+		// SWPALD	Rarg1, (Rarg0), Rout
+		{name: "LoweredAtomicExchange64Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true},
+		{name: "LoweredAtomicExchange32Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, faultOnNilArg0: true, hasSideEffects: true},
+
 		// atomic add.
 		// *arg0 += arg1. arg2=mem. returns <new content of *arg0, memory>. auxint must be zero.
 		// LDAXR	(Rarg0), Rout
@@ -654,6 +660,21 @@ func init() {
 		{name: "LoweredAtomicCas64", argLength: 4, reg: gpcas, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
 		{name: "LoweredAtomicCas32", argLength: 4, reg: gpcas, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},

+		// atomic compare and swap variant.
+		// arg0 = pointer, arg1 = old value, arg2 = new value, arg3 = memory. auxint must be zero.
+		// if *arg0 == arg1 {
+		//   *arg0 = arg2
+		//   return (true, memory)
+		// } else {
+		//   return (false, memory)
+		// }
+		// MOV  	Rarg1, Rtmp
+		// CASAL	Rtmp, (Rarg0), Rarg2
+		// CMP  	Rarg1, Rtmp
+		// CSET 	EQ, Rout
+		{name: "LoweredAtomicCas64Variant", argLength: 4, reg: gpcas, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
+		{name: "LoweredAtomicCas32Variant", argLength: 4, reg: gpcas, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
+
 		// atomic and/or.
 		// *arg0 &= (|=) arg1. arg2=mem. returns <new content of *arg0, memory>. auxint must be zero.
 		// LDAXR	(Rarg0), Rout
@@ -665,6 +686,20 @@ func init() {
 		{name: "LoweredAtomicOr8", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "ORR", typ: "(UInt8,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
 		{name: "LoweredAtomicOr32", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "ORR", typ: "(UInt32,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},

+		// atomic and/or variant.
+		// *arg0 &= (|=) arg1. arg2=mem. returns <new content of *arg0, memory>. auxint must be zero.
+		//   AND:
+		// MVN 	Rarg1, Rtemp
+		// LDCLRALB	Rtemp, (Rarg0), Rout
+		// AND 	Rarg1, Rout
+		//   OR:
+		// LDORALB	Rarg1, (Rarg0), Rout
+		// ORR 	Rarg1, Rout
+		{name: "LoweredAtomicAnd8Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, typ: "(UInt8,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
+		{name: "LoweredAtomicAnd32Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, typ: "(UInt32,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
+		{name: "LoweredAtomicOr8Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, typ: "(UInt8,Mem)", faultOnNilArg0: true, hasSideEffects: true},
+		{name: "LoweredAtomicOr32Variant", argLength: 3, reg: gpxchg, resultNotInArgs: true, typ: "(UInt32,Mem)", faultOnNilArg0: true, hasSideEffects: true},
+
 		// LoweredWB invokes runtime.gcWriteBarrier. arg0=destptr, arg1=srcptr, arg2=mem, aux=runtime.gcWriteBarrier
 		// It saves all GP registers if necessary,
 		// but clobbers R30 (LR) because it's a call.
diff --git a/src/cmd/compile/internal/ssa/gen/genericOps.go b/src/cmd/compile/internal/ssa/gen/genericOps.go
index db8d7ba0cf..9565199d51 100644
--- a/src/cmd/compile/internal/ssa/gen/genericOps.go
+++ b/src/cmd/compile/internal/ssa/gen/genericOps.go
@@ -574,8 +574,16 @@ var genericOps = []opData{
 	// These variants have the same semantics as above atomic operations.
 	// But they are used for generating more efficient code on certain modern machines, with run-time CPU feature detection.
 	// Currently, they are used on ARM64 only.
-	{name: "AtomicAdd32Variant", argLength: 3, typ: "(UInt32,Mem)", hasSideEffects: true}, // Do *arg0 += arg1. arg2=memory. Returns sum and new memory.
-	{name: "AtomicAdd64Variant", argLength: 3, typ: "(UInt64,Mem)", hasSideEffects: true}, // Do *arg0 += arg1. arg2=memory. Returns sum and new memory.
+	{name: "AtomicAdd32Variant", argLength: 3, typ: "(UInt32,Mem)", hasSideEffects: true},          // Do *arg0 += arg1. arg2=memory. Returns sum and new memory.
+	{name: "AtomicAdd64Variant", argLength: 3, typ: "(UInt64,Mem)", hasSideEffects: true},          // Do *arg0 += arg1. arg2=memory. Returns sum and new memory.
+	{name: "AtomicExchange32Variant", argLength: 3, typ: "(UInt32,Mem)", hasSideEffects: true},     // Store arg1 to *arg0. arg2=memory. Returns old contents of *arg0 and new memory.
+	{name: "AtomicExchange64Variant", argLength: 3, typ: "(UInt64,Mem)", hasSideEffects: true},     // Store arg1 to *arg0. arg2=memory. Returns old contents of *arg0 and new memory.
+	{name: "AtomicCompareAndSwap32Variant", argLength: 4, typ: "(Bool,Mem)", hasSideEffects: true}, // if *arg0==arg1, then set *arg0=arg2. Returns true if store happens and new memory.
+	{name: "AtomicCompareAndSwap64Variant", argLength: 4, typ: "(Bool,Mem)", hasSideEffects: true}, // if *arg0==arg1, then set *arg0=arg2. Returns true if store happens and new memory.
+	{name: "AtomicAnd8Variant", argLength: 3, typ: "Mem", hasSideEffects: true},                    // *arg0 &= arg1. arg2=memory. Returns memory.
+	{name: "AtomicAnd32Variant", argLength: 3, typ: "Mem", hasSideEffects: true},                   // *arg0 &= arg1. arg2=memory. Returns memory.
+	{name: "AtomicOr8Variant", argLength: 3, typ: "Mem", hasSideEffects: true},                     // *arg0 |= arg1. arg2=memory. Returns memory.
+	{name: "AtomicOr32Variant", argLength: 3, typ: "Mem", hasSideEffects: true},                    // *arg0 |= arg1. arg2=memory. Returns memory.
 	// Clobber experiment op
 	{name: "Clobber", argLength: 0, typ: "Void", aux: "SymOff", symEffect: "None"}, // write an invalid pointer value to the given pointer slot of a stack variable
diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go
index 25c1df12ee..c0b663cd8f 100644
--- a/src/cmd/compile/internal/ssa/opGen.go
+++ b/src/cmd/compile/internal/ssa/opGen.go
@@ -1581,16 +1581,24 @@ const (
 	OpARM64STLRW
 	OpARM64LoweredAtomicExchange64
 	OpARM64LoweredAtomicExchange32
+	OpARM64LoweredAtomicExchange64Variant
+	OpARM64LoweredAtomicExchange32Variant
 	OpARM64LoweredAtomicAdd64
 	OpARM64LoweredAtomicAdd32
 	OpARM64LoweredAtomicAdd64Variant
 	OpARM64LoweredAtomicAdd32Variant
 	OpARM64LoweredAtomicCas64
 	OpARM64LoweredAtomicCas32
+	OpARM64LoweredAtomicCas64Variant
+	OpARM64LoweredAtomicCas32Variant
 	OpARM64LoweredAtomicAnd8
 	OpARM64LoweredAtomicAnd32
 	OpARM64LoweredAtomicOr8
 	OpARM64LoweredAtomicOr32
+	OpARM64LoweredAtomicAnd8Variant
+	OpARM64LoweredAtomicAnd32Variant
+	OpARM64LoweredAtomicOr8Variant
+	OpARM64LoweredAtomicOr32Variant
 	OpARM64LoweredWB
 	OpARM64LoweredPanicBoundsA
 	OpARM64LoweredPanicBoundsB
@@ -2881,6 +2889,14 @@ const (
 	OpAtomicOr32
 	OpAtomicAdd32Variant
 	OpAtomicAdd64Variant
+	OpAtomicExchange32Variant
+	OpAtomicExchange64Variant
+	OpAtomicCompareAndSwap32Variant
+	OpAtomicCompareAndSwap64Variant
+	OpAtomicAnd8Variant
+	OpAtomicAnd32Variant
+	OpAtomicOr8Variant
+	OpAtomicOr32Variant
 	OpClobber
 )

@@ -20994,6 +21010,38 @@ var opcodeTable = [...]opInfo{
 			},
 		},
 	},
+	{
+		name:            "LoweredAtomicExchange64Variant",
+		argLen:          3,
+		resultNotInArgs: true,
+		faultOnNilArg0:  true,
+		hasSideEffects:  true,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+				{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+			},
+			outputs: []outputInfo{
+				{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+			},
+		},
+	},
+	{
+		name:            "LoweredAtomicExchange32Variant",
+		argLen:          3,
+		resultNotInArgs: true,
+		faultOnNilArg0:  true,
+		hasSideEffects:  true,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+				{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+			},
+			outputs: []outputInfo{
+				{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+			},
+		},
+	},
 	{
 		name:            "LoweredAtomicAdd64",
 		argLen:          3,
@@ -21098,6 +21146,44 @@ var opcodeTable = [...]opInfo{
 			},
 		},
 	},
+	{
+		name:            "LoweredAtomicCas64Variant",
+		argLen:          4,
+		resultNotInArgs: true,
+		clobberFlags:    true,
+		faultOnNilArg0:  true,
+		hasSideEffects:  true,
+		unsafePoint:     true,
+		reg: regInfo{
+			inputs: []inputInfo{
+				{1, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+				{2, 805044223},           // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30
+				{0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB
+			},
+			outputs: []outputInfo{
+				{0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30
+			},
+		},
+	},
{ + name: "LoweredAtomicCas32Variant", + argLen: 4, + resultNotInArgs: true, + clobberFlags: true, + faultOnNilArg0: true, + hasSideEffects: true, + unsafePoint: true, + reg: regInfo{ + inputs: []inputInfo{ + {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + {2, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB + }, + outputs: []outputInfo{ + {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30 + }, + }, + }, { name: "LoweredAtomicAnd8", argLen: 3, @@ -21170,6 +21256,72 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "LoweredAtomicAnd8Variant", + argLen: 3, + resultNotInArgs: true, + faultOnNilArg0: true, + hasSideEffects: true, + unsafePoint: true, + reg: regInfo{ + inputs: []inputInfo{ + {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB + }, + outputs: []outputInfo{ + {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30 + }, + }, + }, + { + name: "LoweredAtomicAnd32Variant", + argLen: 3, + resultNotInArgs: true, + faultOnNilArg0: true, + hasSideEffects: true, + unsafePoint: true, + reg: regInfo{ + inputs: []inputInfo{ + {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB + }, + outputs: []outputInfo{ + {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30 + }, + }, + }, + { + name: "LoweredAtomicOr8Variant", + argLen: 3, + resultNotInArgs: true, + faultOnNilArg0: true, + hasSideEffects: true, + reg: regInfo{ + inputs: []inputInfo{ + {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB + }, + outputs: []outputInfo{ + {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30 + }, + }, + }, + { + name: "LoweredAtomicOr32Variant", + argLen: 3, + resultNotInArgs: true, + faultOnNilArg0: true, + hasSideEffects: true, + reg: regInfo{ + inputs: []inputInfo{ + {1, 805044223}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 + {0, 9223372038733561855}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 g R30 SP SB + }, + outputs: []outputInfo{ + {0, 670826495}, // R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R19 R20 R21 R22 R23 R24 R25 R26 R30 + }, + }, + }, { name: "LoweredWB", auxType: auxSym, @@ -35874,6 +36026,54 @@ var opcodeTable = [...]opInfo{ hasSideEffects: true, generic: true, }, + { + name: "AtomicExchange32Variant", + argLen: 3, + hasSideEffects: true, + generic: true, + }, + { + name: "AtomicExchange64Variant", + argLen: 3, + hasSideEffects: true, + 
+		generic:        true,
+	},
+	{
+		name:           "AtomicCompareAndSwap32Variant",
+		argLen:         4,
+		hasSideEffects: true,
+		generic:        true,
+	},
+	{
+		name:           "AtomicCompareAndSwap64Variant",
+		argLen:         4,
+		hasSideEffects: true,
+		generic:        true,
+	},
+	{
+		name:           "AtomicAnd8Variant",
+		argLen:         3,
+		hasSideEffects: true,
+		generic:        true,
+	},
+	{
+		name:           "AtomicAnd32Variant",
+		argLen:         3,
+		hasSideEffects: true,
+		generic:        true,
+	},
+	{
+		name:           "AtomicOr8Variant",
+		argLen:         3,
+		hasSideEffects: true,
+		generic:        true,
+	},
+	{
+		name:           "AtomicOr32Variant",
+		argLen:         3,
+		hasSideEffects: true,
+		generic:        true,
+	},
 	{
 		name:      "Clobber",
 		auxType:   auxSymOff,
diff --git a/src/cmd/compile/internal/ssa/rewriteARM64.go b/src/cmd/compile/internal/ssa/rewriteARM64.go
index 9a5e976dea..353696bf39 100644
--- a/src/cmd/compile/internal/ssa/rewriteARM64.go
+++ b/src/cmd/compile/internal/ssa/rewriteARM64.go
@@ -426,20 +426,36 @@ func rewriteValueARM64(v *Value) bool {
 		return true
 	case OpAtomicAnd32:
 		return rewriteValueARM64_OpAtomicAnd32(v)
+	case OpAtomicAnd32Variant:
+		return rewriteValueARM64_OpAtomicAnd32Variant(v)
 	case OpAtomicAnd8:
 		return rewriteValueARM64_OpAtomicAnd8(v)
+	case OpAtomicAnd8Variant:
+		return rewriteValueARM64_OpAtomicAnd8Variant(v)
 	case OpAtomicCompareAndSwap32:
 		v.Op = OpARM64LoweredAtomicCas32
 		return true
+	case OpAtomicCompareAndSwap32Variant:
+		v.Op = OpARM64LoweredAtomicCas32Variant
+		return true
 	case OpAtomicCompareAndSwap64:
 		v.Op = OpARM64LoweredAtomicCas64
 		return true
+	case OpAtomicCompareAndSwap64Variant:
+		v.Op = OpARM64LoweredAtomicCas64Variant
+		return true
 	case OpAtomicExchange32:
 		v.Op = OpARM64LoweredAtomicExchange32
 		return true
+	case OpAtomicExchange32Variant:
+		v.Op = OpARM64LoweredAtomicExchange32Variant
+		return true
 	case OpAtomicExchange64:
 		v.Op = OpARM64LoweredAtomicExchange64
 		return true
+	case OpAtomicExchange64Variant:
+		v.Op = OpARM64LoweredAtomicExchange64Variant
+		return true
 	case OpAtomicLoad32:
 		v.Op = OpARM64LDARW
 		return true
@@ -454,8 +470,12 @@ func rewriteValueARM64(v *Value) bool {
 		return true
 	case OpAtomicOr32:
 		return rewriteValueARM64_OpAtomicOr32(v)
+	case OpAtomicOr32Variant:
+		return rewriteValueARM64_OpAtomicOr32Variant(v)
 	case OpAtomicOr8:
 		return rewriteValueARM64_OpAtomicOr8(v)
+	case OpAtomicOr8Variant:
+		return rewriteValueARM64_OpAtomicOr8Variant(v)
 	case OpAtomicStore32:
 		v.Op = OpARM64STLRW
 		return true
@@ -21363,6 +21383,25 @@ func rewriteValueARM64_OpAtomicAnd32(v *Value) bool {
 		return true
 	}
 }
+func rewriteValueARM64_OpAtomicAnd32Variant(v *Value) bool {
+	v_2 := v.Args[2]
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	typ := &b.Func.Config.Types
+	// match: (AtomicAnd32Variant ptr val mem)
+	// result: (Select1 (LoweredAtomicAnd32Variant ptr val mem))
+	for {
+		ptr := v_0
+		val := v_1
+		mem := v_2
+		v.reset(OpSelect1)
+		v0 := b.NewValue0(v.Pos, OpARM64LoweredAtomicAnd32Variant, types.NewTuple(typ.UInt32, types.TypeMem))
+		v0.AddArg3(ptr, val, mem)
+		v.AddArg(v0)
+		return true
+	}
+}
 func rewriteValueARM64_OpAtomicAnd8(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
@@ -21382,6 +21421,25 @@ func rewriteValueARM64_OpAtomicAnd8(v *Value) bool {
 		return true
 	}
 }
+func rewriteValueARM64_OpAtomicAnd8Variant(v *Value) bool {
+	v_2 := v.Args[2]
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	typ := &b.Func.Config.Types
+	// match: (AtomicAnd8Variant ptr val mem)
+	// result: (Select1 (LoweredAtomicAnd8Variant ptr val mem))
+	for {
+		ptr := v_0
+		val := v_1
+		mem := v_2
+		v.reset(OpSelect1)
+		v0 := b.NewValue0(v.Pos, OpARM64LoweredAtomicAnd8Variant, types.NewTuple(typ.UInt8, types.TypeMem))
+		v0.AddArg3(ptr, val, mem)
+		v.AddArg(v0)
+		return true
+	}
+}
 func rewriteValueARM64_OpAtomicOr32(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
@@ -21401,6 +21459,25 @@ func rewriteValueARM64_OpAtomicOr32(v *Value) bool {
 		return true
 	}
 }
+func rewriteValueARM64_OpAtomicOr32Variant(v *Value) bool {
+	v_2 := v.Args[2]
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	typ := &b.Func.Config.Types
+	// match: (AtomicOr32Variant ptr val mem)
+	// result: (Select1 (LoweredAtomicOr32Variant ptr val mem))
+	for {
+		ptr := v_0
+		val := v_1
+		mem := v_2
+		v.reset(OpSelect1)
+		v0 := b.NewValue0(v.Pos, OpARM64LoweredAtomicOr32Variant, types.NewTuple(typ.UInt32, types.TypeMem))
+		v0.AddArg3(ptr, val, mem)
+		v.AddArg(v0)
+		return true
+	}
+}
 func rewriteValueARM64_OpAtomicOr8(v *Value) bool {
 	v_2 := v.Args[2]
 	v_1 := v.Args[1]
@@ -21420,6 +21497,25 @@ func rewriteValueARM64_OpAtomicOr8(v *Value) bool {
 		return true
 	}
 }
+func rewriteValueARM64_OpAtomicOr8Variant(v *Value) bool {
+	v_2 := v.Args[2]
+	v_1 := v.Args[1]
+	v_0 := v.Args[0]
+	b := v.Block
+	typ := &b.Func.Config.Types
+	// match: (AtomicOr8Variant ptr val mem)
+	// result: (Select1 (LoweredAtomicOr8Variant ptr val mem))
+	for {
+		ptr := v_0
+		val := v_1
+		mem := v_2
+		v.reset(OpSelect1)
+		v0 := b.NewValue0(v.Pos, OpARM64LoweredAtomicOr8Variant, types.NewTuple(typ.UInt8, types.TypeMem))
+		v0.AddArg3(ptr, val, mem)
+		v.AddArg(v0)
+		return true
+	}
+}
 func rewriteValueARM64_OpAvg64u(v *Value) bool {
 	v_1 := v.Args[1]
 	v_0 := v.Args[0]
diff --git a/src/runtime/internal/atomic/bench_test.go b/src/runtime/internal/atomic/bench_test.go
index 434aa6d434..2476c06c52 100644
--- a/src/runtime/internal/atomic/bench_test.go
+++ b/src/runtime/internal/atomic/bench_test.go
@@ -142,3 +142,54 @@ func BenchmarkXadd64(b *testing.B) {
 		}
 	})
 }
+
+func BenchmarkCas(b *testing.B) {
+	var x uint32
+	x = 1
+	ptr := &x
+	b.RunParallel(func(pb *testing.PB) {
+		for pb.Next() {
+			atomic.Cas(ptr, 1, 0)
+			atomic.Cas(ptr, 0, 1)
+		}
+	})
+}
+
+func BenchmarkCas64(b *testing.B) {
+	var x uint64
+	x = 1
+	ptr := &x
+	b.RunParallel(func(pb *testing.PB) {
+		for pb.Next() {
+			atomic.Cas64(ptr, 1, 0)
+			atomic.Cas64(ptr, 0, 1)
+		}
+	})
+}
+func BenchmarkXchg(b *testing.B) {
+	var x uint32
+	x = 1
+	ptr := &x
+	b.RunParallel(func(pb *testing.PB) {
+		var y uint32
+		y = 1
+		for pb.Next() {
+			y = atomic.Xchg(ptr, y)
+			y += 1
+		}
+	})
+}
+
+func BenchmarkXchg64(b *testing.B) {
+	var x uint64
+	x = 1
+	ptr := &x
+	b.RunParallel(func(pb *testing.PB) {
+		var y uint64
+		y = 1
+		for pb.Next() {
+			y = atomic.Xchg64(ptr, y)
+			y += 1
+		}
+	})
+}
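
The guarded-intrinsic builder above turns each atomic call site into a two-armed branch on arm64HasATOMICS, a runtime flag filled in from CPU feature detection (internal/cpu.ARM64.HasATOMICS), with the branch marked BranchLikely toward the LSE arm. A minimal sketch of that shape in ordinary Go follows; it is an illustration only, and the hasATOMICS variable and xadd function are hypothetical stand-ins, not part of the change:

package main

import (
	"fmt"
	"sync/atomic"
)

// hasATOMICS stands in for the runtime's arm64HasATOMICS byte; in the real
// runtime it is set during startup from CPU feature detection. It is
// hard-wired here purely for illustration.
var hasATOMICS = false

// xadd models the guarded intrinsic for runtime/internal/atomic.Xadd:
// both arms have identical semantics; only the emitted instructions differ.
func xadd(p *uint32, delta uint32) uint32 {
	if hasATOMICS {
		// LSE arm: a single LDADDALW instruction on real hardware.
		return atomic.AddUint32(p, delta)
	}
	// LL/SC arm: modeled as a CAS retry loop, like the LDAXR/ADD/STLXR
	// sequence the fallback lowering emits.
	for {
		old := atomic.LoadUint32(p)
		if atomic.CompareAndSwapUint32(p, old, old+delta) {
			return old + delta
		}
	}
}

func main() {
	var x uint32
	fmt.Println(xadd(&x, 3)) // 3, whichever arm runs
}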
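
The And lowering relies on the identity x & y == x &^ ^y: LSE provides an atomic bit-clear (LDCLRAL, which also returns the old contents) but no atomic AND, so the operand is first complemented with MVN, and a plain AND afterwards recomputes the new contents that the lowered op is declared to return. A small pure-Go model of the sequence, with ldclr as a hypothetical, non-atomic stand-in for the instruction:

package main

import "fmt"

// ldclr models LSE LDCLRAL: *p &^= mask, returning the old contents.
// The real instruction does this atomically; this stand-in does not.
func ldclr(p *uint32, mask uint32) uint32 {
	old := *p
	*p = old &^ mask // bit-clear: old & ^mask
	return old
}

// atomicAnd32 mirrors the lowered sequence:
//   MVN      Rarg1, Rtemp         (tmp = ^val)
//   LDCLRAL  Rtemp, (Rarg0), Rout (old = *p; *p &= val)
//   AND      Rarg1, Rout          (out = old & val, the new contents)
func atomicAnd32(p *uint32, val uint32) uint32 {
	old := ldclr(p, ^val)
	return old & val
}

func main() {
	x := uint32(0b1100)
	got := atomicAnd32(&x, 0b1010)
	fmt.Printf("%04b %04b\n", x, got) // 1000 1000: new value stored and returned
}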
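
The CAS lowering needs the extra MOV/CMP/CSET because CASAL does not set a success flag: it writes the value it observed in memory back into its compare register, clobbering it. Copying old into Rtmp keeps the original value available for the comparison that produces Go's boolean result. The same dataflow modeled in Go, with casal as a hypothetical, non-atomic stand-in for the instruction:

package main

import "fmt"

// casal models ARM64 CASAL: store new into *p only if *p == cmp, and
// always return the value observed at *p (the real instruction writes it
// back into the compare register, atomically).
func casal(p *uint32, cmp, new uint32) uint32 {
	observed := *p
	if observed == cmp {
		*p = new
	}
	return observed
}

// cas mirrors the lowered sequence for runtime/internal/atomic.Cas:
//   MOV   Rarg1, Rtmp          (copy old so the original survives)
//   CASAL Rtmp, (Rarg0), Rarg2 (Rtmp now holds the observed value)
//   CMP   Rarg1, Rtmp
//   CSET  EQ, Rout             (true iff the observed value was old)
func cas(p *uint32, old, new uint32) bool {
	tmp := old
	tmp = casal(p, tmp, new)
	return tmp == old
}

func main() {
	x := uint32(1)
	fmt.Println(cas(&x, 1, 2), x) // true 2
	fmt.Println(cas(&x, 1, 3), x) // false 2
}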