all: replace runtime SSE2 detection with GO386 setting

When GO386=sse2 is set we can assume SSE2 is present without
a runtime check. When GO386=softfloat is set we can avoid
using SSE2 even if the CPU supports it.

This might slow down memcpy, memclr and bytealg in Go binaries
compiled with GO386=softfloat on machines that support SSE2.
Such setups are rare and should use GO386=sse2 instead if
performance matters.

On targets that support SSE2 we avoid the runtime overhead of
dynamic cpu feature dispatch.

Removing the runtime SSE2 checks also allows internal/cpu to be
simplified further by removing the handling of the required
feature option in a follow-up CL.

Change-Id: I90a853a8853a405cb665497c6d1a86556947ba17
Reviewed-on: https://go-review.googlesource.com/c/go/+/344350
Trust: Martin Möhrmann <martin@golang.org>
Run-TryBot: Martin Möhrmann <martin@golang.org>
TryBot-Result: Go Bot <gobot@golang.org>
Reviewed-by: Keith Randall <khr@golang.org>
This commit is contained in:
Martin Möhrmann 2021-08-23 11:34:51 +02:00
parent 22540abf76
commit 8157960d7f
15 changed files with 31 additions and 60 deletions

View file

@ -374,6 +374,11 @@ func asmArgs(a *Action, p *load.Package) []interface{} {
args = append(args, "-compiling-runtime")
}
if cfg.Goarch == "386" {
// Define GO386_value from cfg.GO386.
args = append(args, "-D", "GO386_"+cfg.GO386)
}
if cfg.Goarch == "mips" || cfg.Goarch == "mipsle" {
// Define GOMIPS_value from cfg.GOMIPS.
args = append(args, "-D", "GOMIPS_"+cfg.GOMIPS)

View file

@ -11,7 +11,6 @@ import (
// Offsets into internal/cpu records for use in assembly.
const (
offsetX86HasSSE2 = unsafe.Offsetof(cpu.X86.HasSSE2)
offsetX86HasSSE42 = unsafe.Offsetof(cpu.X86.HasSSE42)
offsetX86HasAVX2 = unsafe.Offsetof(cpu.X86.HasAVX2)
offsetX86HasPOPCNT = unsafe.Offsetof(cpu.X86.HasPOPCNT)

View file

@ -36,8 +36,9 @@ TEXT cmpbody<>(SB),NOSPLIT,$0-0
JEQ allsame
CMPL BP, $4
JB small
CMPB internalcpu·X86+const_offsetX86HasSSE2(SB), $1
JNE mediumloop
#ifdef GO386_softfloat
JMP mediumloop
#endif
largeloop:
CMPL BP, $16
JB mediumloop

View file

@ -43,8 +43,9 @@ TEXT memeqbody<>(SB),NOSPLIT,$0-0
hugeloop:
CMPL BX, $64
JB bigloop
CMPB internalcpu·X86+const_offsetX86HasSSE2(SB), $1
JNE bigloop
#ifdef GO386_softfloat
JMP bigloop
#endif
MOVOU (SI), X0
MOVOU (DI), X1
MOVOU 16(SI), X2

View file

@ -37,7 +37,6 @@ var X86 struct {
HasPCLMULQDQ bool
HasPOPCNT bool
HasRDTSCP bool
HasSSE2 bool
HasSSE3 bool
HasSSSE3 bool
HasSSE41 bool

View file

@ -1,7 +0,0 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cpu
const GOARCH = "386"

View file

@ -1,7 +0,0 @@
// Copyright 2018 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package cpu
const GOARCH = "amd64"

View file

@ -61,9 +61,6 @@ func doinit() {
{Name: "sse41", Feature: &X86.HasSSE41},
{Name: "sse42", Feature: &X86.HasSSE42},
{Name: "ssse3", Feature: &X86.HasSSSE3},
// These capabilities should always be enabled on amd64:
{Name: "sse2", Feature: &X86.HasSSE2, Required: GOARCH == "amd64"},
}
maxID, _, _, _ := cpuid(0, 0)
@ -74,8 +71,7 @@ func doinit() {
maxExtendedFunctionInformation, _, _, _ = cpuid(0x80000000, 0)
_, _, ecx1, edx1 := cpuid(1, 0)
X86.HasSSE2 = isSet(edx1, cpuid_SSE2)
_, _, ecx1, _ := cpuid(1, 0)
X86.HasSSE3 = isSet(ecx1, cpuid_SSE3)
X86.HasPCLMULQDQ = isSet(ecx1, cpuid_PCLMULQDQ)

View file

@ -10,7 +10,6 @@ package cpu_test
import (
. "internal/cpu"
"os"
"runtime"
"testing"
)
@ -20,23 +19,6 @@ func TestX86ifAVX2hasAVX(t *testing.T) {
}
}
func TestDisableSSE2(t *testing.T) {
runDebugOptionsTest(t, "TestSSE2DebugOption", "cpu.sse2=off")
}
func TestSSE2DebugOption(t *testing.T) {
MustHaveDebugOptionsSupport(t)
if os.Getenv("GODEBUG") != "cpu.sse2=off" {
t.Skipf("skipping test: GODEBUG=cpu.sse2=off not set")
}
want := runtime.GOARCH != "386" // SSE2 can only be disabled on 386.
if got := X86.HasSSE2; got != want {
t.Errorf("X86.HasSSE2 on %s expected %v, got %v", runtime.GOARCH, want, got)
}
}
func TestDisableSSE3(t *testing.T) {
runDebugOptionsTest(t, "TestSSE3DebugOption", "cpu.sse3=off")
}

View file

@ -838,8 +838,9 @@ TEXT runtime·cputicks(SB),NOSPLIT,$0-8
// When no SSE2 support is present do not enforce any serialization
// since using CPUID to serialize the instruction stream is
// very costly.
CMPB internalcpu·X86+const_offsetX86HasSSE2(SB), $1
JNE rdtsc
#ifdef GO386_softfloat
JMP rdtsc // no fence instructions available
#endif
CMPB internalcpu·X86+const_offsetX86HasRDTSCP(SB), $1
JNE fences
// Instruction stream serializing RDTSCP is supported.

View file

@ -15,7 +15,6 @@ const (
offsetX86HasAVX2 = unsafe.Offsetof(cpu.X86.HasAVX2)
offsetX86HasERMS = unsafe.Offsetof(cpu.X86.HasERMS)
offsetX86HasRDTSCP = unsafe.Offsetof(cpu.X86.HasRDTSCP)
offsetX86HasSSE2 = unsafe.Offsetof(cpu.X86.HasSSE2)
offsetARMHasIDIVA = unsafe.Offsetof(cpu.ARM.HasIDIVA)

View file

@ -30,8 +30,9 @@ tail:
JBE _5through8
CMPL BX, $16
JBE _9through16
CMPB internalcpu·X86+const_offsetX86HasSSE2(SB), $1
JNE nosse2
#ifdef GO386_softfloat
JMP nosse2
#endif
PXOR X0, X0
CMPL BX, $32
JBE _17through32

View file

@ -55,8 +55,9 @@ tail:
JBE move_5through8
CMPL BX, $16
JBE move_9through16
CMPB internalcpu·X86+const_offsetX86HasSSE2(SB), $1
JNE nosse2
#ifdef GO386_softfloat
JMP nosse2
#endif
CMPL BX, $32
JBE move_17through32
CMPL BX, $64

View file

@ -200,6 +200,8 @@ func gen386() {
l.add("MOVL", reg, 4)
}
softfloat := "GO386_softfloat"
// Save SSE state only if supported.
lSSE := layout{stack: l.stack, sp: "SP"}
for i := 0; i < 8; i++ {
@ -209,13 +211,13 @@ func gen386() {
p("ADJSP $%d", lSSE.stack)
p("NOP SP")
l.save()
p("CMPB internalcpu·X86+const_offsetX86HasSSE2(SB), $1\nJNE nosse")
p("#ifndef %s", softfloat)
lSSE.save()
label("nosse:")
p("#endif")
p("CALL ·asyncPreempt2(SB)")
p("CMPB internalcpu·X86+const_offsetX86HasSSE2(SB), $1\nJNE nosse2")
p("#ifndef %s", softfloat)
lSSE.restore()
label("nosse2:")
p("#endif")
l.restore()
p("ADJSP $%d", -lSSE.stack)

View file

@ -14,8 +14,7 @@ TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
MOVL BP, 16(SP)
MOVL SI, 20(SP)
MOVL DI, 24(SP)
CMPB internalcpu·X86+const_offsetX86HasSSE2(SB), $1
JNE nosse
#ifndef GO386_softfloat
MOVUPS X0, 28(SP)
MOVUPS X1, 44(SP)
MOVUPS X2, 60(SP)
@ -24,10 +23,9 @@ TEXT ·asyncPreempt(SB),NOSPLIT|NOFRAME,$0-0
MOVUPS X5, 108(SP)
MOVUPS X6, 124(SP)
MOVUPS X7, 140(SP)
nosse:
#endif
CALL ·asyncPreempt2(SB)
CMPB internalcpu·X86+const_offsetX86HasSSE2(SB), $1
JNE nosse2
#ifndef GO386_softfloat
MOVUPS 140(SP), X7
MOVUPS 124(SP), X6
MOVUPS 108(SP), X5
@ -36,7 +34,7 @@ nosse:
MOVUPS 60(SP), X2
MOVUPS 44(SP), X1
MOVUPS 28(SP), X0
nosse2:
#endif
MOVL 24(SP), DI
MOVL 20(SP), SI
MOVL 16(SP), BP