go/test/nosplit.go

398 lines
11 KiB
Go
Raw Normal View History

// +build !nacl,!js,!aix,!gcflags_noopt,gc
// run
// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package main
import (
"bytes"
"fmt"
"io/ioutil"
"log"
"os"
"os/exec"
"path/filepath"
"regexp"
"runtime"
"strconv"
"strings"
)
const debug = false
var tests = `
# These are test cases for the linker analysis that detects chains of
# nosplit functions that would cause a stack overflow.
#
# Lines beginning with # are comments.
#
# Each test case describes a sequence of functions, one per line.
# Each function definition is the function name, then the frame size,
# then optionally the keyword 'nosplit', then the body of the function.
# The body is assembly code, with some shorthands.
# The shorthand 'call x' stands for CALL x(SB).
# The shorthand 'callind' stands for 'CALL R0', where R0 is a register.
# Each test case must define a function named start, and it must be first.
# That is, a line beginning "start " indicates the start of a new test case.
# Within a stanza, ; can be used instead of \n to separate lines.
#
# After the function definition, the test case ends with an optional
# REJECT line, specifying the architectures on which the case should
# be rejected. "REJECT" without any architectures means reject on all architectures.
# The linker should accept the test case on systems not explicitly rejected.
#
# 64-bit systems do not attempt to execute test cases with frame sizes
# that are only 32-bit aligned.
# Ordinary function should work
start 0
# Large frame marked nosplit is always wrong.
start 10000 nosplit
REJECT
# Calling a large frame is okay.
start 0 call big
big 10000
# But not if the frame is nosplit.
start 0 call big
big 10000 nosplit
REJECT
# Recursion is okay.
start 0 call start
# Recursive nosplit runs out of space.
start 0 nosplit call start
REJECT
# Chains of ordinary functions okay.
start 0 call f1
f1 80 call f2
f2 80
# Chains of nosplit must fit in the stack limit, 128 bytes.
start 0 call f1
f1 80 nosplit call f2
f2 80 nosplit
REJECT
# Larger chains.
start 0 call f1
f1 16 call f2
f2 16 call f3
f3 16 call f4
f4 16 call f5
f5 16 call f6
f6 16 call f7
f7 16 call f8
f8 16 call end
end 1000
start 0 call f1
f1 16 nosplit call f2
f2 16 nosplit call f3
f3 16 nosplit call f4
f4 16 nosplit call f5
f5 16 nosplit call f6
f6 16 nosplit call f7
f7 16 nosplit call f8
f8 16 nosplit call end
end 1000
REJECT
# Test cases near the 128-byte limit.
# Ordinary stack split frame is always okay.
start 112
start 116
start 120
start 124
start 128
start 132
start 136
# A nosplit leaf can use the whole 128-CallSize bytes available on entry.
# (CallSize is 32 on ppc64, 8 on amd64 for frame pointer.)
start 96 nosplit
start 100 nosplit; REJECT ppc64 ppc64le
start 104 nosplit; REJECT ppc64 ppc64le arm64
start 108 nosplit; REJECT ppc64 ppc64le
start 112 nosplit; REJECT ppc64 ppc64le arm64
start 116 nosplit; REJECT ppc64 ppc64le
start 120 nosplit; REJECT ppc64 ppc64le amd64 arm64
start 124 nosplit; REJECT ppc64 ppc64le amd64
start 128 nosplit; REJECT
start 132 nosplit; REJECT
start 136 nosplit; REJECT
# Calling a nosplit function from a nosplit function requires
# having room for the saved caller PC and the called frame.
# Because ARM doesn't save LR in the leaf, it gets an extra 4 bytes.
# Because arm64 doesn't save LR in the leaf, it gets an extra 8 bytes.
# ppc64 doesn't save LR in the leaf, but CallSize is 32, so it gets 24 bytes.
# Because AMD64 uses frame pointer, it has 8 fewer bytes.
start 96 nosplit call f; f 0 nosplit
start 100 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le
start 104 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le arm64
start 108 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le
start 112 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le amd64 arm64
start 116 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le amd64
start 120 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le amd64 arm64
start 124 nosplit call f; f 0 nosplit; REJECT ppc64 ppc64le amd64 386
start 128 nosplit call f; f 0 nosplit; REJECT
start 132 nosplit call f; f 0 nosplit; REJECT
start 136 nosplit call f; f 0 nosplit; REJECT
# Calling a splitting function from a nosplit function requires
# having room for the saved caller PC of the call but also the
# saved caller PC for the call to morestack.
# Architectures differ in the same way as before.
start 96 nosplit call f; f 0 call f
start 100 nosplit call f; f 0 call f; REJECT ppc64 ppc64le
start 104 nosplit call f; f 0 call f; REJECT ppc64 ppc64le amd64 arm64
start 108 nosplit call f; f 0 call f; REJECT ppc64 ppc64le amd64
start 112 nosplit call f; f 0 call f; REJECT ppc64 ppc64le amd64 arm64
start 116 nosplit call f; f 0 call f; REJECT ppc64 ppc64le amd64
start 120 nosplit call f; f 0 call f; REJECT ppc64 ppc64le amd64 386 arm64
start 124 nosplit call f; f 0 call f; REJECT ppc64 ppc64le amd64 386
start 128 nosplit call f; f 0 call f; REJECT
start 132 nosplit call f; f 0 call f; REJECT
start 136 nosplit call f; f 0 call f; REJECT
# Indirect calls are assumed to be splitting functions.
start 96 nosplit callind
start 100 nosplit callind; REJECT ppc64 ppc64le
start 104 nosplit callind; REJECT ppc64 ppc64le amd64 arm64
start 108 nosplit callind; REJECT ppc64 ppc64le amd64
start 112 nosplit callind; REJECT ppc64 ppc64le amd64 arm64
start 116 nosplit callind; REJECT ppc64 ppc64le amd64
start 120 nosplit callind; REJECT ppc64 ppc64le amd64 386 arm64
start 124 nosplit callind; REJECT ppc64 ppc64le amd64 386
start 128 nosplit callind; REJECT
start 132 nosplit callind; REJECT
start 136 nosplit callind; REJECT
# Issue 7623
start 0 call f; f 112
start 0 call f; f 116
start 0 call f; f 120
start 0 call f; f 124
start 0 call f; f 128
start 0 call f; f 132
start 0 call f; f 136
`
var (
commentRE = regexp.MustCompile(`(?m)^#.*`)
rejectRE = regexp.MustCompile(`(?s)\A(.+?)((\n|; *)REJECT(.*))?\z`)
lineRE = regexp.MustCompile(`(\w+) (\d+)( nosplit)?(.*)`)
callRE = regexp.MustCompile(`\bcall (\w+)\b`)
callindRE = regexp.MustCompile(`\bcallind\b`)
)
func main() {
goarch := os.Getenv("GOARCH")
if goarch == "" {
goarch = runtime.GOARCH
}
dir, err := ioutil.TempDir("", "go-test-nosplit")
if err != nil {
bug()
fmt.Printf("creating temp dir: %v\n", err)
return
}
defer os.RemoveAll(dir)
os.Setenv("GOPATH", filepath.Join(dir, "_gopath"))
if err := ioutil.WriteFile(filepath.Join(dir, "go.mod"), []byte("module go-test-nosplit\n"), 0666); err != nil {
log.Panic(err)
}
tests = strings.Replace(tests, "\t", " ", -1)
tests = commentRE.ReplaceAllString(tests, "")
nok := 0
nfail := 0
TestCases:
for len(tests) > 0 {
var stanza string
i := strings.Index(tests, "\nstart ")
if i < 0 {
stanza, tests = tests, ""
} else {
stanza, tests = tests[:i], tests[i+1:]
}
m := rejectRE.FindStringSubmatch(stanza)
if m == nil {
bug()
fmt.Printf("invalid stanza:\n\t%s\n", indent(stanza))
continue
}
lines := strings.TrimSpace(m[1])
reject := false
if m[2] != "" {
if strings.TrimSpace(m[4]) == "" {
reject = true
} else {
for _, rej := range strings.Fields(m[4]) {
if rej == goarch {
reject = true
}
}
}
}
if lines == "" && !reject {
continue
}
var gobuf bytes.Buffer
fmt.Fprintf(&gobuf, "package main\n")
var buf bytes.Buffer
ptrSize := 4
switch goarch {
case "mips", "mipsle":
fmt.Fprintf(&buf, "#define REGISTER (R0)\n")
case "mips64", "mips64le":
ptrSize = 8
fmt.Fprintf(&buf, "#define REGISTER (R0)\n")
case "ppc64", "ppc64le":
ptrSize = 8
fmt.Fprintf(&buf, "#define REGISTER (CTR)\n")
case "arm":
fmt.Fprintf(&buf, "#define REGISTER (R0)\n")
case "arm64":
ptrSize = 8
fmt.Fprintf(&buf, "#define REGISTER (R0)\n")
case "amd64":
ptrSize = 8
fmt.Fprintf(&buf, "#define REGISTER AX\n")
case "riscv64":
ptrSize = 8
fmt.Fprintf(&buf, "#define REGISTER A0\n")
case "s390x":
ptrSize = 8
fmt.Fprintf(&buf, "#define REGISTER R10\n")
default:
fmt.Fprintf(&buf, "#define REGISTER AX\n")
}
// Since all of the functions we're generating are
// ABI0, first enter ABI0 via a splittable function
// and then go to the chain we're testing. This way we
// don't have to account for ABI wrappers in the chain.
fmt.Fprintf(&gobuf, "func main0()\n")
fmt.Fprintf(&gobuf, "func main() { main0() }\n")
fmt.Fprintf(&buf, "TEXT ·main0(SB),0,$0-0\n\tCALL ·start(SB)\n")
for _, line := range strings.Split(lines, "\n") {
line = strings.TrimSpace(line)
if line == "" {
continue
}
for i, subline := range strings.Split(line, ";") {
subline = strings.TrimSpace(subline)
if subline == "" {
continue
}
m := lineRE.FindStringSubmatch(subline)
if m == nil {
bug()
fmt.Printf("invalid function line: %s\n", subline)
continue TestCases
}
name := m[1]
size, _ := strconv.Atoi(m[2])
runtime: static lock ranking for the runtime (enabled by GOEXPERIMENT) I took some of the infrastructure from Austin's lock logging CR https://go-review.googlesource.com/c/go/+/192704 (with deadlock detection from the logs), and developed a setup to give static lock ranking for runtime locks. Static lock ranking establishes a documented total ordering among locks, and then reports an error if the total order is violated. This can happen if a deadlock happens (by acquiring a sequence of locks in different orders), or if just one side of a possible deadlock happens. Lock ordering deadlocks cannot happen as long as the lock ordering is followed. Along the way, I found a deadlock involving the new timer code, which Ian fixed via https://go-review.googlesource.com/c/go/+/207348, as well as two other potential deadlocks. See the constants at the top of runtime/lockrank.go to show the static lock ranking that I ended up with, along with some comments. This is great documentation of the current intended lock ordering when acquiring multiple locks in the runtime. I also added an array lockPartialOrder[] which shows and enforces the current partial ordering among locks (which is embedded within the total ordering). This is more specific about the dependencies among locks. I don't try to check the ranking within a lock class with multiple locks that can be acquired at the same time (i.e. check the ranking when multiple hchan locks are acquired). Currently, I am doing a lockInit() call to set the lock rank of most locks. Any lock that is not otherwise initialized is assumed to be a leaf lock (a very high rank lock), so that eliminates the need to do anything for a bunch of locks (including all architecture-dependent locks). For two locks, root.lock and notifyList.lock (only in the runtime/sema.go file), it is not as easy to do lock initialization, so instead, I am passing the lock rank with the lock calls. For Windows compilation, I needed to increase the StackGuard size from 896 to 928 because of the new lock-rank checking functions. Checking of the static lock ranking is enabled by setting GOEXPERIMENT=staticlockranking before doing a run. To make sure that the static lock ranking code has no overhead in memory or CPU when not enabled by GOEXPERIMENT, I changed 'go build/install' so that it defines a build tag (with the same name) whenever any experiment has been baked into the toolchain (by checking Expstring()). This allows me to avoid increasing the size of the 'mutex' type when static lock ranking is not enabled. Fixes #38029 Change-Id: I154217ff307c47051f8dae9c2a03b53081acd83a Reviewed-on: https://go-review.googlesource.com/c/go/+/207619 Reviewed-by: Dan Scales <danscales@google.com> Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Dan Scales <danscales@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2019-11-14 01:34:47 +00:00
// The limit was originally 128 but is now 800 (928-128).
// Instead of rewriting the test cases above, adjust
// the first stack frame to use up the extra bytes.
if i == 0 {
runtime: static lock ranking for the runtime (enabled by GOEXPERIMENT) I took some of the infrastructure from Austin's lock logging CR https://go-review.googlesource.com/c/go/+/192704 (with deadlock detection from the logs), and developed a setup to give static lock ranking for runtime locks. Static lock ranking establishes a documented total ordering among locks, and then reports an error if the total order is violated. This can happen if a deadlock happens (by acquiring a sequence of locks in different orders), or if just one side of a possible deadlock happens. Lock ordering deadlocks cannot happen as long as the lock ordering is followed. Along the way, I found a deadlock involving the new timer code, which Ian fixed via https://go-review.googlesource.com/c/go/+/207348, as well as two other potential deadlocks. See the constants at the top of runtime/lockrank.go to show the static lock ranking that I ended up with, along with some comments. This is great documentation of the current intended lock ordering when acquiring multiple locks in the runtime. I also added an array lockPartialOrder[] which shows and enforces the current partial ordering among locks (which is embedded within the total ordering). This is more specific about the dependencies among locks. I don't try to check the ranking within a lock class with multiple locks that can be acquired at the same time (i.e. check the ranking when multiple hchan locks are acquired). Currently, I am doing a lockInit() call to set the lock rank of most locks. Any lock that is not otherwise initialized is assumed to be a leaf lock (a very high rank lock), so that eliminates the need to do anything for a bunch of locks (including all architecture-dependent locks). For two locks, root.lock and notifyList.lock (only in the runtime/sema.go file), it is not as easy to do lock initialization, so instead, I am passing the lock rank with the lock calls. For Windows compilation, I needed to increase the StackGuard size from 896 to 928 because of the new lock-rank checking functions. Checking of the static lock ranking is enabled by setting GOEXPERIMENT=staticlockranking before doing a run. To make sure that the static lock ranking code has no overhead in memory or CPU when not enabled by GOEXPERIMENT, I changed 'go build/install' so that it defines a build tag (with the same name) whenever any experiment has been baked into the toolchain (by checking Expstring()). This allows me to avoid increasing the size of the 'mutex' type when static lock ranking is not enabled. Fixes #38029 Change-Id: I154217ff307c47051f8dae9c2a03b53081acd83a Reviewed-on: https://go-review.googlesource.com/c/go/+/207619 Reviewed-by: Dan Scales <danscales@google.com> Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Dan Scales <danscales@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2019-11-14 01:34:47 +00:00
size += (928 - 128) - 128
// Noopt builds have a larger stackguard.
// See ../src/cmd/dist/buildruntime.go:stackGuardMultiplier
// This increase is included in objabi.StackGuard
for _, s := range strings.Split(os.Getenv("GO_GCFLAGS"), " ") {
if s == "-N" {
runtime: static lock ranking for the runtime (enabled by GOEXPERIMENT) I took some of the infrastructure from Austin's lock logging CR https://go-review.googlesource.com/c/go/+/192704 (with deadlock detection from the logs), and developed a setup to give static lock ranking for runtime locks. Static lock ranking establishes a documented total ordering among locks, and then reports an error if the total order is violated. This can happen if a deadlock happens (by acquiring a sequence of locks in different orders), or if just one side of a possible deadlock happens. Lock ordering deadlocks cannot happen as long as the lock ordering is followed. Along the way, I found a deadlock involving the new timer code, which Ian fixed via https://go-review.googlesource.com/c/go/+/207348, as well as two other potential deadlocks. See the constants at the top of runtime/lockrank.go to show the static lock ranking that I ended up with, along with some comments. This is great documentation of the current intended lock ordering when acquiring multiple locks in the runtime. I also added an array lockPartialOrder[] which shows and enforces the current partial ordering among locks (which is embedded within the total ordering). This is more specific about the dependencies among locks. I don't try to check the ranking within a lock class with multiple locks that can be acquired at the same time (i.e. check the ranking when multiple hchan locks are acquired). Currently, I am doing a lockInit() call to set the lock rank of most locks. Any lock that is not otherwise initialized is assumed to be a leaf lock (a very high rank lock), so that eliminates the need to do anything for a bunch of locks (including all architecture-dependent locks). For two locks, root.lock and notifyList.lock (only in the runtime/sema.go file), it is not as easy to do lock initialization, so instead, I am passing the lock rank with the lock calls. For Windows compilation, I needed to increase the StackGuard size from 896 to 928 because of the new lock-rank checking functions. Checking of the static lock ranking is enabled by setting GOEXPERIMENT=staticlockranking before doing a run. To make sure that the static lock ranking code has no overhead in memory or CPU when not enabled by GOEXPERIMENT, I changed 'go build/install' so that it defines a build tag (with the same name) whenever any experiment has been baked into the toolchain (by checking Expstring()). This allows me to avoid increasing the size of the 'mutex' type when static lock ranking is not enabled. Fixes #38029 Change-Id: I154217ff307c47051f8dae9c2a03b53081acd83a Reviewed-on: https://go-review.googlesource.com/c/go/+/207619 Reviewed-by: Dan Scales <danscales@google.com> Reviewed-by: Keith Randall <khr@golang.org> Run-TryBot: Dan Scales <danscales@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2019-11-14 01:34:47 +00:00
size += 928
}
}
}
build: support frame-pointer for arm64 Supporting frame-pointer makes Linux's perf and other profilers much more useful because it lets them gather a stack trace efficiently on profiling events. Major changes include: 1. save FP on the word below where RSP is pointing to (proposed by Cherry and Austin) 2. adjust some specific offsets in runtime assembly and wrapper code 3. add support to FP in goroutine scheduler 4. adjust link stack overflow check to take the extra word into account 5. adjust nosplit test cases to enable frame sizes which are 16 bytes aligned Performance impacts on go1 benchmarks: Enable frame-pointer (by default) name old time/op new time/op delta BinaryTree17-46 5.94s ± 0% 6.00s ± 0% +1.03% (p=0.029 n=4+4) Fannkuch11-46 2.84s ± 1% 2.77s ± 0% -2.58% (p=0.008 n=5+5) FmtFprintfEmpty-46 55.0ns ± 1% 58.9ns ± 1% +7.06% (p=0.008 n=5+5) FmtFprintfString-46 102ns ± 0% 105ns ± 0% +2.94% (p=0.008 n=5+5) FmtFprintfInt-46 118ns ± 0% 117ns ± 1% -1.19% (p=0.000 n=4+5) FmtFprintfIntInt-46 181ns ± 0% 182ns ± 1% ~ (p=0.444 n=5+5) FmtFprintfPrefixedInt-46 215ns ± 1% 214ns ± 0% ~ (p=0.254 n=5+4) FmtFprintfFloat-46 292ns ± 0% 296ns ± 0% +1.46% (p=0.029 n=4+4) FmtManyArgs-46 720ns ± 0% 732ns ± 0% +1.72% (p=0.008 n=5+5) GobDecode-46 9.82ms ± 1% 10.03ms ± 2% +2.10% (p=0.008 n=5+5) GobEncode-46 8.14ms ± 0% 8.72ms ± 1% +7.14% (p=0.008 n=5+5) Gzip-46 420ms ± 0% 424ms ± 0% +0.92% (p=0.008 n=5+5) Gunzip-46 48.2ms ± 0% 48.4ms ± 0% +0.41% (p=0.008 n=5+5) HTTPClientServer-46 201µs ± 4% 201µs ± 0% ~ (p=0.730 n=5+4) JSONEncode-46 17.1ms ± 0% 17.7ms ± 1% +3.80% (p=0.008 n=5+5) JSONDecode-46 88.0ms ± 0% 90.1ms ± 0% +2.42% (p=0.008 n=5+5) Mandelbrot200-46 5.06ms ± 0% 5.07ms ± 0% ~ (p=0.310 n=5+5) GoParse-46 5.04ms ± 0% 5.12ms ± 0% +1.53% (p=0.008 n=5+5) RegexpMatchEasy0_32-46 117ns ± 0% 117ns ± 0% ~ (all equal) RegexpMatchEasy0_1K-46 332ns ± 0% 329ns ± 0% -0.78% (p=0.008 n=5+5) RegexpMatchEasy1_32-46 104ns ± 0% 113ns ± 0% +8.65% (p=0.029 n=4+4) RegexpMatchEasy1_1K-46 563ns ± 0% 569ns ± 0% +1.10% (p=0.008 n=5+5) RegexpMatchMedium_32-46 167ns ± 2% 177ns ± 1% +5.74% (p=0.008 n=5+5) RegexpMatchMedium_1K-46 49.5µs ± 0% 53.4µs ± 0% +7.81% (p=0.008 n=5+5) RegexpMatchHard_32-46 2.56µs ± 1% 2.72µs ± 0% +6.01% (p=0.008 n=5+5) RegexpMatchHard_1K-46 77.0µs ± 0% 81.8µs ± 0% +6.24% (p=0.016 n=5+4) Revcomp-46 631ms ± 1% 627ms ± 1% ~ (p=0.095 n=5+5) Template-46 81.8ms ± 0% 86.3ms ± 0% +5.55% (p=0.008 n=5+5) TimeParse-46 423ns ± 0% 432ns ± 0% +2.32% (p=0.008 n=5+5) TimeFormat-46 478ns ± 2% 497ns ± 1% +3.89% (p=0.008 n=5+5) [Geo mean] 71.6µs 73.3µs +2.45% name old speed new speed delta GobDecode-46 78.1MB/s ± 1% 76.6MB/s ± 2% -2.04% (p=0.008 n=5+5) GobEncode-46 94.3MB/s ± 0% 88.0MB/s ± 1% -6.67% (p=0.008 n=5+5) Gzip-46 46.2MB/s ± 0% 45.8MB/s ± 0% -0.91% (p=0.008 n=5+5) Gunzip-46 403MB/s ± 0% 401MB/s ± 0% -0.41% (p=0.008 n=5+5) JSONEncode-46 114MB/s ± 0% 109MB/s ± 1% -3.66% (p=0.008 n=5+5) JSONDecode-46 22.0MB/s ± 0% 21.5MB/s ± 0% -2.35% (p=0.008 n=5+5) GoParse-46 11.5MB/s ± 0% 11.3MB/s ± 0% -1.51% (p=0.008 n=5+5) RegexpMatchEasy0_32-46 272MB/s ± 0% 272MB/s ± 1% ~ (p=0.190 n=4+5) RegexpMatchEasy0_1K-46 3.08GB/s ± 0% 3.11GB/s ± 0% +0.77% (p=0.008 n=5+5) RegexpMatchEasy1_32-46 306MB/s ± 0% 283MB/s ± 0% -7.63% (p=0.029 n=4+4) RegexpMatchEasy1_1K-46 1.82GB/s ± 0% 1.80GB/s ± 0% -1.07% (p=0.008 n=5+5) RegexpMatchMedium_32-46 5.99MB/s ± 0% 5.64MB/s ± 1% -5.77% (p=0.016 n=4+5) RegexpMatchMedium_1K-46 20.7MB/s ± 0% 19.2MB/s ± 0% -7.25% (p=0.008 n=5+5) RegexpMatchHard_32-46 12.5MB/s ± 1% 11.8MB/s ± 0% -5.66% (p=0.008 n=5+5) RegexpMatchHard_1K-46 13.3MB/s ± 0% 12.5MB/s ± 1% -6.01% (p=0.008 n=5+5) Revcomp-46 402MB/s ± 1% 405MB/s ± 1% ~ (p=0.095 n=5+5) Template-46 23.7MB/s ± 0% 22.5MB/s ± 0% -5.25% (p=0.008 n=5+5) [Geo mean] 82.2MB/s 79.6MB/s -3.26% Disable frame-pointer (GOEXPERIMENT=noframepointer) name old time/op new time/op delta BinaryTree17-46 5.94s ± 0% 5.96s ± 0% +0.39% (p=0.029 n=4+4) Fannkuch11-46 2.84s ± 1% 2.79s ± 1% -1.68% (p=0.008 n=5+5) FmtFprintfEmpty-46 55.0ns ± 1% 55.2ns ± 3% ~ (p=0.794 n=5+5) FmtFprintfString-46 102ns ± 0% 103ns ± 0% +0.98% (p=0.016 n=5+4) FmtFprintfInt-46 118ns ± 0% 115ns ± 0% -2.54% (p=0.029 n=4+4) FmtFprintfIntInt-46 181ns ± 0% 179ns ± 0% -1.10% (p=0.000 n=5+4) FmtFprintfPrefixedInt-46 215ns ± 1% 213ns ± 0% ~ (p=0.143 n=5+4) FmtFprintfFloat-46 292ns ± 0% 300ns ± 0% +2.83% (p=0.029 n=4+4) FmtManyArgs-46 720ns ± 0% 739ns ± 0% +2.64% (p=0.008 n=5+5) GobDecode-46 9.82ms ± 1% 9.78ms ± 1% ~ (p=0.151 n=5+5) GobEncode-46 8.14ms ± 0% 8.12ms ± 1% ~ (p=0.690 n=5+5) Gzip-46 420ms ± 0% 420ms ± 0% ~ (p=0.548 n=5+5) Gunzip-46 48.2ms ± 0% 48.0ms ± 0% -0.33% (p=0.032 n=5+5) HTTPClientServer-46 201µs ± 4% 199µs ± 3% ~ (p=0.548 n=5+5) JSONEncode-46 17.1ms ± 0% 17.2ms ± 0% ~ (p=0.056 n=5+5) JSONDecode-46 88.0ms ± 0% 88.6ms ± 0% +0.64% (p=0.008 n=5+5) Mandelbrot200-46 5.06ms ± 0% 5.07ms ± 0% ~ (p=0.548 n=5+5) GoParse-46 5.04ms ± 0% 5.07ms ± 0% +0.65% (p=0.008 n=5+5) RegexpMatchEasy0_32-46 117ns ± 0% 112ns ± 4% -4.27% (p=0.016 n=4+5) RegexpMatchEasy0_1K-46 332ns ± 0% 330ns ± 1% ~ (p=0.095 n=5+5) RegexpMatchEasy1_32-46 104ns ± 0% 110ns ± 1% +5.29% (p=0.029 n=4+4) RegexpMatchEasy1_1K-46 563ns ± 0% 567ns ± 2% ~ (p=0.151 n=5+5) RegexpMatchMedium_32-46 167ns ± 2% 166ns ± 0% ~ (p=0.333 n=5+4) RegexpMatchMedium_1K-46 49.5µs ± 0% 49.6µs ± 0% ~ (p=0.841 n=5+5) RegexpMatchHard_32-46 2.56µs ± 1% 2.49µs ± 0% -2.81% (p=0.008 n=5+5) RegexpMatchHard_1K-46 77.0µs ± 0% 75.8µs ± 0% -1.55% (p=0.008 n=5+5) Revcomp-46 631ms ± 1% 628ms ± 0% ~ (p=0.095 n=5+5) Template-46 81.8ms ± 0% 84.3ms ± 1% +3.05% (p=0.008 n=5+5) TimeParse-46 423ns ± 0% 425ns ± 0% +0.52% (p=0.008 n=5+5) TimeFormat-46 478ns ± 2% 478ns ± 1% ~ (p=1.000 n=5+5) [Geo mean] 71.6µs 71.6µs -0.01% name old speed new speed delta GobDecode-46 78.1MB/s ± 1% 78.5MB/s ± 1% ~ (p=0.151 n=5+5) GobEncode-46 94.3MB/s ± 0% 94.5MB/s ± 1% ~ (p=0.690 n=5+5) Gzip-46 46.2MB/s ± 0% 46.2MB/s ± 0% ~ (p=0.571 n=5+5) Gunzip-46 403MB/s ± 0% 404MB/s ± 0% +0.33% (p=0.032 n=5+5) JSONEncode-46 114MB/s ± 0% 113MB/s ± 0% ~ (p=0.056 n=5+5) JSONDecode-46 22.0MB/s ± 0% 21.9MB/s ± 0% -0.64% (p=0.008 n=5+5) GoParse-46 11.5MB/s ± 0% 11.4MB/s ± 0% -0.64% (p=0.008 n=5+5) RegexpMatchEasy0_32-46 272MB/s ± 0% 285MB/s ± 4% +4.74% (p=0.016 n=4+5) RegexpMatchEasy0_1K-46 3.08GB/s ± 0% 3.10GB/s ± 1% ~ (p=0.151 n=5+5) RegexpMatchEasy1_32-46 306MB/s ± 0% 290MB/s ± 1% -5.21% (p=0.029 n=4+4) RegexpMatchEasy1_1K-46 1.82GB/s ± 0% 1.81GB/s ± 2% ~ (p=0.151 n=5+5) RegexpMatchMedium_32-46 5.99MB/s ± 0% 6.02MB/s ± 1% ~ (p=0.063 n=4+5) RegexpMatchMedium_1K-46 20.7MB/s ± 0% 20.7MB/s ± 0% ~ (p=0.659 n=5+5) RegexpMatchHard_32-46 12.5MB/s ± 1% 12.8MB/s ± 0% +2.88% (p=0.008 n=5+5) RegexpMatchHard_1K-46 13.3MB/s ± 0% 13.5MB/s ± 0% +1.58% (p=0.008 n=5+5) Revcomp-46 402MB/s ± 1% 405MB/s ± 0% ~ (p=0.095 n=5+5) Template-46 23.7MB/s ± 0% 23.0MB/s ± 1% -2.95% (p=0.008 n=5+5) [Geo mean] 82.2MB/s 82.3MB/s +0.04% Frame-pointer is enabled on Linux by default but can be disabled by setting: GOEXPERIMENT=noframepointer. Fixes #10110 Change-Id: I1bfaca6dba29a63009d7c6ab04ed7a1413d9479e Reviewed-on: https://go-review.googlesource.com/61511 Reviewed-by: Cherry Zhang <cherryyz@google.com> Run-TryBot: Cherry Zhang <cherryyz@google.com> TryBot-Result: Gobot Gobot <gobot@golang.org>
2018-08-29 06:55:03 +00:00
if size%ptrSize == 4 {
continue TestCases
}
nosplit := m[3]
body := m[4]
if nosplit != "" {
nosplit = ",7"
} else {
nosplit = ",0"
}
body = callRE.ReplaceAllString(body, "CALL ·$1(SB);")
body = callindRE.ReplaceAllString(body, "CALL REGISTER;")
fmt.Fprintf(&gobuf, "func %s()\n", name)
fmt.Fprintf(&buf, "TEXT ·%s(SB)%s,$%d-0\n\t%s\n\tRET\n\n", name, nosplit, size, body)
}
}
if debug {
fmt.Printf("===\n%s\n", strings.TrimSpace(stanza))
fmt.Printf("-- main.go --\n%s", gobuf.String())
fmt.Printf("-- asm.s --\n%s", buf.String())
}
if err := ioutil.WriteFile(filepath.Join(dir, "asm.s"), buf.Bytes(), 0666); err != nil {
log.Fatal(err)
}
if err := ioutil.WriteFile(filepath.Join(dir, "main.go"), gobuf.Bytes(), 0666); err != nil {
log.Fatal(err)
}
cmd := exec.Command("go", "build")
cmd.Dir = dir
output, err := cmd.CombinedOutput()
if err == nil {
nok++
if reject {
bug()
fmt.Printf("accepted incorrectly:\n\t%s\n", indent(strings.TrimSpace(stanza)))
}
} else {
nfail++
if !reject {
bug()
fmt.Printf("rejected incorrectly:\n\t%s\n", indent(strings.TrimSpace(stanza)))
fmt.Printf("\n\tlinker output:\n\t%s\n", indent(string(output)))
}
}
}
if !bugged && (nok == 0 || nfail == 0) {
bug()
fmt.Printf("not enough test cases run\n")
}
}
func indent(s string) string {
return strings.Replace(s, "\n", "\n\t", -1)
}
var bugged = false
func bug() {
if !bugged {
bugged = true
fmt.Printf("BUG\n")
}
}