cmd/compile: make link register allocatable in non-leaf functions

We save and restore the link register in non-leaf functions because
it is clobbered by CALLs. It is therefore available for general
purpose use.

Only enabled on s390x currently. The RC4 benchmarks in particular
benefit from the extra register:

name     old speed     new speed     delta
RC4_128  243MB/s ± 2%  341MB/s ± 2%  +40.46%  (p=0.008 n=5+5)
RC4_1K   267MB/s ± 0%  359MB/s ± 1%  +34.32%  (p=0.008 n=5+5)
RC4_8K   271MB/s ± 0%  362MB/s ± 0%  +33.61%  (p=0.008 n=5+5)

Change-Id: Id23bff95e771da9425353da2f32668b8e34ba09f
Reviewed-on: https://go-review.googlesource.com/30597
Reviewed-by: Cherry Zhang <cherryyz@google.com>
Run-TryBot: Michael Munday <munday@ca.ibm.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
This commit is contained in:
Michael Munday 2016-10-06 15:06:45 -04:00
parent 809a1de1ac
commit 15817e409b
11 changed files with 358 additions and 315 deletions

View file

@ -25,6 +25,7 @@ type Config struct {
fpRegMask regMask // floating point register mask
specialRegMask regMask // special register mask
FPReg int8 // register number of frame pointer, -1 if not used
LinkReg int8 // register number of link register if it is a general purpose register, -1 if not used
hasGReg bool // has hardware g register
fe Frontend // callbacks into compiler frontend
HTML *HTMLWriter // html writer, for debugging
@ -143,6 +144,7 @@ func NewConfig(arch string, fe Frontend, ctxt *obj.Link, optimize bool) *Config
c.gpRegMask = gpRegMaskAMD64
c.fpRegMask = fpRegMaskAMD64
c.FPReg = framepointerRegAMD64
c.LinkReg = linkRegAMD64
c.hasGReg = false
case "amd64p32":
c.IntSize = 4
@ -154,6 +156,7 @@ func NewConfig(arch string, fe Frontend, ctxt *obj.Link, optimize bool) *Config
c.gpRegMask = gpRegMaskAMD64
c.fpRegMask = fpRegMaskAMD64
c.FPReg = framepointerRegAMD64
c.LinkReg = linkRegAMD64
c.hasGReg = false
c.noDuffDevice = true
case "386":
@ -166,6 +169,7 @@ func NewConfig(arch string, fe Frontend, ctxt *obj.Link, optimize bool) *Config
c.gpRegMask = gpRegMask386
c.fpRegMask = fpRegMask386
c.FPReg = framepointerReg386
c.LinkReg = linkReg386
c.hasGReg = false
case "arm":
c.IntSize = 4
@ -177,6 +181,7 @@ func NewConfig(arch string, fe Frontend, ctxt *obj.Link, optimize bool) *Config
c.gpRegMask = gpRegMaskARM
c.fpRegMask = fpRegMaskARM
c.FPReg = framepointerRegARM
c.LinkReg = linkRegARM
c.hasGReg = true
case "arm64":
c.IntSize = 8
@ -188,6 +193,7 @@ func NewConfig(arch string, fe Frontend, ctxt *obj.Link, optimize bool) *Config
c.gpRegMask = gpRegMaskARM64
c.fpRegMask = fpRegMaskARM64
c.FPReg = framepointerRegARM64
c.LinkReg = linkRegARM64
c.hasGReg = true
c.noDuffDevice = obj.GOOS == "darwin" // darwin linker cannot handle BR26 reloc with non-zero addend
case "ppc64":
@ -203,6 +209,7 @@ func NewConfig(arch string, fe Frontend, ctxt *obj.Link, optimize bool) *Config
c.gpRegMask = gpRegMaskPPC64
c.fpRegMask = fpRegMaskPPC64
c.FPReg = framepointerRegPPC64
c.LinkReg = linkRegPPC64
c.noDuffDevice = true // TODO: Resolve PPC64 DuffDevice (has zero, but not copy)
c.NeedsFpScratch = true
c.hasGReg = true
@ -217,6 +224,7 @@ func NewConfig(arch string, fe Frontend, ctxt *obj.Link, optimize bool) *Config
c.fpRegMask = fpRegMaskMIPS64
c.specialRegMask = specialRegMaskMIPS64
c.FPReg = framepointerRegMIPS64
c.LinkReg = linkRegMIPS64
c.hasGReg = true
case "s390x":
c.IntSize = 8
@ -228,6 +236,7 @@ func NewConfig(arch string, fe Frontend, ctxt *obj.Link, optimize bool) *Config
c.gpRegMask = gpRegMaskS390X
c.fpRegMask = fpRegMaskS390X
c.FPReg = framepointerRegS390X
c.LinkReg = linkRegS390X
c.hasGReg = true
c.noDuffDevice = true
default:

View file

@ -501,5 +501,6 @@ func init() {
gpregmask: gp,
fpregmask: fp,
framepointerreg: int8(num["BP"]),
linkreg: -1, // not used
})
}

View file

@ -588,5 +588,6 @@ func init() {
gpregmask: gp,
fpregmask: fp,
framepointerreg: int8(num["BP"]),
linkreg: -1, // not used
})
}

View file

@ -530,5 +530,6 @@ func init() {
gpregmask: gp,
fpregmask: fp,
framepointerreg: -1, // not used
linkreg: -1, // not used
})
}

View file

@ -526,5 +526,6 @@ func init() {
gpregmask: gp,
fpregmask: fp,
framepointerreg: -1, // not used
linkreg: -1, // not used
})
}

View file

@ -376,5 +376,6 @@ func init() {
fpregmask: fp,
specialregmask: hi | lo,
framepointerreg: -1, // not used
linkreg: -1, // not used
})
}

View file

@ -393,5 +393,6 @@ func init() {
gpregmask: gp,
fpregmask: fp,
framepointerreg: int8(num["SP"]),
linkreg: -1, // not used
})
}

View file

@ -91,7 +91,7 @@ func init() {
r0 = buildReg("R0")
// R10 and R11 are reserved by the assembler.
gp = buildReg("R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12")
gp = buildReg("R0 R1 R2 R3 R4 R5 R6 R7 R8 R9 R12 R14")
gpsp = gp | sp
// R0 is considered to contain the value 0 in address calculations.
@ -547,5 +547,6 @@ func init() {
gpregmask: gp,
fpregmask: fp,
framepointerreg: -1, // not used
linkreg: int8(num["R14"]),
})
}

View file

@ -31,6 +31,7 @@ type arch struct {
fpregmask regMask
specialregmask regMask
framepointerreg int8
linkreg int8
generic bool
}
@ -295,6 +296,7 @@ func genOp() {
fmt.Fprintf(w, "var fpRegMask%s = regMask(%d)\n", a.name, a.fpregmask)
fmt.Fprintf(w, "var specialRegMask%s = regMask(%d)\n", a.name, a.specialregmask)
fmt.Fprintf(w, "var framepointerReg%s = int8(%d)\n", a.name, a.framepointerreg)
fmt.Fprintf(w, "var linkReg%s = int8(%d)\n", a.name, a.linkreg)
}
// gofmt result

File diff suppressed because it is too large Load diff

View file

@ -460,6 +460,18 @@ func (s *regAllocState) allocValToReg(v *Value, mask regMask, nospill bool, line
return c
}
// isLeaf reports whether f performs any calls.
func isLeaf(f *Func) bool {
for _, b := range f.Blocks {
for _, v := range b.Values {
if opcodeTable[v.Op].call {
return false
}
}
}
return true
}
func (s *regAllocState) init(f *Func) {
s.f = f
s.registers = f.Config.registers
@ -510,6 +522,12 @@ func (s *regAllocState) init(f *Func) {
s.allocatable &^= 1 << 12 // R12
}
}
if s.f.Config.LinkReg != -1 {
if isLeaf(f) {
// Leaf functions don't save/restore the link register.
s.allocatable &^= 1 << uint(s.f.Config.LinkReg)
}
}
if s.f.Config.ctxt.Flag_dynlink {
switch s.f.Config.arch {
case "amd64":