runtime: faster segmented stacks

benchmark                      old ns/op    new ns/op    delta
BenchmarkStackGrowth                 665          548  -17.59%
BenchmarkStackGrowth-2               333          274  -17.72%
BenchmarkStackGrowth-4               224          168  -25.00%
BenchmarkStackGrowth-8               124           91  -26.21%
BenchmarkStackGrowth-16               82           70  -14.55%
BenchmarkStackGrowth-32               73           59  -19.49%

R=nigeltao, minux.ma, rsc
CC=golang-dev
https://golang.org/cl/7026044
This commit is contained in:
Dmitriy Vyukov 2013-01-10 11:36:40 +04:00
parent f82db7d9e4
commit 7847f328cd
2 changed files with 35 additions and 21 deletions

View file

@ -829,7 +829,7 @@ runtime·stackalloc(uint32 n)
// allocator, assuming that inside malloc all the stack
// frames are small, so that all the stack allocations
// will be a single size, the minimum (right now, 5k).
if(m->mallocing || m->gcing || n == FixedStack) {
if(n == FixedStack || m->mallocing || m->gcing) {
if(n != FixedStack) {
runtime·printf("stackalloc: in malloc, size=%d want %d", FixedStack, n);
runtime·throw("stackalloc");
@ -852,7 +852,7 @@ runtime·stackfree(void *v, uintptr n)
{
uint32 pos;
if(m->mallocing || m->gcing || n == FixedStack) {
if(n == FixedStack || m->mallocing || m->gcing) {
if(m->stackcachecnt == StackCacheSize)
stackcacherelease();
pos = m->stackcachepos;

View file

@ -1093,10 +1093,12 @@ runtime·exitsyscall(void)
void
runtime·oldstack(void)
{
Stktop *top, old;
Stktop *top;
Gobuf label;
uint32 argsize;
uintptr cret;
byte *sp;
byte *sp, *old;
uintptr *src, *dst, *dstend;
G *gp;
int64 goid;
@ -1104,24 +1106,29 @@ runtime·oldstack(void)
gp = m->curg;
top = (Stktop*)gp->stackbase;
old = (byte*)gp->stackguard - StackGuard;
sp = (byte*)top;
old = *top;
argsize = old.argsize;
argsize = top->argsize;
if(argsize > 0) {
sp -= argsize;
runtime·memmove(top->argp, sp, argsize);
dst = (uintptr*)top->argp;
dstend = dst + argsize/sizeof(*dst);
src = (uintptr*)sp;
while(dst < dstend)
*dst++ = *src++;
}
goid = old.gobuf.g->goid; // fault if g is bad, before gogo
goid = top->gobuf.g->goid; // fault if g is bad, before gogo
USED(goid);
if(old.free != 0)
runtime·stackfree((byte*)gp->stackguard - StackGuard, old.free);
gp->stackbase = (uintptr)old.stackbase;
gp->stackguard = (uintptr)old.stackguard;
label = top->gobuf;
gp->stackbase = (uintptr)top->stackbase;
gp->stackguard = (uintptr)top->stackguard;
if(top->free != 0)
runtime·stackfree(old, top->free);
cret = m->cret;
m->cret = 0; // drop reference
runtime·gogo(&old.gobuf, cret);
runtime·gogo(&label, cret);
}
// Called from reflect·call or from runtime·morestack when a new
@ -1135,18 +1142,16 @@ runtime·newstack(void)
int32 framesize, minalloc, argsize;
Stktop *top;
byte *stk, *sp;
uintptr *src, *dst, *dstend;
G *gp;
Gobuf label;
bool reflectcall;
uintptr free;
framesize = m->moreframesize;
minalloc = m->moreframesize_minalloc;
argsize = m->moreargsize;
gp = m->curg;
m->moreframesize_minalloc = 0;
if(m->morebuf.sp < gp->stackguard - StackGuard) {
runtime·printf("runtime: split stack overflow: %p < %p\n", m->morebuf.sp, gp->stackguard - StackGuard);
runtime·throw("runtime: split stack overflow");
@ -1156,12 +1161,17 @@ runtime·newstack(void)
runtime·throw("runtime: stack split argsize");
}
minalloc = 0;
reflectcall = framesize==1;
if(reflectcall)
if(reflectcall) {
framesize = 0;
if(framesize < minalloc)
framesize = minalloc;
// moreframesize_minalloc is only set in runtime·gc(),
// that calls newstack via reflect·call().
minalloc = m->moreframesize_minalloc;
m->moreframesize_minalloc = 0;
if(framesize < minalloc)
framesize = minalloc;
}
if(reflectcall && minalloc == 0 && m->morebuf.sp - sizeof(Stktop) - argsize - 32 > gp->stackguard) {
// special case: called from reflect.call (framesize==1)
@ -1209,7 +1219,11 @@ runtime·newstack(void)
sp = (byte*)top;
if(argsize > 0) {
sp -= argsize;
runtime·memmove(sp, top->argp, argsize);
dst = (uintptr*)sp;
dstend = dst + argsize/sizeof(*dst);
src = (uintptr*)top->argp;
while(dst < dstend)
*dst++ = *src++;
}
if(thechar == '5') {
// caller would have saved its LR below args.