From 4aa50434e13d12eb9755a992d6d4ad93e201d624 Mon Sep 17 00:00:00 2001 From: Keith Randall Date: Wed, 30 Jul 2014 09:01:52 -0700 Subject: [PATCH] runtime: rewrite malloc in Go. This change introduces gomallocgc, a Go clone of mallocgc. Only a few uses have been moved over, so there are still lots of uses from C. Many of these C uses will be moved over to Go (e.g. in slice.goc), but probably not all. What should remain of C's mallocgc is an open question. LGTM=rsc, dvyukov R=rsc, khr, dave, bradfitz, dvyukov CC=golang-codereviews https://golang.org/cl/108840046 --- src/cmd/api/goapi.go | 2 +- src/cmd/cc/godefs.c | 28 +- src/cmd/gc/builtin.c | 2 +- src/cmd/gc/runtime.go | 2 +- src/cmd/gc/walk.c | 2 +- src/pkg/runtime/asm_386.s | 50 +++ src/pkg/runtime/asm_amd64.s | 50 +++ src/pkg/runtime/asm_amd64p32.s | 51 +++ src/pkg/runtime/asm_arm.s | 50 +++ src/pkg/runtime/extern.go | 44 --- src/pkg/runtime/hashmap.go | 18 +- src/pkg/runtime/{malloc.goc => malloc.c} | 295 ++++------------ src/pkg/runtime/malloc.go | 426 +++++++++++++++++++++++ src/pkg/runtime/malloc.h | 8 +- src/pkg/runtime/mem.go | 3 - src/pkg/runtime/mgc0.c | 90 +++-- src/pkg/runtime/mprof.goc | 31 ++ src/pkg/runtime/proc.c | 1 + src/pkg/runtime/race.go | 7 - src/pkg/runtime/runtime.h | 17 +- src/pkg/runtime/string.go | 53 +++ src/pkg/runtime/stubs.go | 61 +++- src/pkg/runtime/stubs.goc | 87 ++--- src/pkg/runtime/traceback_arm.c | 6 + src/pkg/runtime/traceback_x86.c | 6 + test/live.go | 10 +- 26 files changed, 992 insertions(+), 408 deletions(-) rename src/pkg/runtime/{malloc.goc => malloc.c} (74%) create mode 100644 src/pkg/runtime/malloc.go diff --git a/src/cmd/api/goapi.go b/src/cmd/api/goapi.go index 007601328c..7216f4e0ed 100644 --- a/src/cmd/api/goapi.go +++ b/src/cmd/api/goapi.go @@ -378,7 +378,7 @@ func (w *Walker) parseFile(dir, file string) (*ast.File, error) { } if w.context != nil && file == fmt.Sprintf("zruntime_defs_%s_%s.go", w.context.GOOS, w.context.GOARCH) { // Just enough to keep the api checker happy. - src := "package runtime; type maptype struct{}; type _type struct{}; type alg struct{}" + src := "package runtime; type maptype struct{}; type _type struct{}; type alg struct{}; type mspan struct{}; type m struct{}; type lock struct{}" f, err = parser.ParseFile(fset, filename, src, 0) if err != nil { log.Fatalf("incorrect generated file: %s", err) diff --git a/src/cmd/cc/godefs.c b/src/cmd/cc/godefs.c index 7457bd0007..3755a8fc09 100644 --- a/src/cmd/cc/godefs.c +++ b/src/cmd/cc/godefs.c @@ -206,16 +206,36 @@ printtypename(Type *t) Bprint(&outbuf, "uint16"); break; case TLONG: - Bprint(&outbuf, "int32"); + // The 32/64-bit ambiguous types (int,uint,uintptr) + // are assigned a TLONG/TULONG to distinguish them + // from always 32-bit types which get a TINT/TUINT. + // (See int_x/uint_x in pkg/runtime/runtime.h.) + // For LONG and VLONG types, we generate the + // unqualified Go type when appropriate. + // This makes it easier to write Go code that + // modifies objects with autogenerated-from-C types. 
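The mapping the comment above describes boils down to a width check: a TLONG/TULONG field becomes Go's unqualified int/uint only when pointers are 4 bytes wide, and a TVLONG/TUVLONG field only when they are 8 bytes wide, so the generated Go struct field lines up with the C one. A minimal standalone sketch of that decision follows; the goTypeFor helper and its string kind names are illustrative only, not part of the compiler, and the real logic is the printtypename change shown below.

package main

import "fmt"

// goTypeFor mirrors the width test added to printtypename below:
// the 32/64-bit ambiguous C types collapse to Go's int/uint when their
// width equals the pointer width, and keep a sized name otherwise.
func goTypeFor(kind string, ptrWidth int) string {
    switch kind {
    case "TLONG": // 32 bits in this C compiler
        if ptrWidth == 4 {
            return "int"
        }
        return "int32"
    case "TULONG":
        if ptrWidth == 4 {
            return "uint"
        }
        return "uint32"
    case "TVLONG": // 64 bits
        if ptrWidth == 8 {
            return "int"
        }
        return "int64"
    case "TUVLONG":
        if ptrWidth == 8 {
            return "uint"
        }
        return "uint64"
    }
    return "unknown"
}

func main() {
    fmt.Println(goTypeFor("TLONG", 4), goTypeFor("TLONG", 8))   // int int32
    fmt.Println(goTypeFor("TVLONG", 8), goTypeFor("TVLONG", 4)) // int int64
}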
+ if(ewidth[TIND] == 4) + Bprint(&outbuf, "int"); + else + Bprint(&outbuf, "int32"); break; case TULONG: - Bprint(&outbuf, "uint32"); + if(ewidth[TIND] == 4) + Bprint(&outbuf, "uint"); + else + Bprint(&outbuf, "uint32"); break; case TVLONG: - Bprint(&outbuf, "int64"); + if(ewidth[TIND] == 8) + Bprint(&outbuf, "int"); + else + Bprint(&outbuf, "int64"); break; case TUVLONG: - Bprint(&outbuf, "uint64"); + if(ewidth[TIND] == 8) + Bprint(&outbuf, "uint"); + else + Bprint(&outbuf, "uint64"); break; case TFLOAT: Bprint(&outbuf, "float32"); diff --git a/src/cmd/gc/builtin.c b/src/cmd/gc/builtin.c index 986a1de9ac..9de934b067 100644 --- a/src/cmd/gc/builtin.c +++ b/src/cmd/gc/builtin.c @@ -2,7 +2,7 @@ char *runtimeimport = "package runtime\n" "import runtime \"runtime\"\n" - "func @\"\".new (@\"\".typ·2 *byte) (? *any)\n" + "func @\"\".newobject (@\"\".typ·2 *byte) (? *any)\n" "func @\"\".panicindex ()\n" "func @\"\".panicslice ()\n" "func @\"\".panicdivide ()\n" diff --git a/src/cmd/gc/runtime.go b/src/cmd/gc/runtime.go index 6a9e68bcb4..2f282d6a03 100644 --- a/src/cmd/gc/runtime.go +++ b/src/cmd/gc/runtime.go @@ -12,7 +12,7 @@ package PACKAGE // emitted by compiler, not referred to by go programs -func new(typ *byte) *any +func newobject(typ *byte) *any func panicindex() func panicslice() func panicdivide() diff --git a/src/cmd/gc/walk.c b/src/cmd/gc/walk.c index eb9ce11316..e8d9e1ebcc 100644 --- a/src/cmd/gc/walk.c +++ b/src/cmd/gc/walk.c @@ -1915,7 +1915,7 @@ callnew(Type *t) Node *fn; dowidth(t); - fn = syslook("new", 1); + fn = syslook("newobject", 1); argtype(fn, t); return mkcall1(fn, ptrto(t), nil, typename(t)); } diff --git a/src/pkg/runtime/asm_386.s b/src/pkg/runtime/asm_386.s index 2aeb4bdeef..d2853bb0fb 100644 --- a/src/pkg/runtime/asm_386.s +++ b/src/pkg/runtime/asm_386.s @@ -195,6 +195,56 @@ TEXT runtime·mcall(SB), NOSPLIT, $0-4 JMP AX RET +// switchtoM is a dummy routine that onM leaves at the bottom +// of the G stack. We need to distinguish the routine that +// lives at the bottom of the G stack from the one that lives +// at the top of the M stack because the one at the top of +// the M stack terminates the stack walk (see topofstack()). +TEXT runtime·switchtoM(SB), NOSPLIT, $0-4 + RET + +// void onM(void (*fn)()) +// calls fn() on the M stack. +// switches to the M stack if not already on it, and +// switches back when fn() returns. +TEXT runtime·onM(SB), NOSPLIT, $0-4 + MOVL fn+0(FP), DI // DI = fn + get_tls(CX) + MOVL g(CX), AX // AX = g + MOVL g_m(AX), BX // BX = m + MOVL m_g0(BX), DX // DX = g0 + CMPL AX, DX + JEQ onm + + // save our state in g->sched. Pretend to + // be switchtoM if the G stack is scanned. + MOVL $runtime·switchtoM(SB), (g_sched+gobuf_pc)(AX) + MOVL SP, (g_sched+gobuf_sp)(AX) + MOVL AX, (g_sched+gobuf_g)(AX) + + // switch to g0 + MOVL DX, g(CX) + MOVL (g_sched+gobuf_sp)(DX), SP + + // call target function + ARGSIZE(0) + CALL DI + + // switch back to g + get_tls(CX) + MOVL g(CX), AX + MOVL g_m(AX), BX + MOVL m_curg(BX), AX + MOVL AX, g(CX) + MOVL (g_sched+gobuf_sp)(AX), SP + MOVL $0, (g_sched+gobuf_sp)(AX) + RET + +onm: + // already on m stack, just call directly + CALL DI + RET + /* * support for morestack */ diff --git a/src/pkg/runtime/asm_amd64.s b/src/pkg/runtime/asm_amd64.s index 80fc3b0d36..f396422a78 100644 --- a/src/pkg/runtime/asm_amd64.s +++ b/src/pkg/runtime/asm_amd64.s @@ -186,6 +186,56 @@ TEXT runtime·mcall(SB), NOSPLIT, $0-8 JMP AX RET +// switchtoM is a dummy routine that onM leaves at the bottom +// of the G stack. 
We need to distinguish the routine that +// lives at the bottom of the G stack from the one that lives +// at the top of the M stack because the one at the top of +// the M stack terminates the stack walk (see topofstack()). +TEXT runtime·switchtoM(SB), NOSPLIT, $0-8 + RET + +// void onM(void (*fn)()) +// calls fn() on the M stack. +// switches to the M stack if not already on it, and +// switches back when fn() returns. +TEXT runtime·onM(SB), NOSPLIT, $0-8 + MOVQ fn+0(FP), DI // DI = fn + get_tls(CX) + MOVQ g(CX), AX // AX = g + MOVQ g_m(AX), BX // BX = m + MOVQ m_g0(BX), DX // DX = g0 + CMPQ AX, DX + JEQ onm + + // save our state in g->sched. Pretend to + // be switchtoM if the G stack is scanned. + MOVQ $runtime·switchtoM(SB), (g_sched+gobuf_pc)(AX) + MOVQ SP, (g_sched+gobuf_sp)(AX) + MOVQ AX, (g_sched+gobuf_g)(AX) + + // switch to g0 + MOVQ DX, g(CX) + MOVQ (g_sched+gobuf_sp)(DX), SP + + // call target function + ARGSIZE(0) + CALL DI + + // switch back to g + get_tls(CX) + MOVQ g(CX), AX + MOVQ g_m(AX), BX + MOVQ m_curg(BX), AX + MOVQ AX, g(CX) + MOVQ (g_sched+gobuf_sp)(AX), SP + MOVQ $0, (g_sched+gobuf_sp)(AX) + RET + +onm: + // already on m stack, just call directly + CALL DI + RET + /* * support for morestack */ diff --git a/src/pkg/runtime/asm_amd64p32.s b/src/pkg/runtime/asm_amd64p32.s index 6a3f033e8b..8fe0cd5b79 100644 --- a/src/pkg/runtime/asm_amd64p32.s +++ b/src/pkg/runtime/asm_amd64p32.s @@ -165,6 +165,57 @@ TEXT runtime·mcall(SB), NOSPLIT, $0-4 JMP AX RET +// switchtoM is a dummy routine that onM leaves at the bottom +// of the G stack. We need to distinguish the routine that +// lives at the bottom of the G stack from the one that lives +// at the top of the M stack because the one at the top of +// the M stack terminates the stack walk (see topofstack()). +TEXT runtime·switchtoM(SB), NOSPLIT, $0-4 + RET + +// void onM(void (*fn)()) +// calls fn() on the M stack. +// switches to the M stack if not already on it, and +// switches back when fn() returns. +TEXT runtime·onM(SB), NOSPLIT, $0-4 + MOVL fn+0(FP), DI // DI = fn + get_tls(CX) + MOVL g(CX), AX // AX = g + MOVL g_m(AX), BX // BX = m + MOVL m_g0(BX), DX // DX = g0 + CMPL AX, DX + JEQ onm + + // save our state in g->sched. Pretend to + // be switchtoM if the G stack is scanned. + MOVL $runtime·switchtoM(SB), SI + MOVL SI, (g_sched+gobuf_pc)(AX) + MOVL SP, (g_sched+gobuf_sp)(AX) + MOVL AX, (g_sched+gobuf_g)(AX) + + // switch to g0 + MOVL DX, g(CX) + MOVL (g_sched+gobuf_sp)(DX), SP + + // call target function + ARGSIZE(0) + CALL DI + + // switch back to g + get_tls(CX) + MOVL g(CX), AX + MOVL g_m(AX), BX + MOVL m_curg(BX), AX + MOVL AX, g(CX) + MOVL (g_sched+gobuf_sp)(AX), SP + MOVL $0, (g_sched+gobuf_sp)(AX) + RET + +onm: + // already on m stack, just call directly + CALL DI + RET + /* * support for morestack */ diff --git a/src/pkg/runtime/asm_arm.s b/src/pkg/runtime/asm_arm.s index 1d2065c30b..ea7c1d34ef 100644 --- a/src/pkg/runtime/asm_arm.s +++ b/src/pkg/runtime/asm_arm.s @@ -178,6 +178,56 @@ TEXT runtime·mcall(SB), NOSPLIT, $-4-4 B runtime·badmcall2(SB) RET +// switchtoM is a dummy routine that onM leaves at the bottom +// of the G stack. We need to distinguish the routine that +// lives at the bottom of the G stack from the one that lives +// at the top of the M stack because the one at the top of +// the M stack terminates the stack walk (see topofstack()). 
+TEXT runtime·switchtoM(SB), NOSPLIT, $0-4 + MOVW $0, R0 + BL (R0) // clobber lr to ensure push {lr} is kept + RET + +// void onM(void (*fn)()) +// calls fn() on the M stack. +// switches to the M stack if not already on it, and +// switches back when fn() returns. +TEXT runtime·onM(SB), NOSPLIT, $0-4 + MOVW fn+0(FP), R0 // R0 = fn + MOVW g_m(g), R1 // R1 = m + MOVW m_g0(R1), R2 // R2 = g0 + CMP g, R2 + B.EQ onm + + // save our state in g->sched. Pretend to + // be switchtoM if the G stack is scanned. + MOVW $runtime·switchtoM(SB), R3 + ADD $4, R3, R3 // get past push {lr} + MOVW R3, (g_sched+gobuf_pc)(g) + MOVW SP, (g_sched+gobuf_sp)(g) + MOVW LR, (g_sched+gobuf_lr)(g) + MOVW g, (g_sched+gobuf_g)(g) + + // switch to g0 + MOVW R2, g + MOVW (g_sched+gobuf_sp)(R2), SP + + // call target function + ARGSIZE(0) + BL (R0) + + // switch back to g + MOVW g_m(g), R1 + MOVW m_curg(R1), g + MOVW (g_sched+gobuf_sp)(g), SP + MOVW $0, R3 + MOVW R3, (g_sched+gobuf_sp)(g) + RET + +onm: + BL (R0) + RET + /* * support for morestack */ diff --git a/src/pkg/runtime/extern.go b/src/pkg/runtime/extern.go index 053dc10147..57f09aaf7d 100644 --- a/src/pkg/runtime/extern.go +++ b/src/pkg/runtime/extern.go @@ -132,50 +132,6 @@ func funcline_go(*Func, uintptr) (string, int) func funcname_go(*Func) string func funcentry_go(*Func) uintptr -// SetFinalizer sets the finalizer associated with x to f. -// When the garbage collector finds an unreachable block -// with an associated finalizer, it clears the association and runs -// f(x) in a separate goroutine. This makes x reachable again, but -// now without an associated finalizer. Assuming that SetFinalizer -// is not called again, the next time the garbage collector sees -// that x is unreachable, it will free x. -// -// SetFinalizer(x, nil) clears any finalizer associated with x. -// -// The argument x must be a pointer to an object allocated by -// calling new or by taking the address of a composite literal. -// The argument f must be a function that takes a single argument -// to which x's type can be assigned, and can have arbitrary ignored return -// values. If either of these is not true, SetFinalizer aborts the -// program. -// -// Finalizers are run in dependency order: if A points at B, both have -// finalizers, and they are otherwise unreachable, only the finalizer -// for A runs; once A is freed, the finalizer for B can run. -// If a cyclic structure includes a block with a finalizer, that -// cycle is not guaranteed to be garbage collected and the finalizer -// is not guaranteed to run, because there is no ordering that -// respects the dependencies. -// -// The finalizer for x is scheduled to run at some arbitrary time after -// x becomes unreachable. -// There is no guarantee that finalizers will run before a program exits, -// so typically they are useful only for releasing non-memory resources -// associated with an object during a long-running program. -// For example, an os.File object could use a finalizer to close the -// associated operating system file descriptor when a program discards -// an os.File without calling Close, but it would be a mistake -// to depend on a finalizer to flush an in-memory I/O buffer such as a -// bufio.Writer, because the buffer would not be flushed at program exit. -// -// It is not guaranteed that a finalizer will run if the size of *x is -// zero bytes. -// -// A single goroutine runs all finalizers for a program, sequentially. 
-// If a finalizer must run for a long time, it should do so by starting -// a new goroutine. -func SetFinalizer(x, f interface{}) - func getgoroot() string // GOROOT returns the root of the Go tree. diff --git a/src/pkg/runtime/hashmap.go b/src/pkg/runtime/hashmap.go index 68ad37c901..d181f9c930 100644 --- a/src/pkg/runtime/hashmap.go +++ b/src/pkg/runtime/hashmap.go @@ -221,14 +221,14 @@ func makemap(t *maptype, hint int64) *hmap { if checkgc { memstats.next_gc = memstats.heap_alloc } - buckets = unsafe_NewArray(t.bucket, uintptr(1)< 32 kB) are allocated straight from the heap. -// If the block will be freed with runtime·free(), typ must be 0. void* runtime·mallocgc(uintptr size, Type *typ, uint32 flag) { - int32 sizeclass; - uintptr tinysize, size0, size1; - intgo rate; - MCache *c; - MSpan *s; - MLink *v, *next; - byte *tiny; + void *ret; - if(size == 0) { - // All 0-length allocations use this pointer. - // The language does not require the allocations to - // have distinct values. - return &runtime·zerobase; - } - if(g->m->mallocing) - runtime·throw("malloc/free - deadlock"); - // Disable preemption during settype. - // We can not use m->mallocing for this, because settype calls mallocgc. - g->m->locks++; - g->m->mallocing = 1; - - size0 = size; - c = g->m->mcache; - if(!runtime·debug.efence && size <= MaxSmallSize) { - if((flag&(FlagNoScan|FlagNoGC)) == FlagNoScan && size < TinySize) { - // Tiny allocator. - // - // Tiny allocator combines several tiny allocation requests - // into a single memory block. The resulting memory block - // is freed when all subobjects are unreachable. The subobjects - // must be FlagNoScan (don't have pointers), this ensures that - // the amount of potentially wasted memory is bounded. - // - // Size of the memory block used for combining (TinySize) is tunable. - // Current setting is 16 bytes, which relates to 2x worst case memory - // wastage (when all but one subobjects are unreachable). - // 8 bytes would result in no wastage at all, but provides less - // opportunities for combining. - // 32 bytes provides more opportunities for combining, - // but can lead to 4x worst case wastage. - // The best case winning is 8x regardless of block size. - // - // Objects obtained from tiny allocator must not be freed explicitly. - // So when an object will be freed explicitly, we ensure that - // its size >= TinySize. - // - // SetFinalizer has a special case for objects potentially coming - // from tiny allocator, it such case it allows to set finalizers - // for an inner byte of a memory block. - // - // The main targets of tiny allocator are small strings and - // standalone escaping variables. On a json benchmark - // the allocator reduces number of allocations by ~12% and - // reduces heap size by ~20%. - - tinysize = c->tinysize; - if(size <= tinysize) { - tiny = c->tiny; - // Align tiny pointer for required (conservative) alignment. - if((size&7) == 0) - tiny = (byte*)ROUND((uintptr)tiny, 8); - else if((size&3) == 0) - tiny = (byte*)ROUND((uintptr)tiny, 4); - else if((size&1) == 0) - tiny = (byte*)ROUND((uintptr)tiny, 2); - size1 = size + (tiny - c->tiny); - if(size1 <= tinysize) { - // The object fits into existing tiny block. - v = (MLink*)tiny; - c->tiny += size1; - c->tinysize -= size1; - g->m->mallocing = 0; - g->m->locks--; - if(g->m->locks == 0 && g->preempt) // restore the preemption request in case we've cleared it in newstack - g->stackguard0 = StackPreempt; - return v; - } - } - // Allocate a new TinySize block. 
- s = c->alloc[TinySizeClass]; - if(s->freelist == nil) - s = runtime·MCache_Refill(c, TinySizeClass); - v = s->freelist; - next = v->next; - s->freelist = next; - s->ref++; - if(next != nil) // prefetching nil leads to a DTLB miss - PREFETCH(next); - ((uint64*)v)[0] = 0; - ((uint64*)v)[1] = 0; - // See if we need to replace the existing tiny block with the new one - // based on amount of remaining free space. - if(TinySize-size > tinysize) { - c->tiny = (byte*)v + size; - c->tinysize = TinySize - size; - } - size = TinySize; - goto done; - } - // Allocate from mcache free lists. - // Inlined version of SizeToClass(). - if(size <= 1024-8) - sizeclass = runtime·size_to_class8[(size+7)>>3]; - else - sizeclass = runtime·size_to_class128[(size-1024+127) >> 7]; - size = runtime·class_to_size[sizeclass]; - s = c->alloc[sizeclass]; - if(s->freelist == nil) - s = runtime·MCache_Refill(c, sizeclass); - v = s->freelist; - next = v->next; - s->freelist = next; - s->ref++; - if(next != nil) // prefetching nil leads to a DTLB miss - PREFETCH(next); - if(!(flag & FlagNoZero)) { - v->next = nil; - // block is zeroed iff second word is zero ... - if(size > 2*sizeof(uintptr) && ((uintptr*)v)[1] != 0) - runtime·memclr((byte*)v, size); - } - done: - c->local_cachealloc += size; - } else { - // Allocate directly from heap. - s = largealloc(flag, &size); - v = (void*)(s->start << PageShift); - } - - if(!(flag & FlagNoGC)) - runtime·markallocated(v, size, size0, typ, !(flag&FlagNoScan)); - - g->m->mallocing = 0; - - if(raceenabled) - runtime·racemalloc(v, size); - - if(runtime·debug.allocfreetrace) - runtime·tracealloc(v, size, typ); - - if(!(flag & FlagNoProfiling) && (rate = runtime·MemProfileRate) > 0) { - if(size < rate && size < c->next_sample) - c->next_sample -= size; - else - profilealloc(v, size); - } - - g->m->locks--; - if(g->m->locks == 0 && g->preempt) // restore the preemption request in case we've cleared it in newstack - g->stackguard0 = StackPreempt; - - if(!(flag & FlagNoInvokeGC) && mstats.heap_alloc >= mstats.next_gc) - runtime·gc(0); - - return v; -} - -static MSpan* -largealloc(uint32 flag, uintptr *sizep) -{ - uintptr npages, size; - MSpan *s; - void *v; - - // Allocate directly from heap. - size = *sizep; - if(size + PageSize < size) - runtime·throw("out of memory"); - npages = size >> PageShift; - if((size & PageMask) != 0) - npages++; - s = runtime·MHeap_Alloc(&runtime·mheap, npages, 0, 1, !(flag & FlagNoZero)); - if(s == nil) - runtime·throw("out of memory"); - s->limit = (byte*)(s->start<start << PageShift); - // setup for mark sweep - runtime·markspan(v, 0, 0, true); - return s; -} - -static void -profilealloc(void *v, uintptr size) -{ - uintptr rate; - int32 next; - MCache *c; - - c = g->m->mcache; - rate = runtime·MemProfileRate; - if(size < rate) { - // pick next profile time - // If you change this, also change allocmcache. - if(rate > 0x3fffffff) // make 2*rate not overflow - rate = 0x3fffffff; - next = runtime·fastrand1() % (2*rate); - // Subtract the "remainder" of the current allocation. - // Otherwise objects that are close in size to sampling rate - // will be under-sampled, because we consistently discard this remainder. - next -= (size - c->next_sample); - if(next < 0) - next = 0; - c->next_sample = next; - } - runtime·MProf_Malloc(v, size); + // Call into the Go version of mallocgc. + // TODO: maybe someday we can get rid of this. It is + // probably the only location where we run Go code on the M stack. 
+ runtime·cmallocgc(size, typ, flag, &ret); + return ret; } void* @@ -421,6 +211,10 @@ uintptr runtime·sizeof_C_MStats = sizeof(MStats) - (NumSizeClasses - 61) * size #define MaxArena32 (2U<<30) +// For use by Go. It can't be a constant in Go, unfortunately, +// because it depends on the OS. +uintptr runtime·maxMem = MaxMem; + void runtime·mallocinit(void) { @@ -708,11 +502,6 @@ runtime·mal(uintptr n) return runtime·mallocgc(n, nil, 0); } -#pragma textflag NOSPLIT -func new(typ *Type) (ret *uint8) { - ret = runtime·mallocgc(typ->size, typ, typ->kind&KindNoPointers ? FlagNoScan : 0); -} - static void* cnew(Type *typ, intgo n) { @@ -734,11 +523,9 @@ runtime·cnewarray(Type *typ, intgo n) return cnew(typ, n); } -func GC() { - runtime·gc(2); // force GC and do eager sweep -} - -func SetFinalizer(obj Eface, finalizer Eface) { +static void +setFinalizer(Eface obj, Eface finalizer) +{ byte *base; uintptr size; FuncType *ft; @@ -823,8 +610,52 @@ throw: runtime·throw("runtime.SetFinalizer"); } -// For testing. -func GCMask(x Eface) (mask Slice) { - runtime·getgcmask(x.data, x.type, &mask.array, &mask.len); - mask.cap = mask.len; +void +runtime·setFinalizer(void) +{ + Eface obj, finalizer; + + obj.type = g->m->ptrarg[0]; + obj.data = g->m->ptrarg[1]; + finalizer.type = g->m->ptrarg[2]; + finalizer.data = g->m->ptrarg[3]; + g->m->ptrarg[0] = nil; + g->m->ptrarg[1] = nil; + g->m->ptrarg[2] = nil; + g->m->ptrarg[3] = nil; + setFinalizer(obj, finalizer); +} + +// mcallable cache refill +void +runtime·mcacheRefill(void) +{ + runtime·MCache_Refill(g->m->mcache, (int32)g->m->scalararg[0]); +} + +void +runtime·largeAlloc(void) +{ + uintptr npages, size; + MSpan *s; + void *v; + int32 flag; + + //runtime·printf("largeAlloc size=%D\n", g->m->scalararg[0]); + // Allocate directly from heap. + size = g->m->scalararg[0]; + flag = (int32)g->m->scalararg[1]; + if(size + PageSize < size) + runtime·throw("out of memory"); + npages = size >> PageShift; + if((size & PageMask) != 0) + npages++; + s = runtime·MHeap_Alloc(&runtime·mheap, npages, 0, 1, !(flag & FlagNoZero)); + if(s == nil) + runtime·throw("out of memory"); + s->limit = (byte*)(s->start<start << PageShift); + // setup for mark sweep + runtime·markspan(v, 0, 0, true); + g->m->ptrarg[0] = s; } diff --git a/src/pkg/runtime/malloc.go b/src/pkg/runtime/malloc.go new file mode 100644 index 0000000000..cac8f966e7 --- /dev/null +++ b/src/pkg/runtime/malloc.go @@ -0,0 +1,426 @@ +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +package runtime + +import ( + "unsafe" +) + +const ( + flagNoScan = 1 << 0 // GC doesn't have to scan object + flagNoProfiling = 1 << 1 // must not profile + flagNoZero = 1 << 3 // don't zero memory + flagNoInvokeGC = 1 << 4 // don't invoke GC + + kindArray = 17 + kindFunc = 19 + kindInterface = 20 + kindPtr = 22 + kindStruct = 25 + kindMask = 1<<6 - 1 + kindGCProg = 1 << 6 + kindNoPointers = 1 << 7 + + maxTinySize = 16 + tinySizeClass = 2 + maxSmallSize = 32 << 10 + + pageShift = 13 + pageSize = 1 << pageShift + pageMask = pageSize - 1 +) + +// All zero-sized allocations return a pointer to this byte. +var zeroObject byte + +// Maximum possible heap size. +var maxMem uintptr + +// Allocate an object of at least size bytes. +// Small objects are allocated from the per-thread cache's free lists. +// Large objects (> 32 kB) are allocated straight from the heap. +// If the block will be freed with runtime·free(), typ must be nil. 
+func gomallocgc(size uintptr, typ *_type, flags int) unsafe.Pointer { + if size == 0 { + return unsafe.Pointer(&zeroObject) + } + mp := acquirem() + if mp.mallocing != 0 { + gothrow("malloc/free - deadlock") + } + mp.mallocing = 1 + size0 := size + + c := mp.mcache + var s *mspan + var x unsafe.Pointer + if size <= maxSmallSize { + if flags&flagNoScan != 0 && size < maxTinySize { + // Tiny allocator. + // + // Tiny allocator combines several tiny allocation requests + // into a single memory block. The resulting memory block + // is freed when all subobjects are unreachable. The subobjects + // must be FlagNoScan (don't have pointers), this ensures that + // the amount of potentially wasted memory is bounded. + // + // Size of the memory block used for combining (maxTinySize) is tunable. + // Current setting is 16 bytes, which relates to 2x worst case memory + // wastage (when all but one subobjects are unreachable). + // 8 bytes would result in no wastage at all, but provides less + // opportunities for combining. + // 32 bytes provides more opportunities for combining, + // but can lead to 4x worst case wastage. + // The best case winning is 8x regardless of block size. + // + // Objects obtained from tiny allocator must not be freed explicitly. + // So when an object will be freed explicitly, we ensure that + // its size >= maxTinySize. + // + // SetFinalizer has a special case for objects potentially coming + // from tiny allocator, it such case it allows to set finalizers + // for an inner byte of a memory block. + // + // The main targets of tiny allocator are small strings and + // standalone escaping variables. On a json benchmark + // the allocator reduces number of allocations by ~12% and + // reduces heap size by ~20%. + + tinysize := uintptr(c.tinysize) + if size <= tinysize { + tiny := unsafe.Pointer(c.tiny) + // Align tiny pointer for required (conservative) alignment. + if size&7 == 0 { + tiny = roundup(tiny, 8) + } else if size&3 == 0 { + tiny = roundup(tiny, 4) + } else if size&1 == 0 { + tiny = roundup(tiny, 2) + } + size1 := size + (uintptr(tiny) - uintptr(unsafe.Pointer(c.tiny))) + if size1 <= tinysize { + // The object fits into existing tiny block. + x = tiny + c.tiny = (*byte)(add(x, size)) + c.tinysize -= uint(size1) + mp.mallocing = 0 + releasem(mp) + return x + } + } + // Allocate a new maxTinySize block. + s = c.alloc[tinySizeClass] + v := s.freelist + if v == nil { + mp.scalararg[0] = tinySizeClass + onM(&mcacheRefill) + s = c.alloc[tinySizeClass] + v = s.freelist + } + s.freelist = v.next + s.ref++ + //TODO: prefetch v.next + x = unsafe.Pointer(v) + (*[2]uint64)(x)[0] = 0 + (*[2]uint64)(x)[1] = 0 + // See if we need to replace the existing tiny block with the new one + // based on amount of remaining free space. 
+ if maxTinySize-size > tinysize { + c.tiny = (*byte)(add(x, size)) + c.tinysize = uint(maxTinySize - size) + } + size = maxTinySize + } else { + var sizeclass int8 + if size <= 1024-8 { + sizeclass = size_to_class8[(size+7)>>3] + } else { + sizeclass = size_to_class128[(size-1024+127)>>7] + } + size = uintptr(class_to_size[sizeclass]) + s = c.alloc[sizeclass] + v := s.freelist + if v == nil { + mp.scalararg[0] = uint(sizeclass) + onM(&mcacheRefill) + s = c.alloc[sizeclass] + v = s.freelist + } + s.freelist = v.next + s.ref++ + //TODO: prefetch + x = unsafe.Pointer(v) + if flags&flagNoZero == 0 { + v.next = nil + if size > 2*ptrSize && ((*[2]uintptr)(x))[1] != 0 { + memclr(unsafe.Pointer(v), size) + } + } + } + c.local_cachealloc += int(size) + } else { + mp.scalararg[0] = uint(size) + mp.scalararg[1] = uint(flags) + onM(&largeAlloc) + s = (*mspan)(mp.ptrarg[0]) + mp.ptrarg[0] = nil + x = unsafe.Pointer(uintptr(s.start << pageShift)) + size = uintptr(s.elemsize) + } + + // TODO: write markallocated in Go + mp.ptrarg[0] = x + mp.scalararg[0] = uint(size) + mp.scalararg[1] = uint(size0) + mp.ptrarg[1] = unsafe.Pointer(typ) + mp.scalararg[2] = uint(flags & flagNoScan) + onM(&markallocated_m) + + mp.mallocing = 0 + + if raceenabled { + racemalloc(x, size) + } + if debug.allocfreetrace != 0 { + tracealloc(x, size, typ) + } + if flags&flagNoProfiling == 0 { + rate := MemProfileRate + if rate > 0 { + if size < uintptr(rate) && int32(size) < c.next_sample { + c.next_sample -= int32(size) + } else { + profilealloc(mp, x, size) + } + } + } + + releasem(mp) + + if flags&flagNoInvokeGC == 0 && memstats.heap_alloc >= memstats.next_gc { + gogc(0) + } + + return x +} + +// cmallocgc is a trampoline used to call the Go malloc from C. +func cmallocgc(size uintptr, typ *_type, flags int, ret *unsafe.Pointer) { + *ret = gomallocgc(size, typ, flags) +} + +// implementation of new builtin +func newobject(typ *_type) unsafe.Pointer { + flags := 0 + if typ.kind&kindNoPointers != 0 { + flags |= flagNoScan + } + return gomallocgc(uintptr(typ.size), typ, flags) +} + +// implementation of make builtin for slices +func newarray(typ *_type, n uintptr) unsafe.Pointer { + flags := 0 + if typ.kind&kindNoPointers != 0 { + flags |= flagNoScan + } + if int(n) < 0 || (typ.size > 0 && n > maxMem/uintptr(typ.size)) { + panic("runtime: allocation size out of range") + } + return gomallocgc(uintptr(typ.size)*n, typ, flags) +} + +// round size up to next size class +func goroundupsize(size uintptr) uintptr { + if size < maxSmallSize { + if size <= 1024-8 { + return uintptr(class_to_size[size_to_class8[(size+7)>>3]]) + } + return uintptr(class_to_size[size_to_class128[(size-1024+127)>>7]]) + } + if size+pageSize < size { + return size + } + return (size + pageSize - 1) &^ pageMask +} + +func profilealloc(mp *m, x unsafe.Pointer, size uintptr) { + c := mp.mcache + rate := MemProfileRate + if size < uintptr(rate) { + // pick next profile time + // If you change this, also change allocmcache. + if rate > 0x3fffffff { // make 2*rate not overflow + rate = 0x3fffffff + } + next := int32(fastrand2()) % (2 * int32(rate)) + // Subtract the "remainder" of the current allocation. + // Otherwise objects that are close in size to sampling rate + // will be under-sampled, because we consistently discard this remainder. 
+ next -= (int32(size) - c.next_sample) + if next < 0 { + next = 0 + } + c.next_sample = next + } + mp.scalararg[0] = uint(size) + mp.ptrarg[0] = x + onM(&mprofMalloc) +} + +// force = 1 - do GC regardless of current heap usage +// force = 2 - go GC and eager sweep +func gogc(force int32) { + if memstats.enablegc == 0 { + return + } + + // TODO: should never happen? Only C calls malloc while holding a lock? + mp := acquirem() + if mp.locks > 1 { + releasem(mp) + return + } + releasem(mp) + + if panicking != 0 { + return + } + if gcpercent == gcpercentUnknown { + golock(&mheap_.lock) + if gcpercent == gcpercentUnknown { + gcpercent = goreadgogc() + } + gounlock(&mheap_.lock) + } + if gcpercent < 0 { + return + } + + semacquire(&worldsema, false) + + if force == 0 && memstats.heap_alloc < memstats.next_gc { + // typically threads which lost the race to grab + // worldsema exit here when gc is done. + semrelease(&worldsema) + return + } + + // Ok, we're doing it! Stop everybody else + startTime := gonanotime() + mp = acquirem() + mp.gcing = 1 + stoptheworld() + + clearpools() + + // Run gc on the g0 stack. We do this so that the g stack + // we're currently running on will no longer change. Cuts + // the root set down a bit (g0 stacks are not scanned, and + // we don't need to scan gc's internal state). We also + // need to switch to g0 so we can shrink the stack. + n := 1 + if debug.gctrace > 1 { + n = 2 + } + for i := 0; i < n; i++ { + if i > 0 { + startTime = gonanotime() + } + // switch to g0, call gc, then switch back + mp.scalararg[0] = uint(startTime) + if force >= 2 { + mp.scalararg[1] = 1 // eagersweep + } else { + mp.scalararg[1] = 0 + } + onM(&mgc2) + } + + // all done + mp.gcing = 0 + semrelease(&worldsema) + starttheworld() + releasem(mp) + + // now that gc is done, kick off finalizer thread if needed + if !concurrentSweep { + // give the queued finalizers, if any, a chance to run + gosched() + } +} + +// GC runs a garbage collection. +func GC() { + gogc(2) +} + +// SetFinalizer sets the finalizer associated with x to f. +// When the garbage collector finds an unreachable block +// with an associated finalizer, it clears the association and runs +// f(x) in a separate goroutine. This makes x reachable again, but +// now without an associated finalizer. Assuming that SetFinalizer +// is not called again, the next time the garbage collector sees +// that x is unreachable, it will free x. +// +// SetFinalizer(x, nil) clears any finalizer associated with x. +// +// The argument x must be a pointer to an object allocated by +// calling new or by taking the address of a composite literal. +// The argument f must be a function that takes a single argument +// to which x's type can be assigned, and can have arbitrary ignored return +// values. If either of these is not true, SetFinalizer aborts the +// program. +// +// Finalizers are run in dependency order: if A points at B, both have +// finalizers, and they are otherwise unreachable, only the finalizer +// for A runs; once A is freed, the finalizer for B can run. +// If a cyclic structure includes a block with a finalizer, that +// cycle is not guaranteed to be garbage collected and the finalizer +// is not guaranteed to run, because there is no ordering that +// respects the dependencies. +// +// The finalizer for x is scheduled to run at some arbitrary time after +// x becomes unreachable. 
+// There is no guarantee that finalizers will run before a program exits, +// so typically they are useful only for releasing non-memory resources +// associated with an object during a long-running program. +// For example, an os.File object could use a finalizer to close the +// associated operating system file descriptor when a program discards +// an os.File without calling Close, but it would be a mistake +// to depend on a finalizer to flush an in-memory I/O buffer such as a +// bufio.Writer, because the buffer would not be flushed at program exit. +// +// It is not guaranteed that a finalizer will run if the size of *x is +// zero bytes. +// +// A single goroutine runs all finalizers for a program, sequentially. +// If a finalizer must run for a long time, it should do so by starting +// a new goroutine. +func SetFinalizer(obj interface{}, finalizer interface{}) { + // We do just enough work here to make the mcall type safe. + // The rest is done on the M stack. + e := (*eface)(unsafe.Pointer(&obj)) + typ := e._type + if typ == nil { + gothrow("runtime.SetFinalizer: first argument is nil") + } + if typ.kind&kindMask != kindPtr { + gothrow("runtime.SetFinalizer: first argument is " + *typ._string + ", not pointer") + } + + f := (*eface)(unsafe.Pointer(&finalizer)) + ftyp := f._type + if ftyp != nil && ftyp.kind&kindMask != kindFunc { + gothrow("runtime.SetFinalizer: second argument is " + *ftyp._string + ", not a function") + } + mp := acquirem() + mp.ptrarg[0] = unsafe.Pointer(typ) + mp.ptrarg[1] = e.data + mp.ptrarg[2] = unsafe.Pointer(ftyp) + mp.ptrarg[3] = f.data + onM(&setFinalizer) + releasem(mp) +} diff --git a/src/pkg/runtime/malloc.h b/src/pkg/runtime/malloc.h index a6425581f3..50656e4ee9 100644 --- a/src/pkg/runtime/malloc.h +++ b/src/pkg/runtime/malloc.h @@ -280,7 +280,7 @@ struct MStats } by_size[NumSizeClasses]; }; -#define mstats runtime·memStats +#define mstats runtime·memstats extern MStats mstats; void runtime·updatememstats(GCStats *stats); @@ -500,6 +500,7 @@ struct MHeap uint64 nlargefree; // number of frees for large objects (>MaxSmallSize) uint64 nsmallfree[NumSizeClasses]; // number of frees for small objects (<=MaxSmallSize) }; +#define runtime·mheap runtime·mheap_ extern MHeap runtime·mheap; void runtime·MHeap_Init(MHeap *h); @@ -531,6 +532,10 @@ void runtime·tracealloc(void*, uintptr, Type*); void runtime·tracefree(void*, uintptr); void runtime·tracegc(void); +int32 runtime·gcpercent; +int32 runtime·readgogc(void); +void runtime·clearpools(void); + enum { // flags to malloc @@ -551,6 +556,7 @@ void runtime·gchelper(void); void runtime·createfing(void); G* runtime·wakefing(void); void runtime·getgcmask(byte*, Type*, byte**, uintptr*); +extern G* runtime·fing; extern bool runtime·fingwait; extern bool runtime·fingwake; diff --git a/src/pkg/runtime/mem.go b/src/pkg/runtime/mem.go index 0fec501e7a..34391b2eb2 100644 --- a/src/pkg/runtime/mem.go +++ b/src/pkg/runtime/mem.go @@ -70,6 +70,3 @@ func init() { // ReadMemStats populates m with memory allocator statistics. func ReadMemStats(m *MemStats) - -// GC runs a garbage collection. -func GC() diff --git a/src/pkg/runtime/mgc0.c b/src/pkg/runtime/mgc0.c index 082aedeb37..5e1236c24f 100644 --- a/src/pkg/runtime/mgc0.c +++ b/src/pkg/runtime/mgc0.c @@ -81,7 +81,7 @@ enum { #define GcpercentUnknown (-2) // Initialized from $GOGC. GOGC=off means no gc. 
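The Go half of SetFinalizer above only validates the two interface arguments and stashes them in m.ptrarg before handing off to the C half via onM; the public semantics documented in the comment are unchanged. A small, self-contained usage sketch of those semantics — the resource type here is made up, and only runtime.SetFinalizer and runtime.GC are real API:

package main

import (
    "fmt"
    "runtime"
    "time"
)

type resource struct{ id int }

func (r *resource) release() { fmt.Println("released resource", r.id) }

func open(id int) *resource {
    r := &resource{id: id}
    // If the caller forgets an explicit release, the finalizer
    // eventually runs it once r becomes unreachable.
    runtime.SetFinalizer(r, (*resource).release)
    return r
}

func main() {
    open(1) // result dropped: only the finalizer can release it now
    runtime.GC()
    // Finalizers run on a separate goroutine and are not guaranteed to
    // run before the program exits, so give them a moment here.
    time.Sleep(100 * time.Millisecond)
}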
-static int32 gcpercent = GcpercentUnknown; +extern int32 runtime·gcpercent = GcpercentUnknown; static FuncVal* poolcleanup; @@ -91,8 +91,8 @@ sync·runtime_registerPoolCleanup(FuncVal *f) poolcleanup = f; } -static void -clearpools(void) +void +runtime·clearpools(void) { P *p, **pp; MCache *c; @@ -174,7 +174,6 @@ bool runtime·fingwait; bool runtime·fingwake; static Lock gclock; -static G* fing; static void runfinq(void); static void bgsweep(void); @@ -670,6 +669,8 @@ scanframe(Stkframe *frame, void *unused) // Frame is dead. return true; } + if(Debug > 1) + runtime·printf("scanframe %s\n", runtime·funcname(f)); if(targetpc != f->entry) targetpc--; pcdata = runtime·pcdatavalue(f, PCDATA_StackMapIndex, targetpc); @@ -971,7 +972,7 @@ runtime·MSpan_Sweep(MSpan *s) runtime·MHeap_Free(&runtime·mheap, s, 1); c->local_nlargefree++; c->local_largefree += size; - runtime·xadd64(&mstats.next_gc, -(uint64)(size * (gcpercent + 100)/100)); + runtime·xadd64(&mstats.next_gc, -(uint64)(size * (runtime·gcpercent + 100)/100)); res = true; } else { // Free small object. @@ -1005,7 +1006,7 @@ runtime·MSpan_Sweep(MSpan *s) if(nfree > 0) { c->local_nsmallfree[cl] += nfree; c->local_cachealloc -= nfree * size; - runtime·xadd64(&mstats.next_gc, -(uint64)(nfree * size * (gcpercent + 100)/100)); + runtime·xadd64(&mstats.next_gc, -(uint64)(nfree * size * (runtime·gcpercent + 100)/100)); res = runtime·MCentral_FreeSpan(&runtime·mheap.central[cl], s, nfree, head.next, end); // MCentral_FreeSpan updates sweepgen } @@ -1238,8 +1239,8 @@ struct gc_args static void gc(struct gc_args *args); static void mgc(G *gp); -static int32 -readgogc(void) +int32 +runtime·readgogc(void) { byte *p; @@ -1259,16 +1260,8 @@ runtime·gc(int32 force) struct gc_args a; int32 i; - // The atomic operations are not atomic if the uint64s - // are not aligned on uint64 boundaries. This has been - // a problem in the past. - if((((uintptr)&work.empty) & 7) != 0) - runtime·throw("runtime: gc work buffer is misaligned"); - if((((uintptr)&work.full) & 7) != 0) - runtime·throw("runtime: gc work buffer is misaligned"); if(sizeof(Workbuf) != WorkbufSize) runtime·throw("runtime: size of Workbuf is suboptimal"); - // The gc is turned off (via enablegc) until // the bootstrap has completed. // Also, malloc gets called in the guts @@ -1280,13 +1273,13 @@ runtime·gc(int32 force) if(!mstats.enablegc || g == g->m->g0 || g->m->locks > 0 || runtime·panicking) return; - if(gcpercent == GcpercentUnknown) { // first time through + if(runtime·gcpercent == GcpercentUnknown) { // first time through runtime·lock(&runtime·mheap); - if(gcpercent == GcpercentUnknown) - gcpercent = readgogc(); + if(runtime·gcpercent == GcpercentUnknown) + runtime·gcpercent = runtime·readgogc(); runtime·unlock(&runtime·mheap); } - if(gcpercent < 0) + if(runtime·gcpercent < 0) return; runtime·semacquire(&runtime·worldsema, false); @@ -1303,7 +1296,7 @@ runtime·gc(int32 force) g->m->gcing = 1; runtime·stoptheworld(); - clearpools(); + runtime·clearpools(); // Run gc on the g0 stack. We do this so that the g stack // we're currently running on will no longer change. 
Cuts @@ -1343,6 +1336,23 @@ mgc(G *gp) runtime·gogo(&gp->sched); } +void +runtime·mgc2(void) +{ + struct gc_args a; + G *gp; + + gp = g->m->curg; + gp->status = Gwaiting; + gp->waitreason = "garbage collection"; + + a.start_time = g->m->scalararg[0]; + a.eagersweep = g->m->scalararg[1]; + gc(&a); + + gp->status = Grunning; +} + static void gc(struct gc_args *args) { @@ -1409,10 +1419,10 @@ gc(struct gc_args *args) cachestats(); // next_gc calculation is tricky with concurrent sweep since we don't know size of live heap // estimate what was live heap size after previous GC (for tracing only) - heap0 = mstats.next_gc*100/(gcpercent+100); + heap0 = mstats.next_gc*100/(runtime·gcpercent+100); // conservatively set next_gc to high value assuming that everything is live // concurrent/lazy sweep will reduce this number while discovering new garbage - mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*gcpercent/100; + mstats.next_gc = mstats.heap_alloc+mstats.heap_alloc*runtime·gcpercent/100; t4 = runtime·nanotime(); mstats.last_gc = runtime·unixnanotime(); // must be Unix time to make sense to user @@ -1554,12 +1564,12 @@ runtime·setgcpercent(int32 in) { int32 out; runtime·lock(&runtime·mheap); - if(gcpercent == GcpercentUnknown) - gcpercent = readgogc(); - out = gcpercent; + if(runtime·gcpercent == GcpercentUnknown) + runtime·gcpercent = runtime·readgogc(); + out = runtime·gcpercent; if(in < 0) in = -1; - gcpercent = in; + runtime·gcpercent = in; runtime·unlock(&runtime·mheap); return out; } @@ -1678,17 +1688,24 @@ runfinq(void) void runtime·createfing(void) { - if(fing != nil) + if(runtime·fing != nil) return; // Here we use gclock instead of finlock, // because newproc1 can allocate, which can cause on-demand span sweep, // which can queue finalizers, which would deadlock. runtime·lock(&gclock); - if(fing == nil) - fing = runtime·newproc1(&runfinqv, nil, 0, 0, runtime·gc); + if(runtime·fing == nil) + runtime·fing = runtime·newproc1(&runfinqv, nil, 0, 0, runtime·gc); runtime·unlock(&gclock); } +void +runtime·createfingM(G *gp) +{ + runtime·createfing(); + runtime·gogo(&gp->sched); +} + G* runtime·wakefing(void) { @@ -1699,7 +1716,7 @@ runtime·wakefing(void) if(runtime·fingwait && runtime·fingwake) { runtime·fingwait = false; runtime·fingwake = false; - res = fing; + res = runtime·fing; } runtime·unlock(&finlock); return res; @@ -1944,6 +1961,17 @@ runtime·markallocated(void *v, uintptr size, uintptr size0, Type *typ, bool sca } } +void +runtime·markallocated_m(void) +{ + M *mp; + + mp = g->m; + runtime·markallocated(mp->ptrarg[0], mp->scalararg[0], mp->scalararg[1], mp->ptrarg[1], mp->scalararg[2] == 0); + mp->ptrarg[0] = nil; + mp->ptrarg[1] = nil; +} + // mark the block at v as freed. void runtime·markfreed(void *v) diff --git a/src/pkg/runtime/mprof.goc b/src/pkg/runtime/mprof.goc index 87ec70ef0b..69187f2a74 100644 --- a/src/pkg/runtime/mprof.goc +++ b/src/pkg/runtime/mprof.goc @@ -140,6 +140,37 @@ runtime·MProf_Malloc(void *p, uintptr size) runtime·setprofilebucket(p, b); } +// Called by malloc to record a profiled block. 
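Sampling decisions stay in Go (profilealloc above) while the bucket bookkeeping stays in C (the mprofMalloc helper this comment introduces); both are driven by the exported runtime.MemProfileRate. A brief illustration of that knob from user code — this example is not part of the patch:

package main

import "runtime"

func main() {
    // MemProfileRate asks the profiler to record, on average, one sample
    // per that many bytes allocated; the default is one per 512 KB.
    // Setting it to 1 records essentially every allocation, which is
    // useful in tests but expensive otherwise. Set it before the
    // allocations you care about.
    runtime.MemProfileRate = 1

    buf := make([]byte, 1<<20) // this allocation will be sampled
    _ = buf
}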
+void +runtime·mprofMalloc(void) +{ + uintptr stk[32]; + Bucket *b; + int32 nstk; + uintptr size; + void *p; + + size = g->m->scalararg[0]; + p = g->m->ptrarg[0]; + g->m->ptrarg[0] = nil; + + if(g->m->curg == nil) + nstk = runtime·callers(1, stk, nelem(stk)); + else + nstk = runtime·gcallers(g->m->curg, 1, stk, nelem(stk)); + runtime·lock(&proflock); + b = stkbucket(MProf, size, stk, nstk, true); + b->recent_allocs++; + b->recent_alloc_bytes += size; + runtime·unlock(&proflock); + + // Setprofilebucket locks a bunch of other mutexes, so we call it outside of proflock. + // This reduces potential contention and chances of deadlocks. + // Since the object must be alive during call to MProf_Malloc, + // it's fine to do this non-atomically. + runtime·setprofilebucket(p, b); +} + // Called when freeing a profiled block. void runtime·MProf_Free(Bucket *b, uintptr size, bool freed) diff --git a/src/pkg/runtime/proc.c b/src/pkg/runtime/proc.c index 1e7fdd421e..e21da4f309 100644 --- a/src/pkg/runtime/proc.c +++ b/src/pkg/runtime/proc.c @@ -3136,6 +3136,7 @@ runtime·topofstack(Func *f) return f->entry == (uintptr)runtime·goexit || f->entry == (uintptr)runtime·mstart || f->entry == (uintptr)runtime·mcall || + f->entry == (uintptr)runtime·onM || f->entry == (uintptr)runtime·morestack || f->entry == (uintptr)runtime·lessstack || f->entry == (uintptr)_rt0_go || diff --git a/src/pkg/runtime/race.go b/src/pkg/runtime/race.go index a2c9cbb152..bdb7193ffb 100644 --- a/src/pkg/runtime/race.go +++ b/src/pkg/runtime/race.go @@ -12,13 +12,6 @@ import ( "unsafe" ) -const ( - // TODO: where should these live? - kindNoPointers = 1 << 7 - kindArray = 17 - kindStruct = 25 -) - // RaceDisable disables handling of race events in the current goroutine. func RaceDisable() diff --git a/src/pkg/runtime/runtime.h b/src/pkg/runtime/runtime.h index d21112de5a..3690ad37d7 100644 --- a/src/pkg/runtime/runtime.h +++ b/src/pkg/runtime/runtime.h @@ -22,10 +22,17 @@ typedef int64 intptr; typedef int64 intgo; // Go's int typedef uint64 uintgo; // Go's uint #else -typedef uint32 uintptr; -typedef int32 intptr; -typedef int32 intgo; // Go's int -typedef uint32 uintgo; // Go's uint +// Normally, "int" == "long int" == 32 bits. +// However, the C compiler uses this distinction +// to disambiguate true 32 bit ints (e.g. int32) +// from 32/64 bit ints (e.g. uintptr) so that it +// can generate the corresponding go type correctly. 
+typedef signed long int int32_x; +typedef unsigned long int uint32_x; +typedef uint32_x uintptr; +typedef int32_x intptr; +typedef int32_x intgo; // Go's int +typedef uint32_x uintgo; // Go's uint #endif #ifdef _64BITREG @@ -874,6 +881,7 @@ uintptr runtime·getcallersp(void*); int32 runtime·mcount(void); int32 runtime·gcount(void); void runtime·mcall(void(*)(G*)); +void runtime·onM(void(*)(void)); uint32 runtime·fastrand1(void); void runtime·rewindmorestack(Gobuf*); int32 runtime·timediv(int64, int32, int32*); @@ -916,6 +924,7 @@ void runtime·exitsyscall(void); G* runtime·newproc1(FuncVal*, byte*, int32, int32, void*); bool runtime·sigsend(int32 sig); int32 runtime·callers(int32, uintptr*, int32); +int32 runtime·gcallers(G*, int32, uintptr*, int32); int64 runtime·nanotime(void); // monotonic time int64 runtime·unixnanotime(void); // real time, can skip void runtime·dopanic(int32); diff --git a/src/pkg/runtime/string.go b/src/pkg/runtime/string.go index 475c837e36..c5b0917482 100644 --- a/src/pkg/runtime/string.go +++ b/src/pkg/runtime/string.go @@ -202,3 +202,56 @@ func stringiter2(s string, k int) (int, rune) { r, n := charntorune(s[k:]) return k + n, r } + +// rawstring allocates storage for a new string. The returned +// string and byte slice both refer to the same storage. +// The storage is not zeroed. Callers should use +// b to set the string contents and then drop b. +func rawstring(size int) (s string, b []byte) { + p := gomallocgc(uintptr(size), nil, flagNoScan|flagNoZero) + + (*stringStruct)(unsafe.Pointer(&s)).str = p + (*stringStruct)(unsafe.Pointer(&s)).len = size + + (*slice)(unsafe.Pointer(&b)).array = (*uint8)(p) + (*slice)(unsafe.Pointer(&b)).len = uint(size) + (*slice)(unsafe.Pointer(&b)).cap = uint(size) + + for { + ms := maxstring + if uintptr(size) <= uintptr(ms) || gocasx((*uintptr)(unsafe.Pointer(&maxstring)), uintptr(ms), uintptr(size)) { + return + } + } +} + +// rawbyteslice allocates a new byte slice. The byte slice is not zeroed. +func rawbyteslice(size int) (b []byte) { + cap := goroundupsize(uintptr(size)) + p := gomallocgc(cap, nil, flagNoScan|flagNoZero) + if cap != uintptr(size) { + memclr(add(p, uintptr(size)), cap-uintptr(size)) + } + + (*slice)(unsafe.Pointer(&b)).array = (*uint8)(p) + (*slice)(unsafe.Pointer(&b)).len = uint(size) + (*slice)(unsafe.Pointer(&b)).cap = uint(cap) + return +} + +// rawruneslice allocates a new rune slice. The rune slice is not zeroed. +func rawruneslice(size int) (b []rune) { + if uintptr(size) > maxMem/4 { + gothrow("out of memory") + } + mem := goroundupsize(uintptr(size) * 4) + p := gomallocgc(mem, nil, flagNoScan|flagNoZero) + if mem != uintptr(size)*4 { + memclr(add(p, uintptr(size)*4), mem-uintptr(size)*4) + } + + (*slice)(unsafe.Pointer(&b)).array = (*uint8)(p) + (*slice)(unsafe.Pointer(&b)).len = uint(size) + (*slice)(unsafe.Pointer(&b)).cap = uint(mem / 4) + return +} diff --git a/src/pkg/runtime/stubs.go b/src/pkg/runtime/stubs.go index 17177bba27..ceb8031936 100644 --- a/src/pkg/runtime/stubs.go +++ b/src/pkg/runtime/stubs.go @@ -15,18 +15,6 @@ const ( ptrSize = unsafe.Sizeof((*byte)(nil)) ) -// rawstring allocates storage for a new string. The returned -// string and byte slice both refer to the same storage. -// The storage is not zeroed. Callers should use -// b to set the string contents and then drop b. -func rawstring(size int) (string, []byte) - -// rawbyteslice allocates a new byte slice. The byte slice is not zeroed. -func rawbyteslice(size int) []byte - -// rawruneslice allocates a new rune slice. 
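rawbyteslice and rawruneslice above round the requested size up with goroundupsize and then clear only the tail beyond what the caller asked for; for sizes past the small-object classes the rounding is simply to a whole number of pages. A standalone sketch of that arithmetic, reusing the pageShift/pageSize/pageMask values defined earlier in malloc.go (roundToPage is an illustrative name, not a runtime function):

package main

import "fmt"

const (
    pageShift = 13 // same values as in malloc.go above
    pageSize  = 1 << pageShift
    pageMask  = pageSize - 1
)

// roundToPage mirrors the large-size branch of goroundupsize:
// round up to a multiple of pageSize, leaving size alone on overflow.
func roundToPage(size uintptr) uintptr {
    if size+pageSize < size {
        return size
    }
    return (size + pageSize - 1) &^ pageMask
}

func main() {
    fmt.Println(roundToPage(1))         // 8192
    fmt.Println(roundToPage(8192))      // 8192
    fmt.Println(roundToPage(8193))      // 16384
    fmt.Println(roundToPage(100 << 10)) // 106496, i.e. 13 pages
}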
The rune slice is not zeroed. -func rawruneslice(size int) []rune - //go:noescape func gogetcallerpc(p unsafe.Pointer) uintptr @@ -44,15 +32,38 @@ func add(p unsafe.Pointer, x uintptr) unsafe.Pointer { return unsafe.Pointer(uintptr(p) + x) } -// Make a new object of the given type +// n must be a power of 2 +func roundup(p unsafe.Pointer, n uintptr) unsafe.Pointer { + return unsafe.Pointer((uintptr(p) + n - 1) &^ (n - 1)) +} + // in stubs.goc -func unsafe_New(t *_type) unsafe.Pointer -func unsafe_NewArray(t *_type, n uintptr) unsafe.Pointer +func acquirem() *m +func releasem(mp *m) + +// in asm_*.s +func mcall(fn *byte) +func onM(fn *byte) + +// C routines that run on the M stack. Call these like +// mcall(&mcacheRefill) +// Arguments should be passed in m->scalararg[x] and +// m->ptrarg[x]. Return values can be passed in those +// same slots. +var mcacheRefill byte +var largeAlloc byte +var mprofMalloc byte +var mgc2 byte +var setFinalizer byte +var markallocated_m byte // memclr clears n bytes starting at ptr. // in memclr_*.s func memclr(ptr unsafe.Pointer, n uintptr) +func racemalloc(p unsafe.Pointer, size uintptr) +func tracealloc(p unsafe.Pointer, size uintptr, typ *_type) + // memmove copies n bytes from "from" to "to". // in memmove_*.s func memmove(to unsafe.Pointer, from unsafe.Pointer, n uintptr) @@ -60,11 +71,26 @@ func memmove(to unsafe.Pointer, from unsafe.Pointer, n uintptr) // in asm_*.s func fastrand2() uint32 +const ( + gcpercentUnknown = -2 + concurrentSweep = true +) + // in asm_*.s // if *p == x { *p = y; return true } else { return false }, atomically //go:noescape func gocas(p *uint32, x uint32, y uint32) bool +//go:noescape +func gocasx(p *uintptr, x uintptr, y uintptr) bool + +func goreadgogc() int32 +func gonanotime() int64 +func gosched() +func starttheworld() +func stoptheworld() +func clearpools() + // in asm_*.s //go:noescape func gohash(a *alg, p unsafe.Pointer, size uintptr, seed uintptr) uintptr @@ -86,3 +112,8 @@ var nohashcode uintptr // Go version of runtime.throw. // in panic.c func gothrow(s string) + +func golock(x *lock) +func gounlock(x *lock) +func semacquire(*uint32, bool) +func semrelease(*uint32) diff --git a/src/pkg/runtime/stubs.goc b/src/pkg/runtime/stubs.goc index 6b7e83ad74..c64e73de05 100644 --- a/src/pkg/runtime/stubs.goc +++ b/src/pkg/runtime/stubs.goc @@ -6,6 +6,7 @@ package runtime #include "runtime.h" #include "arch_GOARCH.h" #include "malloc.h" +#include "stack.h" #include "../../cmd/ld/textflag.h" // This file contains functions called by Go but written @@ -23,51 +24,17 @@ package runtime // finished converting runtime support code from C to Go. 
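The stubs.go comment above spells out the calling convention for the C helpers reached through onM: arguments and results travel in m.scalararg and m.ptrarg, and the pointer slots are cleared once consumed, as the shims in this patch do. A toy, self-contained sketch of that shape — the toyM type and this onM are stand-ins, not the runtime's:

package main

import (
    "fmt"
    "unsafe"
)

// Toy stand-ins for the M fields used by the convention.
type toyM struct {
    scalararg [4]uint
    ptrarg    [4]unsafe.Pointer
}

var mp toyM

// Stand-in for onM: the real one switches to the g0 stack and runs a
// C routine; here it just runs fn on the current stack.
func onM(fn func()) { fn() }

// Stand-in for the C-side largeAlloc: reads its size argument from
// scalararg and returns a result through ptrarg.
func largeAlloc() {
    size := uintptr(mp.scalararg[0])
    buf := make([]byte, size)
    mp.ptrarg[0] = unsafe.Pointer(&buf[0])
}

func main() {
    // Caller side, mirroring gomallocgc's large-object path:
    mp.scalararg[0] = 1 << 16
    onM(largeAlloc)
    p := mp.ptrarg[0]
    mp.ptrarg[0] = nil // clear after use, as the runtime code above does
    fmt.Printf("allocated %d bytes at %p\n", 1<<16, p)
}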
#pragma textflag NOSPLIT -func rawstring(size intgo) (s String, b Slice) { - uintptr ms; - byte *p; - - p = runtime·mallocgc(size, 0, FlagNoScan|FlagNoZero); - s.str = p; - s.len = size; - b.array = p; - b.len = size; - b.cap = size; - for(;;) { - ms = runtime·maxstring; - if((uintptr)size <= ms || runtime·casp((void**)&runtime·maxstring, (void*)ms, (void*)size)) - break; - } +func golock(p *Lock) { + runtime·lock(p); +} +#pragma textflag NOSPLIT +func gounlock(p *Lock) { + runtime·unlock(p); } #pragma textflag NOSPLIT -func rawbyteslice(size intgo) (b Slice) { - uintptr cap; - byte *p; - - cap = runtime·roundupsize(size); - p = runtime·mallocgc(cap, 0, FlagNoScan|FlagNoZero); - if(cap != size) - runtime·memclr(p + size, cap - size); - b.array = p; - b.len = size; - b.cap = cap; -} - -#pragma textflag NOSPLIT -func rawruneslice(size intgo) (b Slice) { - uintptr mem; - byte *p; - - if(size > MaxMem/sizeof(int32)) - runtime·throw("out of memory"); - mem = runtime·roundupsize(size*sizeof(int32)); - p = runtime·mallocgc(mem, 0, FlagNoScan|FlagNoZero); - if(mem != size*sizeof(int32)) - runtime·memclr(p + size*sizeof(int32), mem - size*sizeof(int32)); - b.array = p; - b.len = size; - b.cap = mem/sizeof(int32); +func goreadgogc() (r int32) { + r = runtime·readgogc(); } // entry point for testing @@ -77,16 +44,38 @@ func gostringW(str Slice) (s String) { } #pragma textflag NOSPLIT -func runtime·unsafe_New(t *Type) (ret *byte) { - ret = runtime·cnew(t); -} - -#pragma textflag NOSPLIT -func runtime·unsafe_NewArray(t *Type, n int) (ret *byte) { - ret = runtime·cnewarray(t, n); +func gonanotime() (r int64) { + r = runtime·nanotime(); } #pragma textflag NOSPLIT func runtime·gocas(p *uint32, x uint32, y uint32) (ret bool) { ret = runtime·cas(p, x, y); } + +#pragma textflag NOSPLIT +func runtime·gocasx(p *uintptr, x uintptr, y uintptr) (ret bool) { + ret = runtime·casp((void**)p, (void*)x, (void*)y); +} + +#pragma textflag NOSPLIT +func runtime·acquirem() (ret *M) { + ret = g->m; + ret->locks++; +} + +#pragma textflag NOSPLIT +func runtime·releasem(mp *M) { + mp->locks--; + if(mp->locks == 0 && g->preempt) { + // restore the preemption request in case we've cleared it in newstack + g->stackguard0 = StackPreempt; + } +} + +// For testing. +// TODO: find a better place for this. 
+func GCMask(x Eface) (mask Slice) { + runtime·getgcmask(x.data, x.type, &mask.array, &mask.len); + mask.cap = mask.len; +} diff --git a/src/pkg/runtime/traceback_arm.c b/src/pkg/runtime/traceback_arm.c index 3595002d89..c297582059 100644 --- a/src/pkg/runtime/traceback_arm.c +++ b/src/pkg/runtime/traceback_arm.c @@ -350,3 +350,9 @@ runtime·callers(int32 skip, uintptr *pcbuf, int32 m) return runtime·gentraceback(pc, sp, 0, g, skip, pcbuf, m, nil, nil, false); } + +int32 +runtime·gcallers(G *gp, int32 skip, uintptr *pcbuf, int32 m) +{ + return runtime·gentraceback(~(uintptr)0, ~(uintptr)0, 0, gp, skip, pcbuf, m, nil, nil, false); +} diff --git a/src/pkg/runtime/traceback_x86.c b/src/pkg/runtime/traceback_x86.c index 0ecaecf9cf..23952b1726 100644 --- a/src/pkg/runtime/traceback_x86.c +++ b/src/pkg/runtime/traceback_x86.c @@ -428,3 +428,9 @@ runtime·callers(int32 skip, uintptr *pcbuf, int32 m) return runtime·gentraceback(pc, sp, 0, g, skip, pcbuf, m, nil, nil, false); } + +int32 +runtime·gcallers(G *gp, int32 skip, uintptr *pcbuf, int32 m) +{ + return runtime·gentraceback(~(uintptr)0, ~(uintptr)0, 0, gp, skip, pcbuf, m, nil, nil, false); +} diff --git a/test/live.go b/test/live.go index fd52798473..6ac1d6a464 100644 --- a/test/live.go +++ b/test/live.go @@ -397,9 +397,9 @@ func f27defer(b bool) { func f27go(b bool) { x := 0 if b { - go call27(func() {x++}) // ERROR "live at call to new: &x" "live at call to newproc: &x$" + go call27(func() {x++}) // ERROR "live at call to newobject: &x" "live at call to newproc: &x$" } - go call27(func() {x++}) // ERROR "live at call to new: &x" + go call27(func() {x++}) // ERROR "live at call to newobject: &x" println() } @@ -461,7 +461,7 @@ func f31(b1, b2, b3 bool) { g31("a") // ERROR "live at call to convT2E: autotmp_[0-9]+$" "live at call to g31: autotmp_[0-9]+$" } if b2 { - h31("b") // ERROR "live at call to new: autotmp_[0-9]+$" "live at call to convT2E: autotmp_[0-9]+ autotmp_[0-9]+$" "live at call to h31: autotmp_[0-9]+$" + h31("b") // ERROR "live at call to newobject: autotmp_[0-9]+$" "live at call to convT2E: autotmp_[0-9]+ autotmp_[0-9]+$" "live at call to h31: autotmp_[0-9]+$" } if b3 { panic("asdf") // ERROR "live at call to convT2E: autotmp_[0-9]+$" "live at call to panic: autotmp_[0-9]+$" @@ -583,13 +583,13 @@ func f39a() (x []int) { } func f39b() (x [10]*int) { - x = [10]*int{new(int)} // ERROR "live at call to new: x" + x = [10]*int{new(int)} // ERROR "live at call to newobject: x" println() // ERROR "live at call to printnl: x" return x } func f39c() (x [10]*int) { - x = [10]*int{new(int)} // ERROR "live at call to new: x" + x = [10]*int{new(int)} // ERROR "live at call to newobject: x" println() // ERROR "live at call to printnl: x" return }
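The traceback additions give the runtime a gcallers that walks another goroutine's stack the way runtime·callers walks the current one; mprofMalloc uses whichever applies to attribute an allocation to a call stack. From user code, the exported runtime.Callers performs the same capture for the current goroutine — a trivial sketch, not part of the patch:

package main

import (
    "fmt"
    "runtime"
)

func record() int {
    // Capture up to 32 return PCs, skipping the Callers frame itself,
    // much like mprofMalloc's fixed stk[32] buffer.
    var stk [32]uintptr
    return runtime.Callers(1, stk[:])
}

func main() {
    fmt.Println("captured", record(), "frames")
}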