From 097362fd2e01735b25b79c71ba6005cd38f81da0 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Thu, 30 Oct 2014 10:45:41 -0400 Subject: [PATCH 01/26] [dev.power64] runtime: match argument/return type signedness in power64x assembly Previously, the power64x runtime assembly was sloppy about using sign-extending versus zero-extending moves of arguments and return values. I think all of the cases that actually mattered have been fixed in recent CLs; this CL fixes up the few remaining mismatches. LGTM=rsc R=rsc, dave CC=golang-codereviews https://golang.org/cl/162480043 --- src/runtime/asm_power64x.s | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/src/runtime/asm_power64x.s b/src/runtime/asm_power64x.s index f77658032e..b6eac96110 100644 --- a/src/runtime/asm_power64x.s +++ b/src/runtime/asm_power64x.s @@ -299,7 +299,7 @@ TEXT runtime·morestack_noctxt(SB),NOSPLIT,$-8-0 // Note: can't just "BR NAME(SB)" - bad inlining results. TEXT ·reflectcall(SB), NOSPLIT, $-8-24 - MOVW argsize+16(FP), R3 + MOVWZ argsize+16(FP), R3 DISPATCH(runtime·call16, 16) DISPATCH(runtime·call32, 32) DISPATCH(runtime·call64, 64) @@ -336,7 +336,7 @@ TEXT NAME(SB), WRAPPER, $MAXSIZE-24; \ NO_LOCAL_POINTERS; \ /* copy arguments to stack */ \ MOVD argptr+8(FP), R3; \ - MOVW argsize+16(FP), R4; \ + MOVWZ argsize+16(FP), R4; \ MOVD R1, R5; \ ADD $(8-1), R5; \ SUB $1, R3; \ @@ -354,8 +354,8 @@ TEXT NAME(SB), WRAPPER, $MAXSIZE-24; \ BL (CTR); \ /* copy return values back */ \ MOVD argptr+8(FP), R3; \ - MOVW argsize+16(FP), R4; \ - MOVW retoffset+20(FP), R6; \ + MOVWZ argsize+16(FP), R4; \ + MOVWZ retoffset+20(FP), R6; \ MOVD R1, R5; \ ADD R6, R5; \ ADD R6, R3; \ @@ -398,7 +398,7 @@ CALLFN(·call268435456, 268435456) CALLFN(·call536870912, 536870912) CALLFN(·call1073741824, 1073741824) -// bool cas(int32 *val, int32 old, int32 new) +// bool cas(uint32 *val, uint32 old, uint32 new) // Atomically: // if(*val == old){ // *val = new; @@ -407,8 +407,8 @@ 
CALLFN(·call1073741824, 1073741824) // return 0; TEXT runtime·cas(SB), NOSPLIT, $0-17 MOVD p+0(FP), R3 - MOVW old+8(FP), R4 - MOVW new+12(FP), R5 + MOVWZ old+8(FP), R4 + MOVWZ new+12(FP), R5 cas_again: SYNC LWAR (R3), R6 From 36d417c0e380b8ea762812b415796cf4b0af72de Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Thu, 30 Oct 2014 11:17:26 -0400 Subject: [PATCH 02/26] [dev.power64] runtime: test CAS on large unsigned 32-bit numbers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds a test to runtime·check to ensure CAS of large unsigned 32-bit numbers does not accidentally sign-extend its arguments. LGTM=rsc R=rsc CC=golang-codereviews https://golang.org/cl/162490044 --- src/runtime/runtime.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/runtime/runtime.c b/src/runtime/runtime.c index f19f8e4be3..a684142848 100644 --- a/src/runtime/runtime.c +++ b/src/runtime/runtime.c @@ -226,6 +226,12 @@ runtime·check(void) if(z != 4) runtime·throw("cas4"); + z = 0xffffffff; + if(!runtime·cas(&z, 0xffffffff, 0xfffffffe)) + runtime·throw("cas5"); + if(z != 0xfffffffe) + runtime·throw("cas6"); + k = (byte*)0xfedcb123; if(sizeof(void*) == 8) k = (byte*)((uintptr)k<<10); From 4cf28a11e3807f2f34785d6d4e6aac0821bac654 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Thu, 30 Oct 2014 12:08:21 -0400 Subject: [PATCH 03/26] [dev.power64] runtime: fix out-of-date comment in panic LGTM=bradfitz R=rsc, bradfitz CC=golang-codereviews https://golang.org/cl/162500043 --- src/runtime/panic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runtime/panic.c b/src/runtime/panic.c index 46683b2b0c..b19fdd0e18 100644 --- a/src/runtime/panic.c +++ b/src/runtime/panic.c @@ -69,7 +69,7 @@ runtime·recovery_m(G *gp) // each call to deferproc. // (The pc we're returning to does pop pop // before it tests the return value.) - // On the arm there are 2 saved LRs mixed in too. 
+ // On the arm and power there are 2 saved LRs mixed in too. if(thechar == '5' || thechar == '9') gp->sched.sp = (uintptr)argp - 4*sizeof(uintptr); else From 8a09639ae8b02317d990ef8e8c5929baf96659cd Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Thu, 30 Oct 2014 15:58:30 -0400 Subject: [PATCH 04/26] [dev.power64] runtime: make asm_power64x.s go vet-clean No real problems found. Just lots of argument names that didn't quite match up. LGTM=rsc R=rsc, dave CC=golang-codereviews https://golang.org/cl/169790043 --- src/runtime/asm_power64x.s | 41 +++++++++++++++++++------------------- 1 file changed, 21 insertions(+), 20 deletions(-) diff --git a/src/runtime/asm_power64x.s b/src/runtime/asm_power64x.s index b6eac96110..e1f8a84aff 100644 --- a/src/runtime/asm_power64x.s +++ b/src/runtime/asm_power64x.s @@ -86,7 +86,7 @@ TEXT runtime·reginit(SB),NOSPLIT,$-8-0 // void gosave(Gobuf*) // save state in Gobuf; setjmp TEXT runtime·gosave(SB), NOSPLIT, $-8-8 - MOVD gobuf+0(FP), R3 + MOVD buf+0(FP), R3 MOVD R1, gobuf_sp(R3) MOVD LR, R31 MOVD R31, gobuf_pc(R3) @@ -99,7 +99,7 @@ TEXT runtime·gosave(SB), NOSPLIT, $-8-8 // void gogo(Gobuf*) // restore state from Gobuf; longjmp TEXT runtime·gogo(SB), NOSPLIT, $-8-8 - MOVD gobuf+0(FP), R5 + MOVD buf+0(FP), R5 MOVD gobuf_g(R5), g // make sure g is not nil MOVD 0(g), R4 MOVD gobuf_sp(R5), R1 @@ -299,7 +299,7 @@ TEXT runtime·morestack_noctxt(SB),NOSPLIT,$-8-0 // Note: can't just "BR NAME(SB)" - bad inlining results. 
TEXT ·reflectcall(SB), NOSPLIT, $-8-24 - MOVWZ argsize+16(FP), R3 + MOVWZ n+16(FP), R3 DISPATCH(runtime·call16, 16) DISPATCH(runtime·call32, 32) DISPATCH(runtime·call64, 64) @@ -335,8 +335,8 @@ TEXT ·reflectcall(SB), NOSPLIT, $-8-24 TEXT NAME(SB), WRAPPER, $MAXSIZE-24; \ NO_LOCAL_POINTERS; \ /* copy arguments to stack */ \ - MOVD argptr+8(FP), R3; \ - MOVWZ argsize+16(FP), R4; \ + MOVD arg+8(FP), R3; \ + MOVWZ n+16(FP), R4; \ MOVD R1, R5; \ ADD $(8-1), R5; \ SUB $1, R3; \ @@ -353,8 +353,8 @@ TEXT NAME(SB), WRAPPER, $MAXSIZE-24; \ PCDATA $PCDATA_StackMapIndex, $0; \ BL (CTR); \ /* copy return values back */ \ - MOVD argptr+8(FP), R3; \ - MOVWZ argsize+16(FP), R4; \ + MOVD arg+8(FP), R3; \ + MOVWZ n+16(FP), R4; \ MOVWZ retoffset+20(FP), R6; \ MOVD R1, R5; \ ADD R6, R5; \ @@ -398,7 +398,7 @@ CALLFN(·call268435456, 268435456) CALLFN(·call536870912, 536870912) CALLFN(·call1073741824, 1073741824) -// bool cas(uint32 *val, uint32 old, uint32 new) +// bool cas(uint32 *ptr, uint32 old, uint32 new) // Atomically: // if(*val == old){ // *val = new; @@ -406,7 +406,7 @@ CALLFN(·call1073741824, 1073741824) // } else // return 0; TEXT runtime·cas(SB), NOSPLIT, $0-17 - MOVD p+0(FP), R3 + MOVD ptr+0(FP), R3 MOVWZ old+8(FP), R4 MOVWZ new+12(FP), R5 cas_again: @@ -425,7 +425,7 @@ cas_fail: MOVD $0, R3 BR -5(PC) -// bool runtime·cas64(uint64 *val, uint64 old, uint64 new) +// bool runtime·cas64(uint64 *ptr, uint64 old, uint64 new) // Atomically: // if(*val == *old){ // *val = new; @@ -434,7 +434,7 @@ cas_fail: // return 0; // } TEXT runtime·cas64(SB), NOSPLIT, $0-25 - MOVD p+0(FP), R3 + MOVD ptr+0(FP), R3 MOVD old+8(FP), R4 MOVD new+16(FP), R5 cas64_again: @@ -475,12 +475,12 @@ TEXT runtime·atomicstoreuintptr(SB), NOSPLIT, $0-16 TEXT runtime·casp(SB), NOSPLIT, $0-25 BR runtime·cas64(SB) -// uint32 xadd(uint32 volatile *val, int32 delta) +// uint32 xadd(uint32 volatile *ptr, int32 delta) // Atomically: // *val += delta; // return *val; TEXT runtime·xadd(SB), NOSPLIT, $0-20 - MOVD 
p+0(FP), R4 + MOVD ptr+0(FP), R4 MOVW delta+8(FP), R5 SYNC LWAR (R4), R3 @@ -493,7 +493,7 @@ TEXT runtime·xadd(SB), NOSPLIT, $0-20 RETURN TEXT runtime·xadd64(SB), NOSPLIT, $0-24 - MOVD p+0(FP), R4 + MOVD ptr+0(FP), R4 MOVD delta+8(FP), R5 SYNC LDAR (R4), R3 @@ -506,7 +506,7 @@ TEXT runtime·xadd64(SB), NOSPLIT, $0-24 RETURN TEXT runtime·xchg(SB), NOSPLIT, $0-20 - MOVD p+0(FP), R4 + MOVD ptr+0(FP), R4 MOVW new+8(FP), R5 SYNC LWAR (R4), R3 @@ -518,7 +518,7 @@ TEXT runtime·xchg(SB), NOSPLIT, $0-20 RETURN TEXT runtime·xchg64(SB), NOSPLIT, $0-24 - MOVD p+0(FP), R4 + MOVD ptr+0(FP), R4 MOVD new+8(FP), R5 SYNC LDAR (R4), R3 @@ -651,7 +651,7 @@ TEXT runtime·setcallerpc(SB),NOSPLIT,$-8-16 RETURN TEXT runtime·getcallersp(SB),NOSPLIT,$0-16 - MOVD sp+0(FP), R3 + MOVD argp+0(FP), R3 SUB $8, R3 MOVD R3, ret+8(FP) RETURN @@ -695,22 +695,23 @@ TEXT runtime·aeshashstr(SB),NOSPLIT,$-8-0 TEXT runtime·memeq(SB),NOSPLIT,$-8-25 MOVD a+0(FP), R3 MOVD b+8(FP), R4 - MOVD count+16(FP), R5 + MOVD size+16(FP), R5 SUB $1, R3 SUB $1, R4 ADD R3, R5, R8 loop: CMP R3, R8 - BNE 4(PC) + BNE test MOVD $1, R3 MOVB R3, ret+24(FP) RETURN +test: MOVBZU 1(R3), R6 MOVBZU 1(R4), R7 CMP R6, R7 BEQ loop - MOVB R0, ret+24(FP) + MOVB $0, ret+24(FP) RETURN // eqstring tests whether two strings are equal. From c24156bafe24a82ca4c182f289b1bff121ea72e0 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Thu, 30 Oct 2014 16:44:42 -0400 Subject: [PATCH 05/26] [dev.power64] runtime: fix a syntax error that slipped in to asm_power64x.s Apparently I had already moved on to fixing another problem when I submitted CL 169790043. 
LGTM=dave R=rsc, dave CC=golang-codereviews https://golang.org/cl/165210043 --- src/runtime/asm_power64x.s | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runtime/asm_power64x.s b/src/runtime/asm_power64x.s index e1f8a84aff..ab2db061c2 100644 --- a/src/runtime/asm_power64x.s +++ b/src/runtime/asm_power64x.s @@ -711,7 +711,7 @@ test: CMP R6, R7 BEQ loop - MOVB $0, ret+24(FP) + MOVB R0, ret+24(FP) RETURN // eqstring tests whether two strings are equal. From 6e86003651be7feb6da46360d6c411ff1c29b7f5 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 31 Oct 2014 11:08:27 -0400 Subject: [PATCH 06/26] [dev.power64] 9g: fix under-zeroing in clearfat All three cases of clearfat were wrong on power64x. The cases that handle 1032 bytes and up and 32 bytes and up both use MOVDU (one directly generated in a loop and the other via duffzero), which leaves the pointer register pointing at the *last written* address. The generated code was not accounting for this, so the byte fill loop was re-zeroing the last zeroed dword, rather than the bytes following the last zeroed dword. Fix this by simply adding an additional 8 byte offset to the byte zeroing loop. The case that handled under 32 bytes was also wrong. It didn't update the pointer register at all, so the byte zeroing loop was simply re-zeroing the beginning of the region. Again, the fix is to add an offset to the byte zeroing loop to account for this. 
LGTM=dave, bradfitz R=rsc, dave, bradfitz CC=golang-codereviews https://golang.org/cl/168870043 --- src/cmd/9g/ggen.c | 20 +++++++---- src/runtime/asm_power64x.s | 2 +- test/clearfat.go | 68 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 82 insertions(+), 8 deletions(-) create mode 100644 test/clearfat.go diff --git a/src/cmd/9g/ggen.c b/src/cmd/9g/ggen.c index c41d8eb414..7d9cf5050d 100644 --- a/src/cmd/9g/ggen.c +++ b/src/cmd/9g/ggen.c @@ -900,7 +900,7 @@ ret: void clearfat(Node *nl) { - uint64 w, c, q, t; + uint64 w, c, q, t, boff; Node dst, end, r0, *f; Prog *p, *pl; @@ -944,6 +944,8 @@ clearfat(Node *nl) patch(gbranch(ABNE, T, 0), pl); regfree(&end); + // The loop leaves R3 on the last zeroed dword + boff = 8; } else if(q >= 4) { p = gins(ASUB, N, &dst); p->from.type = D_CONST; @@ -953,17 +955,21 @@ clearfat(Node *nl) afunclit(&p->to, f); // 4 and 128 = magic constants: see ../../runtime/asm_power64x.s p->to.offset = 4*(128-q); - } else - for(t = 0; t < q; t++) { - p = gins(AMOVD, &r0, &dst); - p->to.type = D_OREG; - p->to.offset = 8*t; + // duffzero leaves R3 on the last zeroed dword + boff = 8; + } else { + for(t = 0; t < q; t++) { + p = gins(AMOVD, &r0, &dst); + p->to.type = D_OREG; + p->to.offset = 8*t; + } + boff = 8*q; } for(t = 0; t < c; t++) { p = gins(AMOVB, &r0, &dst); p->to.type = D_OREG; - p->to.offset = t; + p->to.offset = t+boff; } reg[REGRT1]--; } diff --git a/src/runtime/asm_power64x.s b/src/runtime/asm_power64x.s index ab2db061c2..2ad3e56e94 100644 --- a/src/runtime/asm_power64x.s +++ b/src/runtime/asm_power64x.s @@ -829,7 +829,7 @@ notfound: // in ../../cmd/9g/ggen.c:/^clearfat. // R0: always zero // R3 (aka REGRT1): ptr to memory to be zeroed - 8 -// R3 is updated as a side effect. +// On return, R3 points to the last zeroed dword. 
TEXT runtime·duffzero(SB), NOSPLIT, $-8-0 MOVDU R0, 8(R3) MOVDU R0, 8(R3) diff --git a/test/clearfat.go b/test/clearfat.go new file mode 100644 index 0000000000..45d539306e --- /dev/null +++ b/test/clearfat.go @@ -0,0 +1,68 @@ +// runoutput + +// Copyright 2014 The Go Authors. All rights reserved. +// Use of this source code is governed by a BSD-style +// license that can be found in the LICENSE file. + +// Check that {5,6,8,9}g/ggen.c:clearfat is zeroing the entire object. + +package main + +import ( + "bytes" + "fmt" + "strconv" + "strings" +) + +const ntest = 1100 + +func main() { + var decls, calls bytes.Buffer + + for i := 1; i <= ntest; i++ { + s := strconv.Itoa(i) + decls.WriteString(strings.Replace(decl, "$", s, -1)) + calls.WriteString(strings.Replace("poison$()\n\tclearfat$()\n\t", "$", s, -1)) + } + + program = strings.Replace(program, "$DECLS", decls.String(), 1) + program = strings.Replace(program, "$CALLS", calls.String(), 1) + fmt.Print(program) +} + +var program = `package main + +var count int + +$DECLS + +func main() { + $CALLS + if count != 0 { + println("failed", count, "case(s)") + } +} +` + +const decl = ` +func poison$() { + // Grow and poison the stack space that will be used by clearfat$ + var t [2*$]byte + for i := range t { + t[i] = 0xff + } +} + +func clearfat$() { + var t [$]byte + + for _, x := range t { + if x != 0 { +// println("clearfat$: index", i, "expected 0, got", x) + count++ + break + } + } +} +` From 40a5b3ecb1578a68b0423b8ef4eaebd5fb4c7869 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 31 Oct 2014 13:39:36 -0400 Subject: [PATCH 07/26] [dev.power64] runtime: fix fastrand1 on power64x fastrand1 depends on testing the high bit of its uint32 state. For efficiency, all of the architectures implement this as a sign bit test. However, on power64, fastrand1 was using a 64-bit sign test on the zero-extended 32-bit state. This always failed, causing fastrand1 to have very short periods and often decay to 0 and get stuck. 
Fix this by using a 32-bit signed compare instead of a 64-bit compare. This fixes various tests for the randomization of select or map iteration. LGTM=rsc R=rsc, dave CC=golang-codereviews https://golang.org/cl/166990043 --- src/runtime/asm_power64x.s | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/runtime/asm_power64x.s b/src/runtime/asm_power64x.s index 2ad3e56e94..713cc5f549 100644 --- a/src/runtime/asm_power64x.s +++ b/src/runtime/asm_power64x.s @@ -965,7 +965,7 @@ TEXT runtime·fastrand1(SB), NOSPLIT, $0-4 MOVD g_m(g), R4 MOVWZ m_fastrand(R4), R3 ADD R3, R3 - CMP R3, $0 + CMPW R3, $0 BGE 2(PC) XOR $0x88888eef, R3 MOVW R3, m_fastrand(R4) From 700ab16daf01e77f77faca242e148ad2100b6627 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 31 Oct 2014 15:29:03 -0400 Subject: [PATCH 08/26] [dev.power64] reflect: fix asm on power64x reflect/asm_power64x.s was missing changes made to other platforms for stack maps. This CL ports those changes. With this fix, the reflect test passes on power64x. LGTM=rsc R=rsc, dave CC=golang-codereviews https://golang.org/cl/170870043 --- src/reflect/asm_power64x.s | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/src/reflect/asm_power64x.s b/src/reflect/asm_power64x.s index e430cdf04c..4720638242 100644 --- a/src/reflect/asm_power64x.s +++ b/src/reflect/asm_power64x.s @@ -5,12 +5,14 @@ // +build power64 power64le #include "textflag.h" +#include "funcdata.h" // makeFuncStub is the code half of the function returned by MakeFunc. // See the comment on the declaration of makeFuncStub in makefunc.go // for more details. -// No argsize here, gc generates argsize info at call site. +// No arg size here, runtime pulls arg map out of the func value. 
TEXT ·makeFuncStub(SB),(NOSPLIT|WRAPPER),$16 + NO_LOCAL_POINTERS MOVD R11, 8(R1) MOVD $argframe+0(FP), R3 MOVD R3, 16(R1) @@ -20,8 +22,9 @@ TEXT ·makeFuncStub(SB),(NOSPLIT|WRAPPER),$16 // methodValueCall is the code half of the function returned by makeMethodValue. // See the comment on the declaration of methodValueCall in makefunc.go // for more details. -// No argsize here, gc generates argsize info at call site. +// No arg size here; runtime pulls arg map out of the func value. TEXT ·methodValueCall(SB),(NOSPLIT|WRAPPER),$16 + NO_LOCAL_POINTERS MOVD R11, 8(R1) MOVD $argframe+0(FP), R3 MOVD R3, 16(R1) From e1db508ffdcfbb78a73c6df7e3d0a6b0cb6f001a Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 31 Oct 2014 16:58:12 -0400 Subject: [PATCH 09/26] [dev.power64] runtime: fix gcinfo_test on power64x The GC info masks for slices and strings were changed in commit caab29a25f68, but the reference masks used by gcinfo_test for power64x hadn't caught up. Now they're identical to amd64, so this CL fixes this test by combining the reference masks for these platforms. 
LGTM=rsc R=rsc, dave CC=golang-codereviews https://golang.org/cl/162620044 --- src/runtime/gcinfo_test.go | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/runtime/gcinfo_test.go b/src/runtime/gcinfo_test.go index 7d432983b1..2c6d4d662f 100644 --- a/src/runtime/gcinfo_test.go +++ b/src/runtime/gcinfo_test.go @@ -137,7 +137,7 @@ func infoBigStruct() []byte { BitsScalar, BitsScalar, BitsScalar, BitsScalar, // t int; y uint16; u uint64 BitsPointer, BitsDead, // i string } - case "amd64": + case "amd64", "power64", "power64le": return []byte{ BitsPointer, // q *int BitsScalar, BitsScalar, BitsScalar, // w byte; e [17]byte @@ -153,12 +153,6 @@ func infoBigStruct() []byte { BitsScalar, BitsScalar, BitsDead, BitsScalar, BitsScalar, // t int; y uint16; u uint64 BitsPointer, BitsDead, // i string } - case "power64", "power64le": - return []byte{ - BitsPointer, BitsScalar, BitsScalar, BitsScalar, - BitsMultiWord, BitsSlice, BitsScalar, BitsScalar, - BitsScalar, BitsScalar, BitsMultiWord, BitsString, - } default: panic("unknown arch") } From 84f7ac98f7f187ce851b87d020bdc8efe6a15f1f Mon Sep 17 00:00:00 2001 From: Dave Cheney Date: Sun, 2 Nov 2014 11:23:41 +1100 Subject: [PATCH 10/26] [dev.power64] cmd/objdump: disable tests on power64/power64le LGTM=rsc, austin R=austin, rsc, bradfitz CC=golang-codereviews https://golang.org/cl/164300043 --- src/cmd/objdump/objdump_test.go | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/cmd/objdump/objdump_test.go b/src/cmd/objdump/objdump_test.go index 0a2d2565a7..41f51ebf20 100644 --- a/src/cmd/objdump/objdump_test.go +++ b/src/cmd/objdump/objdump_test.go @@ -49,6 +49,10 @@ func runObjDump(t *testing.T, exe, startaddr, endaddr string) (path, lineno stri case "android", "nacl": t.Skipf("skipping on %s", runtime.GOOS) } + switch runtime.GOARCH { + case "power64", "power64le": + t.Skipf("skipping on %s, issue 9039", runtime.GOARCH) + } cmd := exec.Command(exe, os.Args[0], startaddr, endaddr) out, 
err := cmd.CombinedOutput() @@ -199,6 +203,10 @@ func testDisasm(t *testing.T, flags ...string) { } func TestDisasm(t *testing.T) { + switch runtime.GOARCH { + case "power64", "power64le": + t.Skipf("skipping on %s, issue 9039", runtime.GOARCH) + } testDisasm(t) } @@ -207,5 +215,9 @@ func TestDisasmExtld(t *testing.T) { case "plan9", "windows": t.Skipf("skipping on %s", runtime.GOOS) } + switch runtime.GOARCH { + case "power64", "power64le": + t.Skipf("skipping on %s, no support for external linking, issue 9038", runtime.GOARCH) + } testDisasm(t, "-ldflags=-linkmode=external") } From 810019286fdb8ad6b60096f2b0e2dcf0bc900c3a Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Mon, 3 Nov 2014 15:48:51 -0500 Subject: [PATCH 11/26] [dev.power64] 9g: fix nilopt Previously, nilopt was disabled on power64x because it threw away "seemly random segments of code." Indeed, excise on power64x failed to preserve the link field, so it excised not only the requested instruction but all following instructions in the function. Fix excise to retain the link field while otherwise zeroing the instruction. This makes nilopt safe on power64x. It still fails nilptr3.go's tests for removal of repeated nil checks because those depend on also optimizing away repeated loads, which doesn't currently happen on power64x. 
LGTM=dave, rsc R=rsc, dave CC=golang-codereviews https://golang.org/cl/168120043 --- src/cmd/9g/peep.c | 4 +++- src/cmd/gc/popt.c | 4 ---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/src/cmd/9g/peep.c b/src/cmd/9g/peep.c index 5721d7b04f..ec314d6338 100644 --- a/src/cmd/9g/peep.c +++ b/src/cmd/9g/peep.c @@ -44,13 +44,15 @@ peep(Prog *p) void excise(Flow *r) { - Prog *p; + Prog *p, *l; p = r->prog; if(debug['P'] && debug['v']) print("%P ===delete===\n", p); + l = p->link; *p = zprog; p->as = ANOP; + p->link = l; ostats.ndelmov++; } diff --git a/src/cmd/gc/popt.c b/src/cmd/gc/popt.c index 6e6db88ef8..993bb24821 100644 --- a/src/cmd/gc/popt.c +++ b/src/cmd/gc/popt.c @@ -847,10 +847,6 @@ nilopt(Prog *firstp) Graph *g; int ncheck, nkill; - // TODO(minux): nilopt on power64 throw away seemly random segment of code. - if(thechar == '9') - return; - g = flowstart(firstp, sizeof(NilFlow)); if(g == nil) return; From 473bfae5ae3c4adccebc14bca40d0fb0a2ff09ab Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Mon, 3 Nov 2014 17:24:13 -0500 Subject: [PATCH 12/26] [dev.power64] liblink: fix printing of branch targets Print PC stored in target Prog* of branch instructions when available instead of the offset stored in the branch instruction. The offset tends to be wrong after code transformations, so previously this led to confusing listings. 
LGTM=rsc R=rsc CC=golang-codereviews https://golang.org/cl/168980043 --- src/liblink/list9.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/liblink/list9.c b/src/liblink/list9.c index 041c6884f1..c9190d8940 100644 --- a/src/liblink/list9.c +++ b/src/liblink/list9.c @@ -259,11 +259,12 @@ Dconv(Fmt *fp) sprint(str, "%s+%.5lux(BRANCH)", a->sym->name, v); else sprint(str, "%.5lux(BRANCH)", v); - } else - if(a->sym != nil) - sprint(str, "%s+%lld(APC)", a->sym->name, a->offset); - else - sprint(str, "%lld(APC)", a->offset); + } else if(a->u.branch != nil) + sprint(str, "%lld", a->u.branch->pc); + else if(a->sym != nil) + sprint(str, "%s+%lld(APC)", a->sym->name, a->offset); + else + sprint(str, "%lld(APC)", a->offset); break; case D_FCONST: From d10a115ef9511b9be8b1f7b0fb5a292a9917aab9 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Mon, 3 Nov 2014 17:25:03 -0500 Subject: [PATCH 13/26] [dev.power64] test: disable nilptr3 test on power64x The remaining failures in this test are because of incomplete optimization support on power64x. Tracked in issue 9058. LGTM=rsc R=rsc CC=golang-codereviews https://golang.org/cl/168130043 --- test/nilptr3.go | 2 ++ 1 file changed, 2 insertions(+) diff --git a/test/nilptr3.go b/test/nilptr3.go index 2757daef0b..9d65e1e91b 100644 --- a/test/nilptr3.go +++ b/test/nilptr3.go @@ -1,4 +1,6 @@ // errorcheck -0 -d=nil +// Fails on power64x because of incomplete optimization. See issue 9058. +// +build !power64,!power64le // Copyright 2013 The Go Authors. All rights reserved. // Use of this source code is governed by a BSD-style From a5e1e1599c427013cbbaec5716da4fefbd67a4d1 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Mon, 3 Nov 2014 17:25:36 -0500 Subject: [PATCH 14/26] [dev.power64] test: "fix" live.go test on power64x On power64x, this one line in live.go reports that t is live because of missing optimization passes. 
This isn't what this test is trying to test, so shuffle bad40 so that it still accomplishes the intent of the test without also depending on optimization. LGTM=rsc R=rsc, dave CC=golang-codereviews https://golang.org/cl/167110043 --- test/live.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/test/live.go b/test/live.go index f15bb74ba1..f69d0a4c1e 100644 --- a/test/live.go +++ b/test/live.go @@ -614,8 +614,8 @@ func newT40() *T40 { func bad40() { t := newT40() - println() _ = t + println() } func good40() { From fa32e922d54c2a2d4b9bb97009ac6e3c84af3dd6 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Tue, 4 Nov 2014 16:34:56 -0500 Subject: [PATCH 15/26] [dev.power64] gc: convert Bits to a uint64 array So far all of our architectures have had at most 32 registers, so we've been able to use entry 0 in the Bits uint32 array directly as a register mask. Power64 has 64 registers, so this converts Bits to a uint64 array so we can continue to use entry 0 directly as a register mask on Power64. 
LGTM=rsc R=rsc CC=golang-codereviews https://golang.org/cl/169060043 --- src/cmd/5g/opt.h | 14 +++++++------- src/cmd/5g/reg.c | 48 +++++++++++++++++++++++------------------------ src/cmd/6g/opt.h | 14 +++++++------- src/cmd/6g/reg.c | 46 ++++++++++++++++++++++----------------------- src/cmd/8g/opt.h | 14 +++++++------- src/cmd/8g/reg.c | 46 ++++++++++++++++++++++----------------------- src/cmd/gc/bits.c | 34 ++++++++++++++++++++------------- src/cmd/gc/go.h | 12 +++++++----- 8 files changed, 119 insertions(+), 109 deletions(-) diff --git a/src/cmd/5g/opt.h b/src/cmd/5g/opt.h index 1946c1d33c..5016d1cc83 100644 --- a/src/cmd/5g/opt.h +++ b/src/cmd/5g/opt.h @@ -63,8 +63,8 @@ enum uint32 BLOAD(Reg*); uint32 BSTORE(Reg*); -uint32 LOAD(Reg*); -uint32 STORE(Reg*); +uint64 LOAD(Reg*); +uint64 STORE(Reg*); */ // A Reg is a wrapper around a single Prog (one instruction) that holds @@ -145,7 +145,7 @@ void synch(Reg*, Bits); uint32 allreg(uint32, Rgn*); void paint1(Reg*, int); uint32 paint2(Reg*, int); -void paint3(Reg*, int, int32, int); +void paint3(Reg*, int, uint32, int); void addreg(Adr*, int); void dumpit(char *str, Flow *r0, int); @@ -156,10 +156,10 @@ void peep(Prog*); void excise(Flow*); int copyu(Prog*, Adr*, Adr*); -int32 RtoB(int); -int32 FtoB(int); -int BtoR(int32); -int BtoF(int32); +uint32 RtoB(int); +uint32 FtoB(int); +int BtoR(uint32); +int BtoF(uint32); /* * prog.c diff --git a/src/cmd/5g/reg.c b/src/cmd/5g/reg.c index 8e49a2d9c8..5b25adaf45 100644 --- a/src/cmd/5g/reg.c +++ b/src/cmd/5g/reg.c @@ -35,7 +35,7 @@ #include "opt.h" #define NREGVAR 32 -#define REGBITS ((uint32)0xffffffff) +#define REGBITS ((uint64)0xffffffffull) /*c2go enum { NREGVAR = 32, REGBITS = 0xffffffff, @@ -86,7 +86,7 @@ setaddrs(Bits bit) i = bnum(bit); node = var[i].node; n = var[i].name; - bit.b[i/32] &= ~(1L<<(i%32)); + biclr(&bit, i); // disable all pieces of that variable for(i=0; i 1) print("\n"); paint1(r, i); - bit.b[i/32] &= ~(1L<<(i%32)); + biclr(&bit, i); if(change <= 0) { 
if(debug['R']) print("%L $%d: %Q\n", @@ -570,7 +570,7 @@ walkvardef(Node *n, Reg *r, int active) break; for(v=n->opt; v!=nil; v=v->nextinnode) { bn = v - var; - r1->act.b[bn/32] |= 1L << (bn%32); + biset(&r1->act, bn); } if(r1->f.prog->as == ABL) break; @@ -606,7 +606,7 @@ addsplits(void) ~(r->calahead.b[z] & addrs.b[z]); while(bany(&bit)) { i = bnum(bit); - bit.b[i/32] &= ~(1L << (i%32)); + biclr(&bit, i); } } } @@ -972,10 +972,10 @@ prop(Reg *r, Bits ref, Bits cal) for(z=0; z= nvar || ((cal.b[z]>>i)&1) == 0) + for(i=0; i<64; i++) { + if(z*64+i >= nvar || ((cal.b[z]>>i)&1) == 0) continue; - v = var+z*32+i; + v = var+z*64+i; if(v->node->opt == nil) // v represents fixed register, not Go variable continue; @@ -991,10 +991,10 @@ prop(Reg *r, Bits ref, Bits cal) // This will set the bits at most twice, keeping the overall loop linear. v1 = v->node->opt; j = v1 - var; - if(v == v1 || ((cal.b[j/32]>>(j&31))&1) == 0) { + if(v == v1 || !btest(&cal, j)) { for(; v1 != nil; v1 = v1->nextinnode) { j = v1 - var; - cal.b[j/32] |= 1<<(j&31); + biset(&cal, j); } } } @@ -1115,10 +1115,10 @@ paint1(Reg *r, int bn) Reg *r1; Prog *p; int z; - uint32 bb; + uint64 bb; - z = bn/32; - bb = 1L<<(bn%32); + z = bn/64; + bb = 1LL<<(bn%64); if(r->act.b[z] & bb) return; for(;;) { @@ -1193,10 +1193,10 @@ paint2(Reg *r, int bn) { Reg *r1; int z; - uint32 bb, vreg; + uint64 bb, vreg; - z = bn/32; - bb = 1L << (bn%32); + z = bn/64; + bb = 1LL << (bn%64); vreg = regbits; if(!(r->act.b[z] & bb)) return vreg; @@ -1240,15 +1240,15 @@ paint2(Reg *r, int bn) } void -paint3(Reg *r, int bn, int32 rb, int rn) +paint3(Reg *r, int bn, uint32 rb, int rn) { Reg *r1; Prog *p; int z; - uint32 bb; + uint64 bb; - z = bn/32; - bb = 1L << (bn%32); + z = bn/64; + bb = 1LL << (bn%64); if(r->act.b[z] & bb) return; for(;;) { @@ -1333,7 +1333,7 @@ addreg(Adr *a, int rn) * 10 R10 * 12 R12 */ -int32 +uint32 RtoB(int r) { if(r >= REGTMP-2 && r != 12) // excluded R9 and R10 for m and g, but not R12 @@ -1342,7 +1342,7 @@ 
RtoB(int r) } int -BtoR(int32 b) +BtoR(uint32 b) { b &= 0x11fcL; // excluded R9 and R10 for m and g, but not R12 if(b == 0) @@ -1357,7 +1357,7 @@ BtoR(int32 b) * ... ... * 31 F15 */ -int32 +uint32 FtoB(int f) { @@ -1367,7 +1367,7 @@ FtoB(int f) } int -BtoF(int32 b) +BtoF(uint32 b) { b &= 0xfffc0000L; diff --git a/src/cmd/6g/opt.h b/src/cmd/6g/opt.h index dbd039d89f..4c9bb89fc8 100644 --- a/src/cmd/6g/opt.h +++ b/src/cmd/6g/opt.h @@ -63,8 +63,8 @@ enum uint32 BLOAD(Reg*); uint32 BSTORE(Reg*); -uint32 LOAD(Reg*); -uint32 STORE(Reg*); +uint64 LOAD(Reg*); +uint64 STORE(Reg*); */ // A Reg is a wrapper around a single Prog (one instruction) that holds @@ -141,7 +141,7 @@ void synch(Reg*, Bits); uint32 allreg(uint32, Rgn*); void paint1(Reg*, int); uint32 paint2(Reg*, int); -void paint3(Reg*, int, int32, int); +void paint3(Reg*, int, uint32, int); void addreg(Adr*, int); void dumpone(Flow*, int); void dumpit(char*, Flow*, int); @@ -153,10 +153,10 @@ void peep(Prog*); void excise(Flow*); int copyu(Prog*, Adr*, Adr*); -int32 RtoB(int); -int32 FtoB(int); -int BtoR(int32); -int BtoF(int32); +uint32 RtoB(int); +uint32 FtoB(int); +int BtoR(uint32); +int BtoF(uint32); /* * prog.c diff --git a/src/cmd/6g/reg.c b/src/cmd/6g/reg.c index 1f757e1972..8d600d6433 100644 --- a/src/cmd/6g/reg.c +++ b/src/cmd/6g/reg.c @@ -34,7 +34,7 @@ #include "opt.h" #define NREGVAR 32 /* 16 general + 16 floating */ -#define REGBITS ((uint32)0xffffffff) +#define REGBITS ((uint64)0xffffffffull) /*c2go enum { NREGVAR = 32, REGBITS = 0xffffffff, @@ -71,7 +71,7 @@ setaddrs(Bits bit) i = bnum(bit); node = var[i].node; n = var[i].name; - bit.b[i/32] &= ~(1L<<(i%32)); + biclr(&bit, i); // disable all pieces of that variable for(i=0; ivarno = i; change = 0; paint1(r, i); - bit.b[i/32] &= ~(1L<<(i%32)); + biclr(&bit, i); if(change <= 0) continue; rgp->cost = change; @@ -477,7 +477,7 @@ walkvardef(Node *n, Reg *r, int active) break; for(v=n->opt; v!=nil; v=v->nextinnode) { bn = v - var; - r1->act.b[bn/32] |= 1L << 
(bn%32); + biset(&r1->act, bn); } if(r1->f.prog->as == ACALL) break; @@ -822,10 +822,10 @@ prop(Reg *r, Bits ref, Bits cal) for(z=0; z= nvar || ((cal.b[z]>>i)&1) == 0) + for(i=0; i<64; i++) { + if(z*64+i >= nvar || ((cal.b[z]>>i)&1) == 0) continue; - v = var+z*32+i; + v = var+z*64+i; if(v->node->opt == nil) // v represents fixed register, not Go variable continue; @@ -841,10 +841,10 @@ prop(Reg *r, Bits ref, Bits cal) // This will set the bits at most twice, keeping the overall loop linear. v1 = v->node->opt; j = v1 - var; - if(v == v1 || ((cal.b[j/32]>>(j&31))&1) == 0) { + if(v == v1 || !btest(&cal, j)) { for(; v1 != nil; v1 = v1->nextinnode) { j = v1 - var; - cal.b[j/32] |= 1UL<<(j&31); + biset(&cal, j); } } } @@ -959,10 +959,10 @@ paint1(Reg *r, int bn) { Reg *r1; int z; - uint32 bb; + uint64 bb; - z = bn/32; - bb = 1L<<(bn%32); + z = bn/64; + bb = 1LL<<(bn%64); if(r->act.b[z] & bb) return; for(;;) { @@ -1061,10 +1061,10 @@ paint2(Reg *r, int bn) { Reg *r1; int z; - uint32 bb, vreg, x; + uint64 bb, vreg, x; - z = bn/32; - bb = 1L << (bn%32); + z = bn/64; + bb = 1LL << (bn%64); vreg = regbits; if(!(r->act.b[z] & bb)) return vreg; @@ -1117,15 +1117,15 @@ paint2(Reg *r, int bn) } void -paint3(Reg *r, int bn, int32 rb, int rn) +paint3(Reg *r, int bn, uint32 rb, int rn) { Reg *r1; Prog *p; int z; - uint32 bb; + uint64 bb; - z = bn/32; - bb = 1L << (bn%32); + z = bn/64; + bb = 1LL << (bn%64); if(r->act.b[z] & bb) return; for(;;) { @@ -1198,7 +1198,7 @@ addreg(Adr *a, int rn) ostats.ncvtreg++; } -int32 +uint32 RtoB(int r) { @@ -1208,7 +1208,7 @@ RtoB(int r) } int -BtoR(int32 b) +BtoR(uint32 b) { b &= 0xffffL; if(nacl) @@ -1224,7 +1224,7 @@ BtoR(int32 b) * ... 
* 31 X15 */ -int32 +uint32 FtoB(int f) { if(f < D_X0 || f > D_X15) @@ -1233,7 +1233,7 @@ FtoB(int f) } int -BtoF(int32 b) +BtoF(uint32 b) { b &= 0xFFFF0000L; diff --git a/src/cmd/8g/opt.h b/src/cmd/8g/opt.h index 09f58c40ae..0e2d165b17 100644 --- a/src/cmd/8g/opt.h +++ b/src/cmd/8g/opt.h @@ -63,8 +63,8 @@ enum uint32 BLOAD(Reg*); uint32 BSTORE(Reg*); -uint32 LOAD(Reg*); -uint32 STORE(Reg*); +uint64 LOAD(Reg*); +uint64 STORE(Reg*); */ // A Reg is a wrapper around a single Prog (one instruction) that holds @@ -159,7 +159,7 @@ void synch(Reg*, Bits); uint32 allreg(uint32, Rgn*); void paint1(Reg*, int); uint32 paint2(Reg*, int); -void paint3(Reg*, int, int32, int); +void paint3(Reg*, int, uint32, int); void addreg(Adr*, int); void dumpone(Flow*, int); void dumpit(char*, Flow*, int); @@ -171,10 +171,10 @@ void peep(Prog*); void excise(Flow*); int copyu(Prog*, Adr*, Adr*); -int32 RtoB(int); -int32 FtoB(int); -int BtoR(int32); -int BtoF(int32); +uint32 RtoB(int); +uint32 FtoB(int); +int BtoR(uint32); +int BtoF(uint32); /* * prog.c diff --git a/src/cmd/8g/reg.c b/src/cmd/8g/reg.c index 302b273a1b..0fbe684821 100644 --- a/src/cmd/8g/reg.c +++ b/src/cmd/8g/reg.c @@ -34,7 +34,7 @@ #include "opt.h" #define NREGVAR 16 /* 8 integer + 8 floating */ -#define REGBITS ((uint32)0xffff) +#define REGBITS ((uint64)0xffffull) /*c2go enum { NREGVAR = 16, REGBITS = (1<varno = i; change = 0; paint1(r, i); - bit.b[i/32] &= ~(1L<<(i%32)); + biclr(&bit, i); if(change <= 0) continue; rgp->cost = change; @@ -446,7 +446,7 @@ walkvardef(Node *n, Reg *r, int active) break; for(v=n->opt; v!=nil; v=v->nextinnode) { bn = v - var; - r1->act.b[bn/32] |= 1L << (bn%32); + biset(&r1->act, bn); } if(r1->f.prog->as == ACALL) break; @@ -788,10 +788,10 @@ prop(Reg *r, Bits ref, Bits cal) for(z=0; z= nvar || ((cal.b[z]>>i)&1) == 0) + for(i=0; i<64; i++) { + if(z*64+i >= nvar || ((cal.b[z]>>i)&1) == 0) continue; - v = var+z*32+i; + v = var+z*64+i; if(v->node->opt == nil) // v represents fixed register, not Go 
variable continue; @@ -807,10 +807,10 @@ prop(Reg *r, Bits ref, Bits cal) // This will set the bits at most twice, keeping the overall loop linear. v1 = v->node->opt; j = v1 - var; - if(v == v1 || ((cal.b[j/32]>>(j&31))&1) == 0) { + if(v == v1 || !btest(&cal, j)) { for(; v1 != nil; v1 = v1->nextinnode) { j = v1 - var; - cal.b[j/32] |= 1<<(j&31); + biset(&cal, j); } } } @@ -926,10 +926,10 @@ paint1(Reg *r, int bn) Reg *r1; Prog *p; int z; - uint32 bb; + uint64 bb; - z = bn/32; - bb = 1L<<(bn%32); + z = bn/64; + bb = 1LL<<(bn%64); if(r->act.b[z] & bb) return; for(;;) { @@ -1038,10 +1038,10 @@ paint2(Reg *r, int bn) { Reg *r1; int z; - uint32 bb, vreg, x; + uint64 bb, vreg, x; - z = bn/32; - bb = 1L << (bn%32); + z = bn/64; + bb = 1LL << (bn%64); vreg = regbits; if(!(r->act.b[z] & bb)) return vreg; @@ -1094,15 +1094,15 @@ paint2(Reg *r, int bn) } void -paint3(Reg *r, int bn, int32 rb, int rn) +paint3(Reg *r, int bn, uint32 rb, int rn) { Reg *r1; Prog *p; int z; - uint32 bb; + uint64 bb; - z = bn/32; - bb = 1L << (bn%32); + z = bn/64; + bb = 1LL << (bn%64); if(r->act.b[z] & bb) return; for(;;) { @@ -1175,7 +1175,7 @@ addreg(Adr *a, int rn) ostats.ncvtreg++; } -int32 +uint32 RtoB(int r) { @@ -1185,7 +1185,7 @@ RtoB(int r) } int -BtoR(int32 b) +BtoR(uint32 b) { b &= 0xffL; @@ -1194,7 +1194,7 @@ BtoR(int32 b) return bitno(b) + D_AX; } -int32 +uint32 FtoB(int f) { if(f < D_X0 || f > D_X7) @@ -1203,7 +1203,7 @@ FtoB(int f) } int -BtoF(int32 b) +BtoF(uint32 b) { b &= 0xFF00L; if(b == 0) diff --git a/src/cmd/gc/bits.c b/src/cmd/gc/bits.c index 2e79f6f1de..fe9a168dcd 100644 --- a/src/cmd/gc/bits.c +++ b/src/cmd/gc/bits.c @@ -95,11 +95,11 @@ int bnum(Bits a) { int i; - int32 b; + uint64 b; for(i=0; ib[n/64] & (1LL << (n%64))) != 0; +} + +void +biset(Bits *a, uint n) +{ + a->b[n/64] |= 1LL << (n%64); +} + +void +biclr(Bits *a, uint n) +{ + a->b[n/64] &= ~(1LL << (n%64)); } -*/ int -bitno(int32 b) +bitno(uint64 b) { int i; - for(i=0; i<32; i++) - if(b & (1L< Date: Wed, 5 Nov 2014 
15:36:47 -0500 Subject: [PATCH 16/26] [dev.power64] 6g: don't create variables for indirect addresses Previously, mkvar treated, for example, 0(AX) the same as AX. As a result, a move to an indirect address would be marked as *setting* the register, rather than just using it, resulting in unnecessary register moves. Fix this by not producing variables for indirect addresses. LGTM=rsc R=rsc, dave CC=golang-codereviews https://golang.org/cl/164610043 --- src/cmd/6g/reg.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/cmd/6g/reg.c b/src/cmd/6g/reg.c index 8d600d6433..afd3f1056e 100644 --- a/src/cmd/6g/reg.c +++ b/src/cmd/6g/reg.c @@ -621,6 +621,9 @@ mkvar(Reg *r, Adr *a) if(r != R) r->use1.b[0] |= doregbits(a->index); + if(t >= D_INDIR && t < 2*D_INDIR) + goto none; + switch(t) { default: regu = doregbits(t); From f45fd5753c4f1fd6ab472e219598523516855b40 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Thu, 6 Nov 2014 14:37:39 -0500 Subject: [PATCH 17/26] [dev.power64] gc: fix etype of strings The etype of references to strings was being incorrectly set to TINT32 on all platforms. Change it to TSTRING. It seems this doesn't matter for compilation, since x86 uses LEA instructions to load string addresses and arm and power64 disassemble the string into its constituent pieces (with the correct types), but it helps when debugging. 
LGTM=rsc R=rsc CC=golang-codereviews https://golang.org/cl/170100043 --- src/cmd/5g/gobj.c | 2 +- src/cmd/6g/gobj.c | 2 +- src/cmd/8g/gobj.c | 2 +- src/cmd/9g/gobj.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/src/cmd/5g/gobj.c b/src/cmd/5g/gobj.c index 5e988878f7..65f731685e 100644 --- a/src/cmd/5g/gobj.c +++ b/src/cmd/5g/gobj.c @@ -86,7 +86,7 @@ datagostring(Strlit *sval, Addr *a) sym = stringsym(sval->s, sval->len); a->type = D_OREG; a->name = D_EXTERN; - a->etype = TINT32; + a->etype = TSTRING; a->offset = 0; // header a->reg = NREG; a->sym = linksym(sym); diff --git a/src/cmd/6g/gobj.c b/src/cmd/6g/gobj.c index 04e837b138..dbb4ff62c4 100644 --- a/src/cmd/6g/gobj.c +++ b/src/cmd/6g/gobj.c @@ -81,7 +81,7 @@ datagostring(Strlit *sval, Addr *a) a->sym = linksym(sym); a->node = sym->def; a->offset = 0; // header - a->etype = TINT32; + a->etype = TSTRING; } void diff --git a/src/cmd/8g/gobj.c b/src/cmd/8g/gobj.c index fa0605e6c7..af287f7023 100644 --- a/src/cmd/8g/gobj.c +++ b/src/cmd/8g/gobj.c @@ -81,7 +81,7 @@ datagostring(Strlit *sval, Addr *a) a->sym = linksym(sym); a->node = sym->def; a->offset = 0; // header - a->etype = TINT32; + a->etype = TSTRING; } void diff --git a/src/cmd/9g/gobj.c b/src/cmd/9g/gobj.c index fdd7606bcd..3da55878a7 100644 --- a/src/cmd/9g/gobj.c +++ b/src/cmd/9g/gobj.c @@ -89,7 +89,7 @@ datagostring(Strlit *sval, Addr *a) a->reg = NREG; a->node = sym->def; a->offset = 0; // header - a->etype = TINT32; + a->etype = TSTRING; } void From 22c929f538483a02707b2caef91a1d3b55a72ef5 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Thu, 6 Nov 2014 14:41:44 -0500 Subject: [PATCH 18/26] [dev.power64] 9g: fix addr width calculation; enable MOV* width check 9g's naddr was missing assignments to a->width in several cases, so the optimizer was getting bogus width information. Add them. This correct width information also lets us enable the width check in gins for MOV*. 
LGTM=rsc R=rsc CC=golang-codereviews https://golang.org/cl/167310043 --- src/cmd/9g/gsubr.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/cmd/9g/gsubr.c b/src/cmd/9g/gsubr.c index d8b62b1da2..f7a4290818 100644 --- a/src/cmd/9g/gsubr.c +++ b/src/cmd/9g/gsubr.c @@ -1001,10 +1001,13 @@ hard: Prog* gins(int as, Node *f, Node *t) { - //int32 w; + int32 w; Prog *p; Addr af, at; + // TODO(austin): Add self-move test like in 6g (but be careful + // of truncation moves) + memset(&af, 0, sizeof af); memset(&at, 0, sizeof at); if(f != N) @@ -1021,9 +1024,6 @@ gins(int as, Node *f, Node *t) if(debug['g']) print("%P\n", p); - // TODO(minux): enable these. - // right now it fails on MOVD $type."".TypeAssertionError(SB) [width=1], R7 [width=8] - /* w = 0; switch(as) { case AMOVB: @@ -1049,12 +1049,11 @@ gins(int as, Node *f, Node *t) w = 8; break; } - if(w != 0 && ((f != N && af.width < w) || (t != N && at.width > w))) { + if(w != 0 && ((f != N && af.width < w) || (t != N && at.type != D_REG && at.width > w))) { dump("f", f); dump("t", t); fatal("bad width: %P (%d, %d)\n", p, af.width, at.width); } - */ return p; } @@ -1116,12 +1115,9 @@ naddr(Node *n, Addr *a, int canemitcode) case ONAME: a->etype = 0; - a->width = 0; a->reg = NREG; - if(n->type != T) { + if(n->type != T) a->etype = simtype[n->type->etype]; - a->width = n->type->width; - } a->offset = n->xoffset; s = n->sym; a->node = n->orig; @@ -1242,15 +1238,16 @@ naddr(Node *n, Addr *a, int canemitcode) naddr(n->left, a, canemitcode); a->etype = simtype[tptr]; if(a->type == D_CONST && a->offset == 0) - break; // len(nil) + break; // itab(nil) + a->width = widthptr; break; case OSPTR: // pointer in a string or slice naddr(n->left, a, canemitcode); + a->etype = simtype[tptr]; if(a->type == D_CONST && a->offset == 0) break; // ptr(nil) - a->etype = simtype[tptr]; a->offset += Array_array; a->width = widthptr; break; @@ -1262,6 +1259,7 @@ naddr(Node *n, Addr *a, int 
canemitcode) if(a->type == D_CONST && a->offset == 0) break; // len(nil) a->offset += Array_nel; + a->width = widthint; break; case OCAP: @@ -1271,11 +1269,13 @@ naddr(Node *n, Addr *a, int canemitcode) if(a->type == D_CONST && a->offset == 0) break; // cap(nil) a->offset += Array_cap; + a->width = widthint; break; case OADDR: naddr(n->left, a, canemitcode); a->etype = tptr; + a->width = widthptr; switch(a->type) { case D_OREG: a->type = D_CONST; @@ -1288,6 +1288,7 @@ naddr(Node *n, Addr *a, int canemitcode) default: fatal("naddr: OADDR %d\n", a->type); } + break; } } From e156f0e9971ca77ed6f9cb34e36ed73145bfa177 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Thu, 6 Nov 2014 15:35:53 -0500 Subject: [PATCH 19/26] [dev.power64] 5g: fix etype and width of itable Addrs For OITAB nodes, 5g's naddr was setting the wrong etype and failing to set the width of the resulting Addr. LGTM=rsc R=rsc CC=golang-codereviews https://golang.org/cl/171220043 --- src/cmd/5g/gsubr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/cmd/5g/gsubr.c b/src/cmd/5g/gsubr.c index 06e274e14d..f09197963c 100644 --- a/src/cmd/5g/gsubr.c +++ b/src/cmd/5g/gsubr.c @@ -1353,9 +1353,10 @@ naddr(Node *n, Addr *a, int canemitcode) case OITAB: // itable of interface value naddr(n->left, a, canemitcode); - a->etype = TINT32; + a->etype = simtype[tptr]; if(a->type == D_CONST && a->offset == 0) break; // len(nil) + a->width = widthptr; break; case OSPTR: From 7739533f61616ba71ef691489c0eeab2a59bd9e9 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Fri, 7 Nov 2014 10:43:55 -0500 Subject: [PATCH 20/26] [dev.power64] 5g: fix mistaken bit-wise AND in regopt Replace a bit-wise AND with a logical one. This happened to work before because bany returns 0 or 1, but the intent here is clearly logical (and this makes 5g match with 6g and 8g). 
LGTM=rsc R=rsc CC=golang-codereviews https://golang.org/cl/172850043 --- src/cmd/5g/reg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cmd/5g/reg.c b/src/cmd/5g/reg.c index 5b25adaf45..712841329e 100644 --- a/src/cmd/5g/reg.c +++ b/src/cmd/5g/reg.c @@ -393,7 +393,7 @@ loop2: for(z=0; z<BITS; z++) bit.b[z] = (r->refahead.b[z] | r->calahead.b[z]) & ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); - if(bany(&bit) & !r->f.refset) { + if(bany(&bit) && !r->f.refset) { // should never happen - all variables are preset if(debug['w']) print("%L: used and not set: %Q\n", r->f.prog->lineno, bit); From c1e8c57c3d0083fafaf451db7b9b018e16d3669b Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Wed, 12 Nov 2014 14:16:49 -0500 Subject: [PATCH 21/26] [dev.power64] 9g: fix width check and width calculation for OADDR LGTM=rsc R=rsc CC=golang-codereviews https://golang.org/cl/174970043 --- src/cmd/9g/gsubr.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/cmd/9g/gsubr.c b/src/cmd/9g/gsubr.c index f7a4290818..e5cd5ed4bf 100644 --- a/src/cmd/9g/gsubr.c +++ b/src/cmd/9g/gsubr.c @@ -1046,6 +1046,8 @@ gins(int as, Node *f, Node *t) break; case AMOVD: case AMOVDU: + if(af.type == D_CONST) + break; w = 8; break; } @@ -1275,7 +1277,6 @@ naddr(Node *n, Addr *a, int canemitcode) case OADDR: naddr(n->left, a, canemitcode); a->etype = tptr; - a->width = widthptr; switch(a->type) { case D_OREG: a->type = D_CONST; From 60f66aa817790ee55956552540ca49ea76fc9077 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Wed, 12 Nov 2014 14:58:43 -0500 Subject: [PATCH 22/26] [dev.power64] 9g: proginfo fixes For D_OREG addresses, store the used registers in regindex instead of reguse because they're really part of addressing. Add implicit register use/set for DUFFZERO/DUFFCOPY. 
LGTM=rsc R=rsc CC=golang-codereviews https://golang.org/cl/174050044 --- src/cmd/9g/prog.c | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/src/cmd/9g/prog.c b/src/cmd/9g/prog.c index 0a51a533a0..e3e50f28a9 100644 --- a/src/cmd/9g/prog.c +++ b/src/cmd/9g/prog.c @@ -96,11 +96,8 @@ static ProgInfo progtable[ALAST] = { [ABGT]= {Cjmp}, [ABLE]= {Cjmp}, [ARETURN]= {Break}, - // In addtion, duffzero reads R0,R2 and writes R2. This fact must be - // encoded in peep.c (TODO) + [ADUFFZERO]= {Call}, - // In addtion, duffcopy reads R0,R2,R3 and writes R2,R3. This fact must be - // encoded in peep.c (TODO) [ADUFFCOPY]= {Call}, }; @@ -118,14 +115,14 @@ proginfo(ProgInfo *info, Prog *p) info->flags |= /*CanRegRead |*/ RightRead; } - if(p->from.type == D_OREG && p->from.reg != NREG) { - info->reguse |= RtoB(p->from.reg); + if((p->from.type == D_OREG || p->from.type == D_CONST) && p->from.reg != NREG) { + info->regindex |= RtoB(p->from.reg); if(info->flags & PostInc) { info->regset |= RtoB(p->from.reg); } } - if(p->to.type == D_OREG && p->to.reg != NREG) { - info->reguse |= RtoB(p->to.reg); + if((p->to.type == D_OREG || p->to.type == D_CONST) && p->to.reg != NREG) { + info->regindex |= RtoB(p->to.reg); if(info->flags & PostInc) { info->regset |= RtoB(p->to.reg); } @@ -135,4 +132,13 @@ proginfo(ProgInfo *info, Prog *p) info->flags &= ~LeftRead; info->flags |= LeftAddr; } + + if(p->as == ADUFFZERO) { + info->reguse |= RtoB(0) | RtoB(2); + info->regset |= RtoB(2); + } + if(p->as == ADUFFCOPY) { + info->reguse |= RtoB(0) | RtoB(2) | RtoB(3); + info->regset |= RtoB(2) | RtoB(3); + } } From 8c060d9392d44916588780fe976c888f3ba8a60e Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Wed, 12 Nov 2014 17:19:02 -0500 Subject: [PATCH 23/26] [dev.power64] liblink: improve documentation of struct Prog LGTM=dave, rsc R=rsc, dave CC=golang-codereviews https://golang.org/cl/169460043 --- include/link.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 
deletions(-) diff --git a/include/link.h b/include/link.h index c80f467580..06f3ebb489 100644 --- a/include/link.h +++ b/include/link.h @@ -54,7 +54,7 @@ struct Addr { char sval[8]; float64 dval; - Prog* branch; // for 5g, 6g, 8g + Prog* branch; // for 5g, 6g, 8g, 9g } u; LSym* sym; @@ -89,10 +89,13 @@ struct Prog int32 lineno; Prog* link; short as; - uchar reg; // arm, power64 only - uchar scond; // arm only + uchar scond; // arm only; condition codes + + // operands Addr from; - Addr from3; // power64 only, fma and rlwm + uchar reg; // arm, power64 only (e.g., ADD from, reg, to); + // also used for ADATA width on arm, power64 + Addr from3; // power64 only (e.g., RLWM/FMADD from, reg, from3, to) Addr to; // for 5g, 6g, 8g internal use From c3dadb3d190973b888f5f83de3a3cccdbe7fc949 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Thu, 13 Nov 2014 13:34:20 -0500 Subject: [PATCH 24/26] [dev.power64] 6g,8g: remove unnecessary and incorrect reg use scanning Previously, the 6g and 8g registerizers scanned for used registers beyond the end of a region being considered for registerization. This ancient artifact was copied from the C compilers, where it was probably necessary to track implicitly used registers. In the Go compilers it's harmless (because it can only over-restrict the set of available registers), but no longer necessary because the Go compilers correctly track register use/set information. The consequences of this extra scan were (at least) that 1) we would not consider allocating the AX register if there was a deferproc call in the future because deferproc uses AX as a return register, so we see the use of AX, but don't track that AX is set by the CALL, and 2) we could not consider allocating the DX register if there was a MUL in the future because MUL implicitly sets DX and (thanks to an abuse of copyu in this code) we would also consider DX used. This commit fixes these problems by nuking this code. 
LGTM=rsc R=rsc CC=golang-codereviews https://golang.org/cl/174110043 --- src/cmd/6g/reg.c | 50 +----------------------------------------------- src/cmd/8g/reg.c | 48 +--------------------------------------------- 2 files changed, 2 insertions(+), 96 deletions(-) diff --git a/src/cmd/6g/reg.c b/src/cmd/6g/reg.c index afd3f1056e..4ce2f4db00 100644 --- a/src/cmd/6g/reg.c +++ b/src/cmd/6g/reg.c @@ -1019,52 +1019,12 @@ paint1(Reg *r, int bn) } } -uint32 -regset(Reg *r, uint32 bb) -{ - uint32 b, set; - Adr v; - int c; - - set = 0; - v = zprog.from; - while(b = bb & ~(bb-1)) { - v.type = b & 0xFFFF? BtoR(b): BtoF(b); - if(v.type == 0) - fatal("zero v.type for %#ux", b); - c = copyu(r->f.prog, &v, nil); - if(c == 3) - set |= b; - bb &= ~b; - } - return set; -} - -uint32 -reguse(Reg *r, uint32 bb) -{ - uint32 b, set; - Adr v; - int c; - - set = 0; - v = zprog.from; - while(b = bb & ~(bb-1)) { - v.type = b & 0xFFFF? BtoR(b): BtoF(b); - c = copyu(r->f.prog, &v, nil); - if(c == 1 || c == 2 || c == 4) - set |= b; - bb &= ~b; - } - return set; -} - uint32 paint2(Reg *r, int bn) { Reg *r1; int z; - uint64 bb, vreg, x; + uint64 bb, vreg; z = bn/64; bb = 1LL << (bn%64); @@ -1108,14 +1068,6 @@ paint2(Reg *r, int bn) break; } - bb = vreg; - for(; r; r=(Reg*)r->f.s1) { - x = r->regu & ~bb; - if(x) { - vreg |= reguse(r, x); - bb |= regset(r, x); - } - } return vreg; } diff --git a/src/cmd/8g/reg.c b/src/cmd/8g/reg.c index 0fbe684821..79d60bed55 100644 --- a/src/cmd/8g/reg.c +++ b/src/cmd/8g/reg.c @@ -995,50 +995,12 @@ paint1(Reg *r, int bn) } } -uint32 -regset(Reg *r, uint32 bb) -{ - uint32 b, set; - Adr v; - int c; - - set = 0; - v = zprog.from; - while(b = bb & ~(bb-1)) { - v.type = b & 0xFF ? BtoR(b): BtoF(b); - c = copyu(r->f.prog, &v, nil); - if(c == 3) - set |= b; - bb &= ~b; - } - return set; -} - -uint32 -reguse(Reg *r, uint32 bb) -{ - uint32 b, set; - Adr v; - int c; - - set = 0; - v = zprog.from; - while(b = bb & ~(bb-1)) { - v.type = b & 0xFF ? 
BtoR(b): BtoF(b); - c = copyu(r->f.prog, &v, nil); - if(c == 1 || c == 2 || c == 4) - set |= b; - bb &= ~b; - } - return set; -} - uint32 paint2(Reg *r, int bn) { Reg *r1; int z; - uint64 bb, vreg, x; + uint64 bb, vreg; z = bn/64; bb = 1LL << (bn%64); @@ -1082,14 +1044,6 @@ paint2(Reg *r, int bn) break; } - bb = vreg; - for(; r; r=(Reg*)r->f.s1) { - x = r->regu & ~bb; - if(x) { - vreg |= reguse(r, x); - bb |= regset(r, x); - } - } return vreg; } From 231b8d61e9a7eec0e2145217828a4881392db230 Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Thu, 13 Nov 2014 13:48:59 -0500 Subject: [PATCH 25/26] [dev.power64] 9l: remove enum as's tag for c2go None of the other compilers have a tag for this enum. Cleaning all of this up to use proper types will happen after the conversion. LGTM=minux, rsc R=rsc, minux CC=golang-codereviews https://golang.org/cl/166690043 --- src/cmd/9l/9.out.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/cmd/9l/9.out.h b/src/cmd/9l/9.out.h index e494e90ca9..08a339318d 100644 --- a/src/cmd/9l/9.out.h +++ b/src/cmd/9l/9.out.h @@ -131,7 +131,7 @@ enum C_NCLASS, /* must be the last */ }; -enum as +enum { AXXX, AADD, @@ -501,7 +501,7 @@ enum D_R0 = 0, // type is D_REG D_F0 = D_R0+NREG, // type is D_FREG -/* reg names iff type is D_SPR */ +/* reg names in offset field iff type is D_SPR */ D_XER = 1, D_LR = 8, D_CTR = 9 From 743bdf612a63d631bac88a6b857094152ac33d5c Mon Sep 17 00:00:00 2001 From: Austin Clements Date: Thu, 13 Nov 2014 13:51:44 -0500 Subject: [PATCH 26/26] [dev.power64] 9g: implement regopt This adds registerization support to 9g equivalent to what the other compilers have. 
LGTM=rsc R=rsc, dave CC=golang-codereviews https://golang.org/cl/174980043 --- src/cmd/9g/opt.h | 49 +- src/cmd/9g/reg.c | 1214 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 1233 insertions(+), 30 deletions(-) diff --git a/src/cmd/9g/opt.h b/src/cmd/9g/opt.h index d3cbcb9570..7f15b5a69f 100644 --- a/src/cmd/9g/opt.h +++ b/src/cmd/9g/opt.h @@ -70,24 +70,40 @@ struct Reg { Flow f; - Bits set; // variables written by this instruction. - Bits use1; // variables read by prog->from. - Bits use2; // variables read by prog->to. + Bits set; // regopt variables written by this instruction. + Bits use1; // regopt variables read by prog->from. + Bits use2; // regopt variables read by prog->to. + // refahead/refbehind are the regopt variables whose current + // value may be used in the following/preceding instructions + // up to a CALL (or the value is clobbered). Bits refbehind; Bits refahead; + // calahead/calbehind are similar, but for variables in + // instructions that are reachable after hitting at least one + // CALL. Bits calbehind; Bits calahead; Bits regdiff; Bits act; - int32 regu; // register used bitmap + uint64 regu; // register used bitmap }; #define R ((Reg*)0) /*c2go extern Reg *R; */ #define NRGN 600 /*c2go enum { NRGN = 600 }; */ + +// A Rgn represents a single regopt variable over a region of code +// where a register could potentially be dedicated to that variable. +// The code encompassed by a Rgn is defined by the flow graph, +// starting at enter, flood-filling forward while varno is refahead +// and backward while varno is refbehind, and following branches. A +// single variable may be represented by multiple disjoint Rgns and +// each Rgn may choose a different register for that variable. +// Registers are allocated to regions greedily in order of descending +// cost. 
struct Rgn { Reg* enter; @@ -104,7 +120,7 @@ EXTERN Rgn* rgp; EXTERN int nregion; EXTERN int nvar; EXTERN int32 regbits; -EXTERN int32 exregbits; +EXTERN int32 exregbits; // TODO(austin) not used; remove EXTERN Bits externs; EXTERN Bits params; EXTERN Bits consts; @@ -118,10 +134,8 @@ EXTERN struct { int32 ncvtreg; int32 nspill; - int32 nreload; int32 ndelmov; int32 nvar; - int32 naddr; } ostats; /* @@ -133,10 +147,10 @@ void addmove(Reg*, int, int, int); Bits mkvar(Reg*, Adr*); void prop(Reg*, Bits, Bits); void synch(Reg*, Bits); -uint32 allreg(uint32, Rgn*); +uint64 allreg(uint64, Rgn*); void paint1(Reg*, int); -uint32 paint2(Reg*, int); -void paint3(Reg*, int, int32, int); +uint64 paint2(Reg*, int, int); +void paint3(Reg*, int, uint64, int); void addreg(Adr*, int); void dumpone(Flow*, int); void dumpit(char*, Flow*, int); @@ -160,8 +174,8 @@ typedef struct ProgInfo ProgInfo; struct ProgInfo { uint32 flags; // the bits below - uint64 reguse; // required registers used by this instruction - uint64 regset; // required registers set by this instruction + uint64 reguse; // registers implicitly used by this instruction + uint64 regset; // registers implicitly set by this instruction uint64 regindex; // registers used by addressing mode }; @@ -182,20 +196,21 @@ enum SizeF = 1<<7, // float aka float32 SizeD = 1<<8, // double aka float64 - // Left side: address taken, read, write. + // Left side (Prog.from): address taken, read, write. LeftAddr = 1<<9, LeftRead = 1<<10, LeftWrite = 1<<11, - - // Register in middle; never written. + + // Register in middle (Prog.reg); only ever read. RegRead = 1<<12, CanRegRead = 1<<13, - - // Right side: address taken, read, write. + + // Right side (Prog.to): address taken, read, write. 
RightAddr = 1<<14, RightRead = 1<<15, RightWrite = 1<<16, + // Instruction updates whichever of from/to is type D_OREG PostInc = 1<<17, // Instruction kinds diff --git a/src/cmd/9g/reg.c b/src/cmd/9g/reg.c index bbebf3fe03..b911a23998 100644 --- a/src/cmd/9g/reg.c +++ b/src/cmd/9g/reg.c @@ -33,14 +33,1197 @@ #include "gg.h" #include "opt.h" -void -regopt(Prog *p) +#define NREGVAR 64 /* 32 general + 32 floating */ +#define REGBITS ((uint64)0xffffffffffffffffull) +/*c2go enum { + NREGVAR = 64, + REGBITS = 0xffffffffffffffff, +}; +*/ + +static Reg* firstr; +static int first = 1; + +int +rcmp(const void *a1, const void *a2) { - USED(p); - // TODO(minux) + Rgn *p1, *p2; + int c1, c2; + + p1 = (Rgn*)a1; + p2 = (Rgn*)a2; + c1 = p2->cost; + c2 = p1->cost; + if(c1 -= c2) + return c1; + return p2->varno - p1->varno; +} + +static void +setaddrs(Bits bit) +{ + int i, n; + Var *v; + Node *node; + + while(bany(&bit)) { + // convert each bit to a variable + i = bnum(bit); + node = var[i].node; + n = var[i].name; + biclr(&bit, i); + + // disable all pieces of that variable + for(i=0; i<nvar; i++) { + v = var+i; + if(v->node == node && v->name == n) + v->addr = 2; + } + } +} + +static char* regname[] = { + ".R0", + ".R1", + ".R2", + ".R3", + ".R4", + ".R5", + ".R6", + ".R7", + ".R8", + ".R9", + ".R10", + ".R11", + ".R12", + ".R13", + ".R14", + ".R15", + ".R16", + ".R17", + ".R18", + ".R19", + ".R20", + ".R21", + ".R22", + ".R23", + ".R24", + ".R25", + ".R26", + ".R27", + ".R28", + ".R29", + ".R30", + ".R31", + ".F0", + ".F1", + ".F2", + ".F3", + ".F4", + ".F5", + ".F6", + ".F7", + ".F8", + ".F9", + ".F10", + ".F11", + ".F12", + ".F13", + ".F14", + ".F15", + ".F16", + ".F17", + ".F18", + ".F19", + ".F20", + ".F21", + ".F22", + ".F23", + ".F24", + ".F25", + ".F26", + ".F27", + ".F28", + ".F29", + ".F30", + ".F31", +}; + +static Node* regnodes[NREGVAR]; + +static void walkvardef(Node *n, Reg *r, int active); + +void +regopt(Prog *firstp) +{ + Reg *r, *r1; + Prog *p; + Graph *g; + ProgInfo info; + int i, z, active; + 
uint64 vreg, usedreg; + Bits bit; + + if(first) { + fmtinstall('Q', Qconv); + first = 0; + } + + mergetemp(firstp); + + /* + * control flow is more complicated in generated go code + * than in generated c code. define pseudo-variables for + * registers, so we have complete register usage information. + */ + nvar = NREGVAR; + memset(var, 0, NREGVAR*sizeof var[0]); + for(i=0; iopt = nil; + return; + } + + firstr = (Reg*)g->start; + + for(r = firstr; r != R; r = (Reg*)r->f.link) { + p = r->f.prog; + if(p->as == AVARDEF || p->as == AVARKILL) + continue; + proginfo(&info, p); + + // Avoid making variables for direct-called functions. + if(p->as == ABL && p->to.name == D_EXTERN) + continue; + + // from vs to doesn't matter for registers + r->use1.b[0] |= info.reguse | info.regindex; + r->set.b[0] |= info.regset; + + // Compute used register for from + bit = mkvar(r, &p->from); + if(info.flags & LeftAddr) + setaddrs(bit); + if(info.flags & LeftRead) + for(z=0; zuse1.b[z] |= bit.b[z]; + + // Compute used register for reg + if(info.flags & RegRead) { + if(p->from.type != D_FREG) + r->use1.b[0] |= RtoB(p->reg); + else + r->use1.b[0] |= FtoB(D_F0+p->reg); + } + + // Currently we never generate three register forms. + // If we do, this will need to change. 
+ if(p->from3.type != D_NONE) + fatal("regopt not implemented for from3"); + + // Compute used register for to + bit = mkvar(r, &p->to); + if(info.flags & RightAddr) + setaddrs(bit); + if(info.flags & RightRead) + for(z=0; zuse2.b[z] |= bit.b[z]; + if(info.flags & RightWrite) + for(z=0; zset.b[z] |= bit.b[z]; + } + + for(i=0; iaddr) { + bit = blsh(i); + for(z=0; zaddr, v->etype, v->width, v->node, v->offset); + } + + if(debug['R'] && debug['v']) + dumpit("pass1", &firstr->f, 1); + + /* + * pass 2 + * find looping structure + */ + flowrpo(g); + + if(debug['R'] && debug['v']) + dumpit("pass2", &firstr->f, 1); + + /* + * pass 2.5 + * iterate propagating fat vardef covering forward + * r->act records vars with a VARDEF since the last CALL. + * (r->act will be reused in pass 5 for something else, + * but we'll be done with it by then.) + */ + active = 0; + for(r = firstr; r != R; r = (Reg*)r->f.link) { + r->f.active = 0; + r->act = zbits; + } + for(r = firstr; r != R; r = (Reg*)r->f.link) { + p = r->f.prog; + if(p->as == AVARDEF && isfat(p->to.node->type) && p->to.node->opt != nil) { + active++; + walkvardef(p->to.node, r, active); + } + } + + /* + * pass 3 + * iterate propagating usage + * back until flow graph is complete + */ +loop1: + change = 0; + for(r = firstr; r != R; r = (Reg*)r->f.link) + r->f.active = 0; + for(r = firstr; r != R; r = (Reg*)r->f.link) + if(r->f.prog->as == ARET) + prop(r, zbits, zbits); +loop11: + /* pick up unreachable code */ + i = 0; + for(r = firstr; r != R; r = r1) { + r1 = (Reg*)r->f.link; + if(r1 && r1->f.active && !r->f.active) { + prop(r, zbits, zbits); + i = 1; + } + } + if(i) + goto loop11; + if(change) + goto loop1; + + if(debug['R'] && debug['v']) + dumpit("pass3", &firstr->f, 1); + + /* + * pass 4 + * iterate propagating register/variable synchrony + * forward until graph is complete + */ +loop2: + change = 0; + for(r = firstr; r != R; r = (Reg*)r->f.link) + r->f.active = 0; + synch(firstr, zbits); + if(change) + goto loop2; + + 
if(debug['R'] && debug['v']) + dumpit("pass4", &firstr->f, 1); + + /* + * pass 4.5 + * move register pseudo-variables into regu. + */ + for(r = firstr; r != R; r = (Reg*)r->f.link) { + r->regu = (r->refbehind.b[0] | r->set.b[0]) & REGBITS; + + r->set.b[0] &= ~REGBITS; + r->use1.b[0] &= ~REGBITS; + r->use2.b[0] &= ~REGBITS; + r->refbehind.b[0] &= ~REGBITS; + r->refahead.b[0] &= ~REGBITS; + r->calbehind.b[0] &= ~REGBITS; + r->calahead.b[0] &= ~REGBITS; + r->regdiff.b[0] &= ~REGBITS; + r->act.b[0] &= ~REGBITS; + } + + if(debug['R'] && debug['v']) + dumpit("pass4.5", &firstr->f, 1); + + /* + * pass 5 + * isolate regions + * calculate costs (paint1) + */ + r = firstr; + if(r) { + for(z=0; zrefahead.b[z] | r->calahead.b[z]) & + ~(externs.b[z] | params.b[z] | addrs.b[z] | consts.b[z]); + if(bany(&bit) && !r->f.refset) { + // should never happen - all variables are preset + if(debug['w']) + print("%L: used and not set: %Q\n", r->f.prog->lineno, bit); + r->f.refset = 1; + } + } + for(r = firstr; r != R; r = (Reg*)r->f.link) + r->act = zbits; + rgp = region; + nregion = 0; + for(r = firstr; r != R; r = (Reg*)r->f.link) { + for(z=0; zset.b[z] & + ~(r->refahead.b[z] | r->calahead.b[z] | addrs.b[z]); + if(bany(&bit) && !r->f.refset) { + if(debug['w']) + print("%L: set and not used: %Q\n", r->f.prog->lineno, bit); + r->f.refset = 1; + excise(&r->f); + } + for(z=0; zact.b[z] | addrs.b[z]); + while(bany(&bit)) { + i = bnum(bit); + rgp->enter = r; + rgp->varno = i; + change = 0; + paint1(r, i); + biclr(&bit, i); + if(change <= 0) + continue; + rgp->cost = change; + nregion++; + if(nregion >= NRGN) { + if(debug['R'] && debug['v']) + print("too many regions\n"); + goto brk; + } + rgp++; + } + } +brk: + qsort(region, nregion, sizeof(region[0]), rcmp); + + if(debug['R'] && debug['v']) + dumpit("pass5", &firstr->f, 1); + + /* + * pass 6 + * determine used registers (paint2) + * replace code (paint3) + */ + rgp = region; + if(debug['R'] && debug['v']) + print("\nregisterizing\n"); + 
for(i=0; icost, rgp->varno, rgp->enter->f.prog->pc); + bit = blsh(rgp->varno); + usedreg = paint2(rgp->enter, rgp->varno, 0); + vreg = allreg(usedreg, rgp); + if(rgp->regno != 0) { + if(debug['R'] && debug['v']) { + Var *v; + + v = var + rgp->varno; + print("registerize %N+%lld (bit=%2d et=%2E) in %R usedreg=%llx vreg=%llx\n", + v->node, v->offset, rgp->varno, v->etype, rgp->regno, usedreg, vreg); + } + paint3(rgp->enter, rgp->varno, vreg, rgp->regno); + } + rgp++; + } + + /* + * free aux structures. peep allocates new ones. + */ + for(i=0; iopt = nil; + flowend(g); + firstr = R; + + if(debug['R'] && debug['v']) { + // Rebuild flow graph, since we inserted instructions + g = flowstart(firstp, sizeof(Reg)); + firstr = (Reg*)g->start; + dumpit("pass6", &firstr->f, 1); + flowend(g); + firstr = R; + } + + /* + * pass 7 + * peep-hole on basic block + */ + if(!debug['R'] || debug['P']) + peep(firstp); + + /* + * eliminate nops + */ + for(p=firstp; p!=P; p=p->link) { + while(p->link != P && p->link->as == ANOP) + p->link = p->link->link; + if(p->to.type == D_BRANCH) + while(p->to.u.branch != P && p->to.u.branch->as == ANOP) + p->to.u.branch = p->to.u.branch->link; + } + + if(debug['R']) { + if(ostats.ncvtreg || + ostats.nspill || + ostats.ndelmov || + ostats.nvar || + 0) + print("\nstats\n"); + + if(ostats.ncvtreg) + print(" %4d cvtreg\n", ostats.ncvtreg); + if(ostats.nspill) + print(" %4d spill\n", ostats.nspill); + if(ostats.ndelmov) + print(" %4d delmov\n", ostats.ndelmov); + if(ostats.nvar) + print(" %4d var\n", ostats.nvar); + + memset(&ostats, 0, sizeof(ostats)); + } + return; } +static void +walkvardef(Node *n, Reg *r, int active) +{ + Reg *r1, *r2; + int bn; + Var *v; + + for(r1=r; r1!=R; r1=(Reg*)r1->f.s1) { + if(r1->f.active == active) + break; + r1->f.active = active; + if(r1->f.prog->as == AVARKILL && r1->f.prog->to.node == n) + break; + for(v=n->opt; v!=nil; v=v->nextinnode) { + bn = v - var; + biset(&r1->act, bn); + } + if(r1->f.prog->as == ABL) + break; + } 
+ + for(r2=r; r2!=r1; r2=(Reg*)r2->f.s1) + if(r2->f.s2 != nil) + walkvardef(n, (Reg*)r2->f.s2, active); +} + +/* + * add mov b,rn + * just after r + */ +void +addmove(Reg *r, int bn, int rn, int f) +{ + Prog *p, *p1, *p2; + Adr *a; + Var *v; + + p1 = mal(sizeof(*p1)); + *p1 = zprog; + p = r->f.prog; + + // If there's a stack fixup coming (ADD $n,R1 after BL newproc or BL deferproc), + // delay the load until after the fixup. + p2 = p->link; + if(p2 && p2->as == AADD && p2->to.reg == REGSP && p2->to.type == D_REG) + p = p2; + + p1->link = p->link; + p->link = p1; + p1->lineno = p->lineno; + + v = var + bn; + + a = &p1->to; + a->name = v->name; + a->node = v->node; + a->sym = linksym(v->node->sym); + a->offset = v->offset; + a->etype = v->etype; + a->type = D_OREG; + if(a->etype == TARRAY || a->sym == nil) + a->type = D_CONST; + + if(v->addr) + fatal("addmove: shouldn't be doing this %A\n", a); + + switch(v->etype) { + default: + print("What is this %E\n", v->etype); + + case TINT8: + p1->as = AMOVB; + break; + case TBOOL: + case TUINT8: +//print("movbu %E %d %S\n", v->etype, bn, v->sym); + p1->as = AMOVBZ; + break; + case TINT16: + p1->as = AMOVH; + break; + case TUINT16: + p1->as = AMOVHZ; + break; + case TINT32: + p1->as = AMOVW; + break; + case TUINT32: + case TPTR32: + p1->as = AMOVWZ; + break; + case TINT64: + case TUINT64: + case TPTR64: + p1->as = AMOVD; + break; + case TFLOAT32: + p1->as = AFMOVS; + break; + case TFLOAT64: + p1->as = AFMOVD; + break; + } + + p1->from.type = D_REG; + p1->from.reg = rn; + if(rn >= NREG) { + p1->from.type = D_FREG; + p1->from.reg = rn-NREG; + } + if(!f) { + p1->from = *a; + *a = zprog.from; + a->type = D_REG; + a->reg = rn; + if(rn >= NREG) { + a->type = D_FREG; + a->reg = rn-NREG; + } + if(v->etype == TUINT8 || v->etype == TBOOL) + p1->as = AMOVBZ; + if(v->etype == TUINT16) + p1->as = AMOVHZ; + } + if(debug['R']) + print("%P\t.a%P\n", p, p1); + ostats.nspill++; +} + +static int +overlap(int64 o1, int w1, int64 o2, int w2) +{ + 
int64 t1, t2; + + t1 = o1+w1; + t2 = o2+w2; + + if(!(t1 > o2 && t2 > o1)) + return 0; + + return 1; +} + +Bits +mkvar(Reg *r, Adr *a) +{ + USED(r); + Var *v; + int i, t, n, et, z, flag; + int64 w; + int64 o; + Bits bit; + Node *node; + + // mark registers used + t = a->type; + switch(t) { + default: + print("type %d %d %D\n", t, a->name, a); + goto none; + + case D_NONE: + goto none; + + case D_BRANCH: + case D_CONST: + case D_FCONST: + case D_SCONST: + case D_SPR: + case D_OREG: + break; + + case D_REG: + if(a->reg != NREG) { + bit = zbits; + bit.b[0] = RtoB(a->reg); + return bit; + } + break; + + case D_FREG: + if(a->reg != NREG) { + bit = zbits; + bit.b[0] = FtoB(D_F0+a->reg); + return bit; + } + break; + } + + switch(a->name) { + default: + goto none; + + case D_EXTERN: + case D_STATIC: + case D_AUTO: + case D_PARAM: + n = a->name; + break; + } + + node = a->node; + if(node == N || node->op != ONAME || node->orig == N) + goto none; + node = node->orig; + if(node->orig != node) + fatal("%D: bad node", a); + if(node->sym == S || node->sym->name[0] == '.') + goto none; + et = a->etype; + o = a->offset; + w = a->width; + if(w < 0) + fatal("bad width %lld for %D", w, a); + + flag = 0; + for(i=0; inode == node && v->name == n) { + if(v->offset == o) + if(v->etype == et) + if(v->width == w) + return blsh(i); + + // if they overlap, disable both + if(overlap(v->offset, v->width, o, w)) { + v->addr = 1; + flag = 1; + } + } + } + + switch(et) { + case 0: + case TFUNC: + goto none; + } + + if(nvar >= NVAR) { + if(debug['w'] > 1 && node != N) + fatal("variable not optimized: %#N", node); + + // If we're not tracking a word in a variable, mark the rest as + // having its address taken, so that we keep the whole thing + // live at all calls. otherwise we might optimize away part of + // a variable but not all of it. 
+ for(i=0; inode == node) + v->addr = 1; + } + goto none; + } + + i = nvar; + nvar++; + v = var+i; + v->offset = o; + v->name = n; + v->etype = et; + v->width = w; + v->addr = flag; // funny punning + v->node = node; + + // node->opt is the head of a linked list + // of Vars within the given Node, so that + // we can start at a Var and find all the other + // Vars in the same Go variable. + v->nextinnode = node->opt; + node->opt = v; + + bit = blsh(i); + if(n == D_EXTERN || n == D_STATIC) + for(z=0; zclass == PPARAM) + for(z=0; zclass == PPARAMOUT) + for(z=0; zaddrtaken) + v->addr = 1; + + // Disable registerization for globals, because: + // (1) we might panic at any time and we want the recovery code + // to see the latest values (issue 1304). + // (2) we don't know what pointers might point at them and we want + // loads via those pointers to see updated values and vice versa (issue 7995). + // + // Disable registerization for results if using defer, because the deferred func + // might recover and return, causing the current values to be used. + if(node->class == PEXTERN || (hasdefer && node->class == PPARAMOUT)) + v->addr = 1; + + if(debug['R']) + print("bit=%2d et=%2E w=%lld+%lld %#N %D flag=%d\n", i, et, o, w, node, a, v->addr); + ostats.nvar++; + + return bit; + +none: + return zbits; +} + +void +prop(Reg *r, Bits ref, Bits cal) +{ + Reg *r1, *r2; + int z, i, j; + Var *v, *v1; + + for(r1 = r; r1 != R; r1 = (Reg*)r1->f.p1) { + for(z=0; zrefahead.b[z]; + if(ref.b[z] != r1->refahead.b[z]) { + r1->refahead.b[z] = ref.b[z]; + change++; + } + cal.b[z] |= r1->calahead.b[z]; + if(cal.b[z] != r1->calahead.b[z]) { + r1->calahead.b[z] = cal.b[z]; + change++; + } + } + switch(r1->f.prog->as) { + case ABL: + if(noreturn(r1->f.prog)) + break; + + // Mark all input variables (ivar) as used, because that's what the + // liveness bitmaps say. The liveness bitmaps say that so that a + // panic will not show stale values in the parameter dump. 
+ // Mark variables with a recent VARDEF (r1->act) as used, + // so that the optimizer flushes initializations to memory, + // so that if a garbage collection happens during this CALL, + // the collector will see initialized memory. Again this is to + // match what the liveness bitmaps say. + for(z=0; zact.b[z]; + ref.b[z] = 0; + } + + // cal.b is the current approximation of what's live across the call. + // Every bit in cal.b is a single stack word. For each such word, + // find all the other tracked stack words in the same Go variable + // (struct/slice/string/interface) and mark them live too. + // This is necessary because the liveness analysis for the garbage + // collector works at variable granularity, not at word granularity. + // It is fundamental for slice/string/interface: the garbage collector + // needs the whole value, not just some of the words, in order to + // interpret the other bits correctly. Specifically, slice needs a consistent + // ptr and cap, string needs a consistent ptr and len, and interface + // needs a consistent type word and data word. + for(z=0; z= nvar || ((cal.b[z]>>i)&1) == 0) + continue; + v = var+z*64+i; + if(v->node->opt == nil) // v represents fixed register, not Go variable + continue; + + // v->node->opt is the head of a linked list of Vars + // corresponding to tracked words from the Go variable v->node. + // Walk the list and set all the bits. + // For a large struct this could end up being quadratic: + // after the first setting, the outer loop (for z, i) would see a 1 bit + // for all of the remaining words in the struct, and for each such + // word would go through and turn on all the bits again. + // To avoid the quadratic behavior, we only turn on the bits if + // v is the head of the list or if the head's bit is not yet turned on. + // This will set the bits at most twice, keeping the overall loop linear. 
+ v1 = v->node->opt; + j = v1 - var; + if(v == v1 || !btest(&cal, j)) { + for(; v1 != nil; v1 = v1->nextinnode) { + j = v1 - var; + biset(&cal, j); + } + } + } + } + break; + + case ATEXT: + for(z=0; zset.b[z]) | + r1->use1.b[z] | r1->use2.b[z]; + cal.b[z] &= ~(r1->set.b[z] | r1->use1.b[z] | r1->use2.b[z]); + r1->refbehind.b[z] = ref.b[z]; + r1->calbehind.b[z] = cal.b[z]; + } + if(r1->f.active) + break; + r1->f.active = 1; + } + for(; r != r1; r = (Reg*)r->f.p1) + for(r2 = (Reg*)r->f.p2; r2 != R; r2 = (Reg*)r2->f.p2link) + prop(r2, r->refbehind, r->calbehind); +} + +void +synch(Reg *r, Bits dif) +{ + Reg *r1; + int z; + + for(r1 = r; r1 != R; r1 = (Reg*)r1->f.s1) { + for(z=0; zrefbehind.b[z] & r1->refahead.b[z])) | + r1->set.b[z] | r1->regdiff.b[z]; + if(dif.b[z] != r1->regdiff.b[z]) { + r1->regdiff.b[z] = dif.b[z]; + change++; + } + } + if(r1->f.active) + break; + r1->f.active = 1; + for(z=0; zcalbehind.b[z] & r1->calahead.b[z]); + if(r1->f.s2 != nil) + synch((Reg*)r1->f.s2, dif); + } +} + +uint64 +allreg(uint64 b, Rgn *r) +{ + Var *v; + int i; + + v = var + r->varno; + r->regno = 0; + switch(v->etype) { + + default: + fatal("unknown etype %d/%E", bitno(b), v->etype); + break; + + case TINT8: + case TUINT8: + case TINT16: + case TUINT16: + case TINT32: + case TUINT32: + case TINT64: + case TUINT64: + case TINT: + case TUINT: + case TUINTPTR: + case TBOOL: + case TPTR32: + case TPTR64: + i = BtoR(~b); + if(i && r->cost > 0) { + r->regno = i; + return RtoB(i); + } + break; + + case TFLOAT32: + case TFLOAT64: + i = BtoF(~b); + if(i && r->cost > 0) { + r->regno = i; + return FtoB(i); + } + break; + } + return 0; +} + +void +paint1(Reg *r, int bn) +{ + Reg *r1; + int z; + uint64 bb; + + z = bn/64; + bb = 1LL<<(bn%64); + if(r->act.b[z] & bb) + return; + for(;;) { + if(!(r->refbehind.b[z] & bb)) + break; + r1 = (Reg*)r->f.p1; + if(r1 == R) + break; + if(!(r1->refahead.b[z] & bb)) + break; + if(r1->act.b[z] & bb) + break; + r = r1; + } + + if(LOAD(r) & 
~(r->set.b[z]&~(r->use1.b[z]|r->use2.b[z])) & bb) { + change -= CLOAD * r->f.loop; + } + for(;;) { + r->act.b[z] |= bb; + + if(r->f.prog->as != ANOP) { // don't give credit for NOPs + if(r->use1.b[z] & bb) + change += CREF * r->f.loop; + if((r->use2.b[z]|r->set.b[z]) & bb) + change += CREF * r->f.loop; + } + + if(STORE(r) & r->regdiff.b[z] & bb) { + change -= CLOAD * r->f.loop; + } + + if(r->refbehind.b[z] & bb) + for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) + if(r1->refahead.b[z] & bb) + paint1(r1, bn); + + if(!(r->refahead.b[z] & bb)) + break; + r1 = (Reg*)r->f.s2; + if(r1 != R) + if(r1->refbehind.b[z] & bb) + paint1(r1, bn); + r = (Reg*)r->f.s1; + if(r == R) + break; + if(r->act.b[z] & bb) + break; + if(!(r->refbehind.b[z] & bb)) + break; + } +} + +uint64 +paint2(Reg *r, int bn, int depth) +{ + Reg *r1; + int z; + uint64 bb, vreg; + + z = bn/64; + bb = 1LL << (bn%64); + vreg = regbits; + if(!(r->act.b[z] & bb)) + return vreg; + for(;;) { + if(!(r->refbehind.b[z] & bb)) + break; + r1 = (Reg*)r->f.p1; + if(r1 == R) + break; + if(!(r1->refahead.b[z] & bb)) + break; + if(!(r1->act.b[z] & bb)) + break; + r = r1; + } + for(;;) { + if(debug['R'] && debug['v']) + print(" paint2 %d %P\n", depth, r->f.prog); + + r->act.b[z] &= ~bb; + + vreg |= r->regu; + + if(r->refbehind.b[z] & bb) + for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) + if(r1->refahead.b[z] & bb) + vreg |= paint2(r1, bn, depth+1); + + if(!(r->refahead.b[z] & bb)) + break; + r1 = (Reg*)r->f.s2; + if(r1 != R) + if(r1->refbehind.b[z] & bb) + vreg |= paint2(r1, bn, depth+1); + r = (Reg*)r->f.s1; + if(r == R) + break; + if(!(r->act.b[z] & bb)) + break; + if(!(r->refbehind.b[z] & bb)) + break; + } + return vreg; +} + +void +paint3(Reg *r, int bn, uint64 rb, int rn) +{ + Reg *r1; + Prog *p; + int z; + uint64 bb; + + z = bn/64; + bb = 1LL << (bn%64); + if(r->act.b[z] & bb) + return; + for(;;) { + if(!(r->refbehind.b[z] & bb)) + break; + r1 = (Reg*)r->f.p1; + if(r1 == R) + break; + 
if(!(r1->refahead.b[z] & bb)) + break; + if(r1->act.b[z] & bb) + break; + r = r1; + } + + if(LOAD(r) & ~(r->set.b[z] & ~(r->use1.b[z]|r->use2.b[z])) & bb) + addmove(r, bn, rn, 0); + for(;;) { + r->act.b[z] |= bb; + p = r->f.prog; + + if(r->use1.b[z] & bb) { + if(debug['R'] && debug['v']) + print("%P", p); + addreg(&p->from, rn); + if(debug['R'] && debug['v']) + print(" ===change== %P\n", p); + } + if((r->use2.b[z]|r->set.b[z]) & bb) { + if(debug['R'] && debug['v']) + print("%P", p); + addreg(&p->to, rn); + if(debug['R'] && debug['v']) + print(" ===change== %P\n", p); + } + + if(STORE(r) & r->regdiff.b[z] & bb) + addmove(r, bn, rn, 1); + r->regu |= rb; + + if(r->refbehind.b[z] & bb) + for(r1 = (Reg*)r->f.p2; r1 != R; r1 = (Reg*)r1->f.p2link) + if(r1->refahead.b[z] & bb) + paint3(r1, bn, rb, rn); + + if(!(r->refahead.b[z] & bb)) + break; + r1 = (Reg*)r->f.s2; + if(r1 != R) + if(r1->refbehind.b[z] & bb) + paint3(r1, bn, rb, rn); + r = (Reg*)r->f.s1; + if(r == R) + break; + if(r->act.b[z] & bb) + break; + if(!(r->refbehind.b[z] & bb)) + break; + } +} + +void +addreg(Adr *a, int rn) +{ + a->sym = nil; + a->node = nil; + a->name = D_NONE; + a->type = D_REG; + a->reg = rn; + if(rn >= NREG) { + a->type = D_FREG; + a->reg = rn-NREG; + } + + ostats.ncvtreg++; +} + /* * track register variables including external registers: * bit reg @@ -56,7 +1239,7 @@ regopt(Prog *p) uint64 RtoB(int r) { - if(r >= D_R0 && r <= D_R0+31) + if(r > D_R0 && r <= D_R0+31) return 1ULL << (r - D_R0); return 0; } @@ -64,7 +1247,7 @@ RtoB(int r) int BtoR(uint64 b) { - b &= 0xffffffff; + b &= 0xffffffffull; if(b == 0) return 0; return bitno(b) + D_R0; @@ -139,6 +1322,7 @@ void dumpit(char *str, Flow *r0, int isreg) { Flow *r, *r1; + int s1v, s2v; print("\n%s\n", str); for(r = r0; r != nil; r = r->link) { @@ -150,12 +1334,16 @@ dumpit(char *str, Flow *r0, int isreg) print(" %.4ud", (int)r1->prog->pc); print("\n"); } -// r1 = r->s1; -// if(r1 != R) { -// print(" succ:"); -// for(; r1 != R; r1 = r1->s1) 
-// print(" %.4ud", (int)r1->prog->pc); -// print("\n"); -// } + // If at least one successor is "interesting", print both + s1v = (r->s1 != nil) && (r->s1->prog != r->prog->link); + s2v = (r->s2 != nil) && (r->s2->prog != r->prog->link); + if(s1v || s2v) { + print(" succ:"); + if(r->s1 != nil) + print(" %.4ud", (int)r->s1->prog->pc); + if(r->s2 != nil) + print(" %.4ud", (int)r->s2->prog->pc); + print("\n"); + } } }