cmd/compile: on PPC64, fold offset into some loads/stores only when offset is 4-aligned

On PPC64, MOVWload, MOVDload, and MOVDstore are assembled to a
"DS from" instruction which requiers the offset is a multiple of
4. Only fold offset to such instructions if it is a multiple of 4.

Fixes #36723.

"GOARCH=ppc64 GOOS=linux go build -gcflags=all=-d=ssa/check/on std cmd"
passes now.

Change-Id: I67f2a6ac02f0d33d470f68ff54936c289a4c765b
Reviewed-on: https://go-review.googlesource.com/c/go/+/216379
Reviewed-by: Carlos Eduardo Seo <cseo@linux.vnet.ibm.com>
This commit is contained in:
Cherry Zhang 2020-01-24 14:11:04 -05:00
parent 4615b39514
commit 6fbdfe4804
3 changed files with 70 additions and 41 deletions

View file

@ -813,7 +813,7 @@
(MFVSRD x:(FMOVDload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) -> @x.Block (MOVDload [off] {sym} ptr mem)
// Fold offsets for stores.
(MOVDstore [off1] {sym} (ADDconst [off2] x) val mem) && is16Bit(off1+off2) -> (MOVDstore [off1+off2] {sym} x val mem)
(MOVDstore [off1] {sym} (ADDconst [off2] x) val mem) && is16Bit(off1+off2) && (off1+off2)%4 == 0 -> (MOVDstore [off1+off2] {sym} x val mem)
(MOVWstore [off1] {sym} (ADDconst [off2] x) val mem) && is16Bit(off1+off2) -> (MOVWstore [off1+off2] {sym} x val mem)
(MOVHstore [off1] {sym} (ADDconst [off2] x) val mem) && is16Bit(off1+off2) -> (MOVHstore [off1+off2] {sym} x val mem)
(MOVBstore [off1] {sym} (ADDconst [off2] x) val mem) && is16Bit(off1+off2) -> (MOVBstore [off1+off2] {sym} x val mem)
@ -836,7 +836,7 @@
&& (ptr.Op != OpSB || p.Uses == 1) ->
(MOVWstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
(MOVDstore [off1] {sym1} p:(MOVDaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2)
&& (ptr.Op != OpSB || p.Uses == 1) ->
&& (ptr.Op != OpSB || p.Uses == 1) && (off1+off2)%4 == 0 ->
(MOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
(FMOVSstore [off1] {sym1} p:(MOVDaddr [off2] {sym2} ptr) val mem) && canMergeSym(sym1,sym2)
@ -856,13 +856,13 @@
&& (ptr.Op != OpSB || p.Uses == 1) ->
(MOVHZload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVWload [off1] {sym1} p:(MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2)
&& (ptr.Op != OpSB || p.Uses == 1) ->
&& (ptr.Op != OpSB || p.Uses == 1) && (off1+off2)%4 == 0 ->
(MOVWload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVWZload [off1] {sym1} p:(MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2)
&& (ptr.Op != OpSB || p.Uses == 1) ->
(MOVWZload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(MOVDload [off1] {sym1} p:(MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2)
&& (ptr.Op != OpSB || p.Uses == 1) ->
&& (ptr.Op != OpSB || p.Uses == 1) && (off1+off2)%4 == 0 ->
(MOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
(FMOVSload [off1] {sym1} p:(MOVDaddr [off2] {sym2} ptr) mem) && canMergeSym(sym1,sym2)
&& (ptr.Op != OpSB || p.Uses == 1) ->
@ -875,8 +875,8 @@
(FMOVSload [off1] {sym} (ADDconst [off2] ptr) mem) && is16Bit(off1+off2) -> (FMOVSload [off1+off2] {sym} ptr mem)
(FMOVDload [off1] {sym} (ADDconst [off2] ptr) mem) && is16Bit(off1+off2) -> (FMOVDload [off1+off2] {sym} ptr mem)
(MOVDload [off1] {sym} (ADDconst [off2] x) mem) && is16Bit(off1+off2) -> (MOVDload [off1+off2] {sym} x mem)
(MOVWload [off1] {sym} (ADDconst [off2] x) mem) && is16Bit(off1+off2) -> (MOVWload [off1+off2] {sym} x mem)
(MOVDload [off1] {sym} (ADDconst [off2] x) mem) && is16Bit(off1+off2) && (off1+off2)%4 == 0 -> (MOVDload [off1+off2] {sym} x mem)
(MOVWload [off1] {sym} (ADDconst [off2] x) mem) && is16Bit(off1+off2) && (off1+off2)%4 == 0 -> (MOVWload [off1+off2] {sym} x mem)
(MOVWZload [off1] {sym} (ADDconst [off2] x) mem) && is16Bit(off1+off2) -> (MOVWZload [off1+off2] {sym} x mem)
(MOVHload [off1] {sym} (ADDconst [off2] x) mem) && is16Bit(off1+off2) -> (MOVHload [off1+off2] {sym} x mem)
(MOVHZload [off1] {sym} (ADDconst [off2] x) mem) && is16Bit(off1+off2) -> (MOVHZload [off1+off2] {sym} x mem)
@ -886,9 +886,10 @@
(MOV(D|W|WZ|H|HZ|BZ)load [0] {sym} p:(ADD ptr idx) mem) && sym == nil && p.Uses == 1 -> (MOV(D|W|WZ|H|HZ|BZ)loadidx ptr idx mem)
// Determine indexed loads with constant values that can be done without index
(MOV(D|W|WZ|H|HZ|BZ)loadidx ptr (MOVDconst [c]) mem) && is16Bit(c) -> (MOV(D|W|WZ|H|HZ|BZ)load [c] ptr mem)
(MOV(D|W|WZ|H|HZ|BZ)loadidx (MOVDconst [c]) ptr mem) && is16Bit(c) -> (MOV(D|W|WZ|H|HZ|BZ)load [c] ptr mem)
(MOV(D|W)loadidx ptr (MOVDconst [c]) mem) && is16Bit(c) && c%4 == 0 -> (MOV(D|W)load [c] ptr mem)
(MOV(WZ|H|HZ|BZ)loadidx ptr (MOVDconst [c]) mem) && is16Bit(c) -> (MOV(WZ|H|HZ|BZ)load [c] ptr mem)
(MOV(D|W)loadidx (MOVDconst [c]) ptr mem) && is16Bit(c) && c%4 == 0 -> (MOV(D|W)load [c] ptr mem)
(MOV(WZ|H|HZ|BZ)loadidx (MOVDconst [c]) ptr mem) && is16Bit(c) -> (MOV(WZ|H|HZ|BZ)load [c] ptr mem)
// Store of zero -> storezero
(MOVDstore [off] {sym} ptr (MOVDconst [0]) mem) -> (MOVDstorezero [off] {sym} ptr mem)
@ -897,7 +898,7 @@
(MOVBstore [off] {sym} ptr (MOVDconst [0]) mem) -> (MOVBstorezero [off] {sym} ptr mem)
// Fold offsets for storezero
(MOVDstorezero [off1] {sym} (ADDconst [off2] x) mem) && is16Bit(off1+off2) ->
(MOVDstorezero [off1] {sym} (ADDconst [off2] x) mem) && is16Bit(off1+off2) && (off1+off2)%4 == 0 ->
(MOVDstorezero [off1+off2] {sym} x mem)
(MOVWstorezero [off1] {sym} (ADDconst [off2] x) mem) && is16Bit(off1+off2) ->
(MOVWstorezero [off1+off2] {sym} x mem)
@ -910,12 +911,14 @@
(MOV(D|W|H|B)store [off] {sym} p:(ADD ptr idx) val mem) && off == 0 && sym == nil && p.Uses == 1 -> (MOV(D|W|H|B)storeidx ptr idx val mem)
// Stores with constant index values can be done without indexed instructions
(MOV(D|W|H|B)storeidx ptr (MOVDconst [c]) val mem) && is16Bit(c) -> (MOV(D|W|H|B)store [c] ptr val mem)
(MOV(D|W|H|B)storeidx (MOVDconst [c]) ptr val mem) && is16Bit(c) -> (MOV(D|W|H|B)store [c] ptr val mem)
(MOVDstoreidx ptr (MOVDconst [c]) val mem) && is16Bit(c) && c%4 == 0 -> (MOVDstore [c] ptr val mem)
(MOV(W|H|B)storeidx ptr (MOVDconst [c]) val mem) && is16Bit(c) -> (MOV(W|H|B)store [c] ptr val mem)
(MOVDstoreidx (MOVDconst [c]) ptr val mem) && is16Bit(c) && c%4 == 0 -> (MOVDstore [c] ptr val mem)
(MOV(W|H|B)storeidx (MOVDconst [c]) ptr val mem) && is16Bit(c) -> (MOV(W|H|B)store [c] ptr val mem)
// Fold symbols into storezero
(MOVDstorezero [off1] {sym1} p:(MOVDaddr [off2] {sym2} x) mem) && canMergeSym(sym1,sym2)
&& (x.Op != OpSB || p.Uses == 1) ->
&& (x.Op != OpSB || p.Uses == 1) && (off1+off2)%4 == 0 ->
(MOVDstorezero [off1+off2] {mergeSym(sym1,sym2)} x mem)
(MOVWstorezero [off1] {sym1} p:(MOVDaddr [off2] {sym2} x) mem) && canMergeSym(sym1,sym2)
&& (x.Op != OpSB || p.Uses == 1) ->

View file

@ -9480,7 +9480,7 @@ func rewriteValuePPC64_OpPPC64MOVDload_0(v *Value) bool {
return true
}
// match: (MOVDload [off1] {sym1} p:(MOVDaddr [off2] {sym2} ptr) mem)
// cond: canMergeSym(sym1,sym2) && (ptr.Op != OpSB || p.Uses == 1)
// cond: canMergeSym(sym1,sym2) && (ptr.Op != OpSB || p.Uses == 1) && (off1+off2)%4 == 0
// result: (MOVDload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
for {
off1 := v.AuxInt
@ -9493,7 +9493,7 @@ func rewriteValuePPC64_OpPPC64MOVDload_0(v *Value) bool {
off2 := p.AuxInt
sym2 := p.Aux
ptr := p.Args[0]
if !(canMergeSym(sym1, sym2) && (ptr.Op != OpSB || p.Uses == 1)) {
if !(canMergeSym(sym1, sym2) && (ptr.Op != OpSB || p.Uses == 1) && (off1+off2)%4 == 0) {
break
}
v.reset(OpPPC64MOVDload)
@ -9504,7 +9504,7 @@ func rewriteValuePPC64_OpPPC64MOVDload_0(v *Value) bool {
return true
}
// match: (MOVDload [off1] {sym} (ADDconst [off2] x) mem)
// cond: is16Bit(off1+off2)
// cond: is16Bit(off1+off2) && (off1+off2)%4 == 0
// result: (MOVDload [off1+off2] {sym} x mem)
for {
off1 := v.AuxInt
@ -9516,7 +9516,7 @@ func rewriteValuePPC64_OpPPC64MOVDload_0(v *Value) bool {
}
off2 := v_0.AuxInt
x := v_0.Args[0]
if !(is16Bit(off1 + off2)) {
if !(is16Bit(off1+off2) && (off1+off2)%4 == 0) {
break
}
v.reset(OpPPC64MOVDload)
@ -9554,7 +9554,7 @@ func rewriteValuePPC64_OpPPC64MOVDload_0(v *Value) bool {
}
func rewriteValuePPC64_OpPPC64MOVDloadidx_0(v *Value) bool {
// match: (MOVDloadidx ptr (MOVDconst [c]) mem)
// cond: is16Bit(c)
// cond: is16Bit(c) && c%4 == 0
// result: (MOVDload [c] ptr mem)
for {
mem := v.Args[2]
@ -9564,7 +9564,7 @@ func rewriteValuePPC64_OpPPC64MOVDloadidx_0(v *Value) bool {
break
}
c := v_1.AuxInt
if !(is16Bit(c)) {
if !(is16Bit(c) && c%4 == 0) {
break
}
v.reset(OpPPC64MOVDload)
@ -9574,7 +9574,7 @@ func rewriteValuePPC64_OpPPC64MOVDloadidx_0(v *Value) bool {
return true
}
// match: (MOVDloadidx (MOVDconst [c]) ptr mem)
// cond: is16Bit(c)
// cond: is16Bit(c) && c%4 == 0
// result: (MOVDload [c] ptr mem)
for {
mem := v.Args[2]
@ -9584,7 +9584,7 @@ func rewriteValuePPC64_OpPPC64MOVDloadidx_0(v *Value) bool {
}
c := v_0.AuxInt
ptr := v.Args[1]
if !(is16Bit(c)) {
if !(is16Bit(c) && c%4 == 0) {
break
}
v.reset(OpPPC64MOVDload)
@ -9617,7 +9617,7 @@ func rewriteValuePPC64_OpPPC64MOVDstore_0(v *Value) bool {
return true
}
// match: (MOVDstore [off1] {sym} (ADDconst [off2] x) val mem)
// cond: is16Bit(off1+off2)
// cond: is16Bit(off1+off2) && (off1+off2)%4 == 0
// result: (MOVDstore [off1+off2] {sym} x val mem)
for {
off1 := v.AuxInt
@ -9630,7 +9630,7 @@ func rewriteValuePPC64_OpPPC64MOVDstore_0(v *Value) bool {
off2 := v_0.AuxInt
x := v_0.Args[0]
val := v.Args[1]
if !(is16Bit(off1 + off2)) {
if !(is16Bit(off1+off2) && (off1+off2)%4 == 0) {
break
}
v.reset(OpPPC64MOVDstore)
@ -9642,7 +9642,7 @@ func rewriteValuePPC64_OpPPC64MOVDstore_0(v *Value) bool {
return true
}
// match: (MOVDstore [off1] {sym1} p:(MOVDaddr [off2] {sym2} ptr) val mem)
// cond: canMergeSym(sym1,sym2) && (ptr.Op != OpSB || p.Uses == 1)
// cond: canMergeSym(sym1,sym2) && (ptr.Op != OpSB || p.Uses == 1) && (off1+off2)%4 == 0
// result: (MOVDstore [off1+off2] {mergeSym(sym1,sym2)} ptr val mem)
for {
off1 := v.AuxInt
@ -9656,7 +9656,7 @@ func rewriteValuePPC64_OpPPC64MOVDstore_0(v *Value) bool {
sym2 := p.Aux
ptr := p.Args[0]
val := v.Args[1]
if !(canMergeSym(sym1, sym2) && (ptr.Op != OpSB || p.Uses == 1)) {
if !(canMergeSym(sym1, sym2) && (ptr.Op != OpSB || p.Uses == 1) && (off1+off2)%4 == 0) {
break
}
v.reset(OpPPC64MOVDstore)
@ -9713,7 +9713,7 @@ func rewriteValuePPC64_OpPPC64MOVDstore_0(v *Value) bool {
}
func rewriteValuePPC64_OpPPC64MOVDstoreidx_0(v *Value) bool {
// match: (MOVDstoreidx ptr (MOVDconst [c]) val mem)
// cond: is16Bit(c)
// cond: is16Bit(c) && c%4 == 0
// result: (MOVDstore [c] ptr val mem)
for {
mem := v.Args[3]
@ -9724,7 +9724,7 @@ func rewriteValuePPC64_OpPPC64MOVDstoreidx_0(v *Value) bool {
}
c := v_1.AuxInt
val := v.Args[2]
if !(is16Bit(c)) {
if !(is16Bit(c) && c%4 == 0) {
break
}
v.reset(OpPPC64MOVDstore)
@ -9735,7 +9735,7 @@ func rewriteValuePPC64_OpPPC64MOVDstoreidx_0(v *Value) bool {
return true
}
// match: (MOVDstoreidx (MOVDconst [c]) ptr val mem)
// cond: is16Bit(c)
// cond: is16Bit(c) && c%4 == 0
// result: (MOVDstore [c] ptr val mem)
for {
mem := v.Args[3]
@ -9746,7 +9746,7 @@ func rewriteValuePPC64_OpPPC64MOVDstoreidx_0(v *Value) bool {
c := v_0.AuxInt
ptr := v.Args[1]
val := v.Args[2]
if !(is16Bit(c)) {
if !(is16Bit(c) && c%4 == 0) {
break
}
v.reset(OpPPC64MOVDstore)
@ -9760,7 +9760,7 @@ func rewriteValuePPC64_OpPPC64MOVDstoreidx_0(v *Value) bool {
}
func rewriteValuePPC64_OpPPC64MOVDstorezero_0(v *Value) bool {
// match: (MOVDstorezero [off1] {sym} (ADDconst [off2] x) mem)
// cond: is16Bit(off1+off2)
// cond: is16Bit(off1+off2) && (off1+off2)%4 == 0
// result: (MOVDstorezero [off1+off2] {sym} x mem)
for {
off1 := v.AuxInt
@ -9772,7 +9772,7 @@ func rewriteValuePPC64_OpPPC64MOVDstorezero_0(v *Value) bool {
}
off2 := v_0.AuxInt
x := v_0.Args[0]
if !(is16Bit(off1 + off2)) {
if !(is16Bit(off1+off2) && (off1+off2)%4 == 0) {
break
}
v.reset(OpPPC64MOVDstorezero)
@ -9783,7 +9783,7 @@ func rewriteValuePPC64_OpPPC64MOVDstorezero_0(v *Value) bool {
return true
}
// match: (MOVDstorezero [off1] {sym1} p:(MOVDaddr [off2] {sym2} x) mem)
// cond: canMergeSym(sym1,sym2) && (x.Op != OpSB || p.Uses == 1)
// cond: canMergeSym(sym1,sym2) && (x.Op != OpSB || p.Uses == 1) && (off1+off2)%4 == 0
// result: (MOVDstorezero [off1+off2] {mergeSym(sym1,sym2)} x mem)
for {
off1 := v.AuxInt
@ -9796,7 +9796,7 @@ func rewriteValuePPC64_OpPPC64MOVDstorezero_0(v *Value) bool {
off2 := p.AuxInt
sym2 := p.Aux
x := p.Args[0]
if !(canMergeSym(sym1, sym2) && (x.Op != OpSB || p.Uses == 1)) {
if !(canMergeSym(sym1, sym2) && (x.Op != OpSB || p.Uses == 1) && (off1+off2)%4 == 0) {
break
}
v.reset(OpPPC64MOVDstorezero)
@ -11548,7 +11548,7 @@ func rewriteValuePPC64_OpPPC64MOVWZreg_20(v *Value) bool {
}
func rewriteValuePPC64_OpPPC64MOVWload_0(v *Value) bool {
// match: (MOVWload [off1] {sym1} p:(MOVDaddr [off2] {sym2} ptr) mem)
// cond: canMergeSym(sym1,sym2) && (ptr.Op != OpSB || p.Uses == 1)
// cond: canMergeSym(sym1,sym2) && (ptr.Op != OpSB || p.Uses == 1) && (off1+off2)%4 == 0
// result: (MOVWload [off1+off2] {mergeSym(sym1,sym2)} ptr mem)
for {
off1 := v.AuxInt
@ -11561,7 +11561,7 @@ func rewriteValuePPC64_OpPPC64MOVWload_0(v *Value) bool {
off2 := p.AuxInt
sym2 := p.Aux
ptr := p.Args[0]
if !(canMergeSym(sym1, sym2) && (ptr.Op != OpSB || p.Uses == 1)) {
if !(canMergeSym(sym1, sym2) && (ptr.Op != OpSB || p.Uses == 1) && (off1+off2)%4 == 0) {
break
}
v.reset(OpPPC64MOVWload)
@ -11572,7 +11572,7 @@ func rewriteValuePPC64_OpPPC64MOVWload_0(v *Value) bool {
return true
}
// match: (MOVWload [off1] {sym} (ADDconst [off2] x) mem)
// cond: is16Bit(off1+off2)
// cond: is16Bit(off1+off2) && (off1+off2)%4 == 0
// result: (MOVWload [off1+off2] {sym} x mem)
for {
off1 := v.AuxInt
@ -11584,7 +11584,7 @@ func rewriteValuePPC64_OpPPC64MOVWload_0(v *Value) bool {
}
off2 := v_0.AuxInt
x := v_0.Args[0]
if !(is16Bit(off1 + off2)) {
if !(is16Bit(off1+off2) && (off1+off2)%4 == 0) {
break
}
v.reset(OpPPC64MOVWload)
@ -11622,7 +11622,7 @@ func rewriteValuePPC64_OpPPC64MOVWload_0(v *Value) bool {
}
func rewriteValuePPC64_OpPPC64MOVWloadidx_0(v *Value) bool {
// match: (MOVWloadidx ptr (MOVDconst [c]) mem)
// cond: is16Bit(c)
// cond: is16Bit(c) && c%4 == 0
// result: (MOVWload [c] ptr mem)
for {
mem := v.Args[2]
@ -11632,7 +11632,7 @@ func rewriteValuePPC64_OpPPC64MOVWloadidx_0(v *Value) bool {
break
}
c := v_1.AuxInt
if !(is16Bit(c)) {
if !(is16Bit(c) && c%4 == 0) {
break
}
v.reset(OpPPC64MOVWload)
@ -11642,7 +11642,7 @@ func rewriteValuePPC64_OpPPC64MOVWloadidx_0(v *Value) bool {
return true
}
// match: (MOVWloadidx (MOVDconst [c]) ptr mem)
// cond: is16Bit(c)
// cond: is16Bit(c) && c%4 == 0
// result: (MOVWload [c] ptr mem)
for {
mem := v.Args[2]
@ -11652,7 +11652,7 @@ func rewriteValuePPC64_OpPPC64MOVWloadidx_0(v *Value) bool {
}
c := v_0.AuxInt
ptr := v.Args[1]
if !(is16Bit(c)) {
if !(is16Bit(c) && c%4 == 0) {
break
}
v.reset(OpPPC64MOVWload)

View file

@ -0,0 +1,26 @@
// compile -d=ssa/check/on
// Copyright 2020 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Issue 36723: fail to compile on PPC64 when SSA check is on.
package p
import "unsafe"
type T struct {
a, b, c, d uint8
x [10]int32
}
func F(p *T, i uintptr) int32 {
// load p.x[i] using unsafe, derived from runtime.pcdatastart
_ = *p
return *(*int32)(add(unsafe.Pointer(&p.d), unsafe.Sizeof(p.d)+i*unsafe.Sizeof(p.x[0])))
}
func add(p unsafe.Pointer, x uintptr) unsafe.Pointer {
return unsafe.Pointer(uintptr(p) + x)
}