From 09932f95f5619f44fa874c780dbbddc073be990a Mon Sep 17 00:00:00 2001
From: Wayne Zuo
Date: Fri, 10 Jun 2022 19:06:30 +0800
Subject: [PATCH] cmd/compile: combine more constant stores on amd64

Fixes #53324

Change-Id: I06149d860f858b082235e9d80bf0ea494679b386
Reviewed-on: https://go-review.googlesource.com/c/go/+/411614
Reviewed-by: Keith Randall
Run-TryBot: Wayne Zuo
Reviewed-by: Keith Randall
TryBot-Result: Gopher Robot
Reviewed-by: Ian Lance Taylor
---
 src/cmd/compile/internal/ssa/gen/AMD64.rules |  48 ++++-----
 src/cmd/compile/internal/ssa/rewriteAMD64.go | 104 ++++++++++---------
 test/codegen/memcombine.go                   |  15 +++
 3 files changed, 95 insertions(+), 72 deletions(-)

diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index d96a37f7ce..6a96b2d61e 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -1929,52 +1929,52 @@
   => (MOVQstore [i] {s} p0 (BSWAPQ w) mem)
 
 // Combine constant stores into larger (unaligned) stores.
-(MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem))
+(MOVBstoreconst [c] {s} p1 x:(MOVBstoreconst [a] {s} p0 mem))
   && x.Uses == 1
-  && a.Off() + 1 == c.Off()
+  && sequentialAddresses(p0, p1, int64(a.Off()+1-c.Off()))
   && clobber(x)
-  => (MOVWstoreconst [makeValAndOff(a.Val()&0xff | c.Val()<<8, a.Off())] {s} p mem)
-(MOVBstoreconst [a] {s} p x:(MOVBstoreconst [c] {s} p mem))
+  => (MOVWstoreconst [makeValAndOff(a.Val()&0xff | c.Val()<<8, a.Off())] {s} p0 mem)
+(MOVBstoreconst [a] {s} p0 x:(MOVBstoreconst [c] {s} p1 mem))
   && x.Uses == 1
-  && a.Off() + 1 == c.Off()
+  && sequentialAddresses(p0, p1, int64(a.Off()+1-c.Off()))
   && clobber(x)
-  => (MOVWstoreconst [makeValAndOff(a.Val()&0xff | c.Val()<<8, a.Off())] {s} p mem)
-(MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
+  => (MOVWstoreconst [makeValAndOff(a.Val()&0xff | c.Val()<<8, a.Off())] {s} p0 mem)
+(MOVWstoreconst [c] {s} p1 x:(MOVWstoreconst [a] {s} p0 mem))
   && x.Uses == 1
-  && a.Off() + 2 == c.Off()
+  && sequentialAddresses(p0, p1, int64(a.Off()+2-c.Off()))
   && clobber(x)
-  => (MOVLstoreconst [makeValAndOff(a.Val()&0xffff | c.Val()<<16, a.Off())] {s} p mem)
-(MOVWstoreconst [a] {s} p x:(MOVWstoreconst [c] {s} p mem))
+  => (MOVLstoreconst [makeValAndOff(a.Val()&0xffff | c.Val()<<16, a.Off())] {s} p0 mem)
+(MOVWstoreconst [a] {s} p0 x:(MOVWstoreconst [c] {s} p1 mem))
   && x.Uses == 1
-  && a.Off() + 2 == c.Off()
+  && sequentialAddresses(p0, p1, int64(a.Off()+2-c.Off()))
   && clobber(x)
-  => (MOVLstoreconst [makeValAndOff(a.Val()&0xffff | c.Val()<<16, a.Off())] {s} p mem)
-(MOVLstoreconst [c] {s} p x:(MOVLstoreconst [a] {s} p mem))
+  => (MOVLstoreconst [makeValAndOff(a.Val()&0xffff | c.Val()<<16, a.Off())] {s} p0 mem)
+(MOVLstoreconst [c] {s} p1 x:(MOVLstoreconst [a] {s} p0 mem))
   && x.Uses == 1
-  && a.Off() + 4 == c.Off()
+  && sequentialAddresses(p0, p1, int64(a.Off()+4-c.Off()))
   && clobber(x)
-  => (MOVQstore [a.Off()] {s} p (MOVQconst [a.Val64()&0xffffffff | c.Val64()<<32]) mem)
-(MOVLstoreconst [a] {s} p x:(MOVLstoreconst [c] {s} p mem))
+  => (MOVQstore [a.Off()] {s} p0 (MOVQconst [a.Val64()&0xffffffff | c.Val64()<<32]) mem)
+(MOVLstoreconst [a] {s} p0 x:(MOVLstoreconst [c] {s} p1 mem))
   && x.Uses == 1
-  && a.Off() + 4 == c.Off()
+  && sequentialAddresses(p0, p1, int64(a.Off()+4-c.Off()))
   && clobber(x)
-  => (MOVQstore [a.Off()] {s} p (MOVQconst [a.Val64()&0xffffffff | c.Val64()<<32]) mem)
-(MOVQstoreconst [c] {s} p x:(MOVQstoreconst [a] {s} p mem))
+  => (MOVQstore [a.Off()] {s} p0 (MOVQconst [a.Val64()&0xffffffff | c.Val64()<<32]) mem)
+(MOVQstoreconst [c] {s} p1 x:(MOVQstoreconst [a] {s} p0 mem))
   && config.useSSE
   && x.Uses == 1
-  && a.Off() + 8 == c.Off()
+  && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off()))
   && a.Val() == 0
   && c.Val() == 0
   && clobber(x)
-  => (MOVOstoreconst [makeValAndOff(0,a.Off())] {s} p mem)
-(MOVQstoreconst [a] {s} p x:(MOVQstoreconst [c] {s} p mem))
+  => (MOVOstoreconst [makeValAndOff(0,a.Off())] {s} p0 mem)
+(MOVQstoreconst [a] {s} p0 x:(MOVQstoreconst [c] {s} p1 mem))
   && config.useSSE
   && x.Uses == 1
-  && a.Off() + 8 == c.Off()
+  && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off()))
   && a.Val() == 0
   && c.Val() == 0
   && clobber(x)
-  => (MOVOstoreconst [makeValAndOff(0,a.Off())] {s} p mem)
+  => (MOVOstoreconst [makeValAndOff(0,a.Off())] {s} p0 mem)
 
 // Combine stores into larger (unaligned) stores. Little endian.
 (MOVBstore [i] {s} p (SHR(W|L|Q)const [8] w) x:(MOVBstore [i-1] {s} p w mem))
diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go
index 434496bd2f..9d8ce8708b 100644
--- a/src/cmd/compile/internal/ssa/rewriteAMD64.go
+++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go
@@ -11402,13 +11402,13 @@ func rewriteValueAMD64_OpAMD64MOVBstoreconst(v *Value) bool {
 		v.AddArg2(ptr, mem)
 		return true
 	}
-	// match: (MOVBstoreconst [c] {s} p x:(MOVBstoreconst [a] {s} p mem))
-	// cond: x.Uses == 1 && a.Off() + 1 == c.Off() && clobber(x)
-	// result: (MOVWstoreconst [makeValAndOff(a.Val()&0xff | c.Val()<<8, a.Off())] {s} p mem)
+	// match: (MOVBstoreconst [c] {s} p1 x:(MOVBstoreconst [a] {s} p0 mem))
+	// cond: x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+1-c.Off())) && clobber(x)
+	// result: (MOVWstoreconst [makeValAndOff(a.Val()&0xff | c.Val()<<8, a.Off())] {s} p0 mem)
 	for {
 		c := auxIntToValAndOff(v.AuxInt)
 		s := auxToSym(v.Aux)
-		p := v_0
+		p1 := v_0
 		x := v_1
 		if x.Op != OpAMD64MOVBstoreconst {
 			break
@@ -11418,22 +11418,23 @@ func rewriteValueAMD64_OpAMD64MOVBstoreconst(v *Value) bool {
 			break
 		}
 		mem := x.Args[1]
-		if p != x.Args[0] || !(x.Uses == 1 && a.Off()+1 == c.Off() && clobber(x)) {
+		p0 := x.Args[0]
+		if !(x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+1-c.Off())) && clobber(x)) {
 			break
 		}
 		v.reset(OpAMD64MOVWstoreconst)
 		v.AuxInt = valAndOffToAuxInt(makeValAndOff(a.Val()&0xff|c.Val()<<8, a.Off()))
 		v.Aux = symToAux(s)
-		v.AddArg2(p, mem)
+		v.AddArg2(p0, mem)
 		return true
 	}
-	// match: (MOVBstoreconst [a] {s} p x:(MOVBstoreconst [c] {s} p mem))
-	// cond: x.Uses == 1 && a.Off() + 1 == c.Off() && clobber(x)
-	// result: (MOVWstoreconst [makeValAndOff(a.Val()&0xff | c.Val()<<8, a.Off())] {s} p mem)
+	// match: (MOVBstoreconst [a] {s} p0 x:(MOVBstoreconst [c] {s} p1 mem))
+	// cond: x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+1-c.Off())) && clobber(x)
+	// result: (MOVWstoreconst [makeValAndOff(a.Val()&0xff | c.Val()<<8, a.Off())] {s} p0 mem)
 	for {
 		a := auxIntToValAndOff(v.AuxInt)
 		s := auxToSym(v.Aux)
-		p := v_0
+		p0 := v_0
 		x := v_1
 		if x.Op != OpAMD64MOVBstoreconst {
 			break
@@ -11443,13 +11444,14 @@ func rewriteValueAMD64_OpAMD64MOVBstoreconst(v *Value) bool {
 			break
 		}
 		mem := x.Args[1]
-		if p != x.Args[0] || !(x.Uses == 1 && a.Off()+1 == c.Off() && clobber(x)) {
+		p1 := x.Args[0]
+		if !(x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+1-c.Off())) && clobber(x)) {
 			break
 		}
 		v.reset(OpAMD64MOVWstoreconst)
 		v.AuxInt = valAndOffToAuxInt(makeValAndOff(a.Val()&0xff|c.Val()<<8, a.Off()))
 		v.Aux = symToAux(s)
-		v.AddArg2(p, mem)
+		v.AddArg2(p0, mem)
 		return true
 	}
 	return false
@@ -12632,13 +12634,13 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value) bool {
 		v.AddArg2(ptr, mem)
 		return true
 	}
-	// match: (MOVLstoreconst [c] {s} p x:(MOVLstoreconst [a] {s} p mem))
-	// cond: x.Uses == 1 && a.Off() + 4 == c.Off() && clobber(x)
-	// result: (MOVQstore [a.Off()] {s} p (MOVQconst [a.Val64()&0xffffffff | c.Val64()<<32]) mem)
+	// match: (MOVLstoreconst [c] {s} p1 x:(MOVLstoreconst [a] {s} p0 mem))
+	// cond: x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+4-c.Off())) && clobber(x)
+	// result: (MOVQstore [a.Off()] {s} p0 (MOVQconst [a.Val64()&0xffffffff | c.Val64()<<32]) mem)
 	for {
 		c := auxIntToValAndOff(v.AuxInt)
 		s := auxToSym(v.Aux)
-		p := v_0
+		p1 := v_0
 		x := v_1
 		if x.Op != OpAMD64MOVLstoreconst {
 			break
@@ -12648,7 +12650,8 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value) bool {
 			break
 		}
 		mem := x.Args[1]
-		if p != x.Args[0] || !(x.Uses == 1 && a.Off()+4 == c.Off() && clobber(x)) {
+		p0 := x.Args[0]
+		if !(x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+4-c.Off())) && clobber(x)) {
 			break
 		}
 		v.reset(OpAMD64MOVQstore)
@@ -12656,16 +12659,16 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value) bool {
 		v.Aux = symToAux(s)
 		v0 := b.NewValue0(x.Pos, OpAMD64MOVQconst, typ.UInt64)
 		v0.AuxInt = int64ToAuxInt(a.Val64()&0xffffffff | c.Val64()<<32)
-		v.AddArg3(p, v0, mem)
+		v.AddArg3(p0, v0, mem)
 		return true
 	}
-	// match: (MOVLstoreconst [a] {s} p x:(MOVLstoreconst [c] {s} p mem))
-	// cond: x.Uses == 1 && a.Off() + 4 == c.Off() && clobber(x)
-	// result: (MOVQstore [a.Off()] {s} p (MOVQconst [a.Val64()&0xffffffff | c.Val64()<<32]) mem)
+	// match: (MOVLstoreconst [a] {s} p0 x:(MOVLstoreconst [c] {s} p1 mem))
+	// cond: x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+4-c.Off())) && clobber(x)
+	// result: (MOVQstore [a.Off()] {s} p0 (MOVQconst [a.Val64()&0xffffffff | c.Val64()<<32]) mem)
 	for {
 		a := auxIntToValAndOff(v.AuxInt)
 		s := auxToSym(v.Aux)
-		p := v_0
+		p0 := v_0
 		x := v_1
 		if x.Op != OpAMD64MOVLstoreconst {
 			break
@@ -12675,7 +12678,8 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value) bool {
 			break
 		}
 		mem := x.Args[1]
-		if p != x.Args[0] || !(x.Uses == 1 && a.Off()+4 == c.Off() && clobber(x)) {
+		p1 := x.Args[0]
+		if !(x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+4-c.Off())) && clobber(x)) {
 			break
 		}
 		v.reset(OpAMD64MOVQstore)
@@ -12683,7 +12687,7 @@ func rewriteValueAMD64_OpAMD64MOVLstoreconst(v *Value) bool {
 		v.Aux = symToAux(s)
 		v0 := b.NewValue0(x.Pos, OpAMD64MOVQconst, typ.UInt64)
 		v0.AuxInt = int64ToAuxInt(a.Val64()&0xffffffff | c.Val64()<<32)
-		v.AddArg3(p, v0, mem)
+		v.AddArg3(p0, v0, mem)
 		return true
 	}
 	return false
@@ -13593,13 +13597,13 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconst(v *Value) bool {
 		v.AddArg2(ptr, mem)
 		return true
 	}
-	// match: (MOVQstoreconst [c] {s} p x:(MOVQstoreconst [a] {s} p mem))
-	// cond: config.useSSE && x.Uses == 1 && a.Off() + 8 == c.Off() && a.Val() == 0 && c.Val() == 0 && clobber(x)
-	// result: (MOVOstoreconst [makeValAndOff(0,a.Off())] {s} p mem)
+	// match: (MOVQstoreconst [c] {s} p1 x:(MOVQstoreconst [a] {s} p0 mem))
+	// cond: config.useSSE && x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off())) && a.Val() == 0 && c.Val() == 0 && clobber(x)
+	// result: (MOVOstoreconst [makeValAndOff(0,a.Off())] {s} p0 mem)
 	for {
 		c := auxIntToValAndOff(v.AuxInt)
 		s := auxToSym(v.Aux)
-		p := v_0
+		p1 := v_0
 		x := v_1
 		if x.Op != OpAMD64MOVQstoreconst {
 			break
@@ -13609,22 +13613,23 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconst(v *Value) bool {
 			break
 		}
 		mem := x.Args[1]
-		if p != x.Args[0] || !(config.useSSE && x.Uses == 1 && a.Off()+8 == c.Off() && a.Val() == 0 && c.Val() == 0 && clobber(x)) {
+		p0 := x.Args[0]
+		if !(config.useSSE && x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off())) && a.Val() == 0 && c.Val() == 0 && clobber(x)) {
 			break
 		}
 		v.reset(OpAMD64MOVOstoreconst)
 		v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, a.Off()))
 		v.Aux = symToAux(s)
-		v.AddArg2(p, mem)
+		v.AddArg2(p0, mem)
 		return true
 	}
-	// match: (MOVQstoreconst [a] {s} p x:(MOVQstoreconst [c] {s} p mem))
-	// cond: config.useSSE && x.Uses == 1 && a.Off() + 8 == c.Off() && a.Val() == 0 && c.Val() == 0 && clobber(x)
-	// result: (MOVOstoreconst [makeValAndOff(0,a.Off())] {s} p mem)
+	// match: (MOVQstoreconst [a] {s} p0 x:(MOVQstoreconst [c] {s} p1 mem))
+	// cond: config.useSSE && x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off())) && a.Val() == 0 && c.Val() == 0 && clobber(x)
+	// result: (MOVOstoreconst [makeValAndOff(0,a.Off())] {s} p0 mem)
 	for {
 		a := auxIntToValAndOff(v.AuxInt)
 		s := auxToSym(v.Aux)
-		p := v_0
+		p0 := v_0
 		x := v_1
 		if x.Op != OpAMD64MOVQstoreconst {
 			break
@@ -13634,13 +13639,14 @@ func rewriteValueAMD64_OpAMD64MOVQstoreconst(v *Value) bool {
 			break
 		}
 		mem := x.Args[1]
-		if p != x.Args[0] || !(config.useSSE && x.Uses == 1 && a.Off()+8 == c.Off() && a.Val() == 0 && c.Val() == 0 && clobber(x)) {
+		p1 := x.Args[0]
+		if !(config.useSSE && x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+8-c.Off())) && a.Val() == 0 && c.Val() == 0 && clobber(x)) {
 			break
 		}
 		v.reset(OpAMD64MOVOstoreconst)
 		v.AuxInt = valAndOffToAuxInt(makeValAndOff(0, a.Off()))
 		v.Aux = symToAux(s)
-		v.AddArg2(p, mem)
+		v.AddArg2(p0, mem)
 		return true
 	}
 	return false
@@ -14724,13 +14730,13 @@ func rewriteValueAMD64_OpAMD64MOVWstoreconst(v *Value) bool {
 		v.AddArg2(ptr, mem)
 		return true
 	}
-	// match: (MOVWstoreconst [c] {s} p x:(MOVWstoreconst [a] {s} p mem))
-	// cond: x.Uses == 1 && a.Off() + 2 == c.Off() && clobber(x)
-	// result: (MOVLstoreconst [makeValAndOff(a.Val()&0xffff | c.Val()<<16, a.Off())] {s} p mem)
+	// match: (MOVWstoreconst [c] {s} p1 x:(MOVWstoreconst [a] {s} p0 mem))
+	// cond: x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+2-c.Off())) && clobber(x)
+	// result: (MOVLstoreconst [makeValAndOff(a.Val()&0xffff | c.Val()<<16, a.Off())] {s} p0 mem)
 	for {
 		c := auxIntToValAndOff(v.AuxInt)
 		s := auxToSym(v.Aux)
-		p := v_0
+		p1 := v_0
 		x := v_1
 		if x.Op != OpAMD64MOVWstoreconst {
 			break
@@ -14740,22 +14746,23 @@ func rewriteValueAMD64_OpAMD64MOVWstoreconst(v *Value) bool {
 			break
 		}
 		mem := x.Args[1]
-		if p != x.Args[0] || !(x.Uses == 1 && a.Off()+2 == c.Off() && clobber(x)) {
+		p0 := x.Args[0]
+		if !(x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+2-c.Off())) && clobber(x)) {
 			break
 		}
 		v.reset(OpAMD64MOVLstoreconst)
 		v.AuxInt = valAndOffToAuxInt(makeValAndOff(a.Val()&0xffff|c.Val()<<16, a.Off()))
 		v.Aux = symToAux(s)
-		v.AddArg2(p, mem)
+		v.AddArg2(p0, mem)
 		return true
 	}
-	// match: (MOVWstoreconst [a] {s} p x:(MOVWstoreconst [c] {s} p mem))
-	// cond: x.Uses == 1 && a.Off() + 2 == c.Off() && clobber(x)
-	// result: (MOVLstoreconst [makeValAndOff(a.Val()&0xffff | c.Val()<<16, a.Off())] {s} p mem)
+	// match: (MOVWstoreconst [a] {s} p0 x:(MOVWstoreconst [c] {s} p1 mem))
+	// cond: x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+2-c.Off())) && clobber(x)
+	// result: (MOVLstoreconst [makeValAndOff(a.Val()&0xffff | c.Val()<<16, a.Off())] {s} p0 mem)
 	for {
 		a := auxIntToValAndOff(v.AuxInt)
 		s := auxToSym(v.Aux)
-		p := v_0
+		p0 := v_0
 		x := v_1
 		if x.Op != OpAMD64MOVWstoreconst {
 			break
@@ -14765,13 +14772,14 @@ func rewriteValueAMD64_OpAMD64MOVWstoreconst(v *Value) bool {
 			break
 		}
 		mem := x.Args[1]
-		if p != x.Args[0] || !(x.Uses == 1 && a.Off()+2 == c.Off() && clobber(x)) {
+		p1 := x.Args[0]
+		if !(x.Uses == 1 && sequentialAddresses(p0, p1, int64(a.Off()+2-c.Off())) && clobber(x)) {
 			break
 		}
 		v.reset(OpAMD64MOVLstoreconst)
 		v.AuxInt = valAndOffToAuxInt(makeValAndOff(a.Val()&0xffff|c.Val()<<16, a.Off()))
 		v.Aux = symToAux(s)
-		v.AddArg2(p, mem)
+		v.AddArg2(p0, mem)
 		return true
 	}
 	return false
diff --git a/test/codegen/memcombine.go b/test/codegen/memcombine.go
index 6e0132744c..8143b6bed5 100644
--- a/test/codegen/memcombine.go
+++ b/test/codegen/memcombine.go
@@ -377,6 +377,11 @@ func store_le64_idx2(dst []byte, d, length, offset int) []byte {
 	return dst
 }
 
+func store_le64_idx_const(b []byte, idx int) {
+	// amd64:`MOVQ\s\$123, \(.*\)\(.*\*1\)$`
+	binary.LittleEndian.PutUint64(b[idx:], 123)
+}
+
 func store_le64_load(b []byte, x *[8]byte) {
 	_ = b[8]
 	// amd64:-`MOV[BWL]`
@@ -402,6 +407,11 @@ func store_le32_idx(b []byte, x uint32, idx int) {
 	binary.LittleEndian.PutUint32(b[idx:], x)
 }
 
+func store_le32_idx_const(b []byte, idx int) {
+	// amd64:`MOVL\s\$123, \(.*\)\(.*\*1\)$`
+	binary.LittleEndian.PutUint32(b[idx:], 123)
+}
+
 func store_le16(b []byte, x uint16) {
 	// amd64:`MOVW\s`
 	// arm64:`MOVH`,-`MOVB`
@@ -418,6 +428,11 @@ func store_le16_idx(b []byte, x uint16, idx int) {
 	binary.LittleEndian.PutUint16(b[idx:], x)
 }
 
+func store_le16_idx_const(b []byte, idx int) {
+	// amd64:`MOVW\s\$123, \(.*\)\(.*\*1\)$`
+	binary.LittleEndian.PutUint16(b[idx:], 123)
+}
+
 func store_be64(b []byte, x uint64) {
 	// amd64/v1,amd64/v2:`BSWAPQ`,-`SHR.`
 	// amd64/v3: `MOVBEQ`
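
For context, a minimal program showing the pattern this change targets,
distilled from the codegen tests above. The old rules only combined two
constant stores that used the same pointer value; with sequentialAddresses
the rewrite also fires when the stores use two different pointer values
that provably lie the right distance apart, as with an indexed slice
address. The function names here and the registers in the comment are
illustrative, not part of the patch:

	package main

	import (
		"encoding/binary"
		"fmt"
	)

	// putConst64 stores the constant 123 at a variable offset. On amd64,
	// with this change the narrower constant stores are merged into a
	// single quad-word store such as MOVQ $123, (AX)(BX*1), which is what
	// the store_le64_idx_const test above asserts.
	func putConst64(b []byte, idx int) {
		binary.LittleEndian.PutUint64(b[idx:], 123)
	}

	func main() {
		b := make([]byte, 16)
		putConst64(b, 3)
		fmt.Println(binary.LittleEndian.Uint64(b[3:])) // 123
	}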