go/test/codegen/memops_bigoffset.go
Lynn Boger 4481042c43 cmd/compile: update rules to generate more prefixed instructions
This modifies some existing rules to allow more prefixed instructions
to be generated when using GOPPC64=power10. Some rules also check
if PCRel is available, which is currently supported for linux/ppc64le
and linux/ppc64 (internal linking only).

Prior to p10, DS-offset loads and stores had a 16 bit size limit for
the offset field. If the offset of the data for load or store was
beyond this range then an indexed load or store would be selected by
the rules.

In p10 the assembler can generate prefixed instructions in this case,
but does not if an indexed instruction was selected during the lowering
pass.

This allows many more cases to use prefixed loads or stores, reducing
function sizes and improving performance in some cases where the code
change happens in key loops.

For example in strconv BenchmarkAppendQuoteRune before:

  12c5e4:       15 00 10 06     pla     r10,1425660
  12c5e8:       fc c0 40 39
  12c5ec:       00 00 6a e8     ld      r3,0(r10)
  12c5f0:       10 00 aa e8     ld      r5,16(r10)

After this change:

  12a828:       15 00 10 04     pld     r3,1433272
  12a82c:       b8 de 60 e4
  12a830:       15 00 10 04     pld     r5,1433280
  12a834:       c0 de a0 e4

Performs better in the second case.

A testcase was added to verify that the rules correctly select a load or
store based on the offset and whether power10 or earlier.

Change-Id: I4335fed0bd9b8aba8a4f84d69b89f819cc464846
Reviewed-on: https://go-review.googlesource.com/c/go/+/477398
Reviewed-by: Heschi Kreinick <heschi@google.com>
Reviewed-by: Archana Ravindar <aravind5@in.ibm.com>
Reviewed-by: Cherry Mui <cherryyz@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com>
Reviewed-by: Paul Murphy <murp@ibm.com>
2023-05-15 18:20:54 +00:00

72 lines
2.5 KiB
Go

// asmcheck
// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package codegen
type big1 struct {
w [1<<30 - 1]uint32
}
type big2 struct {
d [1<<29 - 1]uint64
}
func loadLargeOffset(sw *big1, sd *big2) (uint32, uint64) {
// ppc64x:`MOVWZ\s+[0-9]+\(R[0-9]+\)`,-`ADD`
a3 := sw.w[1<<10]
// ppc64le/power10:`MOVWZ\s+[0-9]+\(R[0-9]+\),\sR[0-9]+`,-`ADD`
// ppc64x/power9:`ADD`,`MOVWZ\s+\(R[0-9]+\),\sR[0-9]+`
// ppc64x/power8:`ADD`,`MOVWZ\s+\(R[0-9]+\),\sR[0-9]+`
b3 := sw.w[1<<16]
// ppc64le/power10:`MOVWZ\s+[0-9]+\(R[0-9]+\),\sR[0-9]+`,-`ADD`
// ppc64x/power9:`ADD`,`MOVWZ\s+\(R[0-9]+\),\sR[0-9]+`
// ppc64x/power8:`ADD`,`MOVWZ\s+\(R[0-9]+\),\sR[0-9]+`
c3 := sw.w[1<<28]
// ppc64x:`MOVWZ\s+\(R[0-9]+\)\(R[0-9]+\),\sR[0-9]+`
d3 := sw.w[1<<29]
// ppc64x:`MOVD\s+[0-9]+\(R[0-9]+\)`,-`ADD`
a4 := sd.d[1<<10]
// ppc64le/power10:`MOVD\s+[0-9]+\(R[0-9]+\)`,-`ADD`
// ppc64x/power9:`ADD`,`MOVD\s+\(R[0-9]+\),\sR[0-9]+`
// ppc64x/power8:`ADD`,`MOVD\s+\(R[0-9]+\),\sR[0-9]+`
b4 := sd.d[1<<16]
// ppc64le/power10`:`MOVD\s+[0-9]+\(R[0-9]+\)`,-`ADD`
// ppc64x/power9:`ADD`,`MOVD\s+\(R[0-9]+\),\sR[0-9]+`
// ppc64x/power8:`ADD`,`MOVD\s+\(R[0-9]+\),\sR[0-9]+`
c4 := sd.d[1<<27]
// ppc64x:`MOVD\s+\(R[0-9]+\)\(R[0-9]+\),\sR[0-9]+`
d4 := sd.d[1<<28]
return a3 + b3 + c3 + d3, a4 + b4 + c4 + d4
}
func storeLargeOffset(sw *big1, sd *big2) {
// ppc64x:`MOVW\s+R[0-9]+,\s[0-9]+\(R[0-9]+\)`,-`ADD`
sw.w[1<<10] = uint32(10)
// ppc64le/power10:`MOVW\s+R[0-9]+,\s[0-9]+\(R[0-9]+\)`,-`ADD`
// ppc64x/power9:`MOVW\s+R[0-9]+\,\s\(R[0-9]+\)`,`ADD`
// ppc64x/power8:`MOVW\s+R[0-9]+\,\s\(R[0-9]+\)`,`ADD`
sw.w[1<<16] = uint32(20)
// ppc64le/power10:`MOVW\s+R[0-9]+,\s[0-9]+\(R[0-9]+\)`,-`ADD`
// ppc64x/power9:`MOVW\s+R[0-9]+,\s\(R[0-9]+\)`,`ADD`
// ppc64x/power8:`MOVW\s+R[0-9]+,\s\(R[0-9]+\)`,`ADD`
sw.w[1<<28] = uint32(30)
// ppc64x:`MOVW\s+R[0-9]+,\s\(R[0-9]+\)`
sw.w[1<<29] = uint32(40)
// ppc64x:`MOVD\s+R[0-9]+,\s[0-9]+\(R[0-9]+\)`,-`ADD`
sd.d[1<<10] = uint64(40)
// ppc64le/power10:`MOVD\s+R[0-9]+,\s[0-9]+\(R[0-9]+\)`,-`ADD`
// ppc64x/power9:`MOVD\s+R[0-9]+,\s\(R[0-9]+\)`,`ADD`
// ppc64x/power8:`MOVD\s+R[0-9]+,\s\(R[0-9]+\)`,`ADD`
sd.d[1<<16] = uint64(50)
// ppc64le/power10`:`MOVD\s+R[0-9]+,\s[0-9]+\(R[0-9]+\)`,-`ADD`
// ppc64x/power9:`MOVD\s+R[0-9]+,\s\(R[0-9]+\)`,`ADD`
// ppc64x/power8:`MOVD\s+R[0-9]+,\s\(R[0-9]+\)`,`ADD`
sd.d[1<<27] = uint64(60)
// ppc64x:`MOVD\s+R[0-9]+,\s\(R[0-9]+\)`
sd.d[1<<28] = uint64(70)
}