go/test/codegen/compare_and_branch.go
Jorropo bac4e2f241 cmd/compile: try to rewrite loops to count down
Fixes #61629

This reduce the pressure on regalloc because then the loop only keep alive
one value (the iterator) instead of the iterator and the upper bound since
the comparison now acts against an immediate, often zero which can be skipped.

This optimize things like:
  for i := 0; i < n; i++ {
Or a range over a slice where the index is not used:
  for _, v := range someSlice {
Or the new range over int from #61405:
  for range n {

It is hit in 975 unique places while doing ./make.bash.

Change-Id: I5facff8b267a0b60ea3c1b9a58c4d74cdb38f03f
Reviewed-on: https://go-review.googlesource.com/c/go/+/512935
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Jorropo <jorropo.pgm@gmail.com>
Reviewed-by: Keith Randall <khr@google.com>
Reviewed-by: David Chase <drchase@google.com>
Reviewed-by: Keith Randall <khr@golang.org>
Auto-Submit: Keith Randall <khr@golang.org>
2023-07-31 18:33:29 +00:00

245 lines
4.5 KiB
Go

// asmcheck
// Copyright 2019 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package codegen
//go:noinline
func dummy() {}
// Signed 64-bit compare-and-branch.
func si64(x, y chan int64) {
// s390x:"CGRJ\t[$](2|4), R[0-9]+, R[0-9]+, "
for <-x < <-y {
dummy()
}
// s390x:"CL?GRJ\t[$]8, R[0-9]+, R[0-9]+, "
for <-x == <-y {
dummy()
}
}
// Signed 64-bit compare-and-branch with 8-bit immediate.
func si64x8(doNotOptimize int64) {
// take in doNotOptimize as an argument to avoid the loops being rewritten to count down
// s390x:"CGIJ\t[$]12, R[0-9]+, [$]127, "
for i := doNotOptimize; i < 128; i++ {
dummy()
}
// s390x:"CGIJ\t[$]10, R[0-9]+, [$]-128, "
for i := doNotOptimize; i > -129; i-- {
dummy()
}
// s390x:"CGIJ\t[$]2, R[0-9]+, [$]127, "
for i := doNotOptimize; i >= 128; i++ {
dummy()
}
// s390x:"CGIJ\t[$]4, R[0-9]+, [$]-128, "
for i := doNotOptimize; i <= -129; i-- {
dummy()
}
}
// Unsigned 64-bit compare-and-branch.
func ui64(x, y chan uint64) {
// s390x:"CLGRJ\t[$](2|4), R[0-9]+, R[0-9]+, "
for <-x > <-y {
dummy()
}
// s390x:"CL?GRJ\t[$]6, R[0-9]+, R[0-9]+, "
for <-x != <-y {
dummy()
}
}
// Unsigned 64-bit comparison with 8-bit immediate.
func ui64x8() {
// s390x:"CLGIJ\t[$]4, R[0-9]+, [$]128, "
for i := uint64(0); i < 128; i++ {
dummy()
}
// s390x:"CLGIJ\t[$]12, R[0-9]+, [$]255, "
for i := uint64(0); i < 256; i++ {
dummy()
}
// s390x:"CLGIJ\t[$]2, R[0-9]+, [$]255, "
for i := uint64(0); i >= 256; i-- {
dummy()
}
// s390x:"CLGIJ\t[$]2, R[0-9]+, [$]0, "
for i := uint64(1024); i > 0; i-- {
dummy()
}
}
// Signed 32-bit compare-and-branch.
func si32(x, y chan int32) {
// s390x:"CRJ\t[$](2|4), R[0-9]+, R[0-9]+, "
for <-x < <-y {
dummy()
}
// s390x:"CL?RJ\t[$]8, R[0-9]+, R[0-9]+, "
for <-x == <-y {
dummy()
}
}
// Signed 32-bit compare-and-branch with 8-bit immediate.
func si32x8(doNotOptimize int32) {
// take in doNotOptimize as an argument to avoid the loops being rewritten to count down
// s390x:"CIJ\t[$]12, R[0-9]+, [$]127, "
for i := doNotOptimize; i < 128; i++ {
dummy()
}
// s390x:"CIJ\t[$]10, R[0-9]+, [$]-128, "
for i := doNotOptimize; i > -129; i-- {
dummy()
}
// s390x:"CIJ\t[$]2, R[0-9]+, [$]127, "
for i := doNotOptimize; i >= 128; i++ {
dummy()
}
// s390x:"CIJ\t[$]4, R[0-9]+, [$]-128, "
for i := doNotOptimize; i <= -129; i-- {
dummy()
}
}
// Unsigned 32-bit compare-and-branch.
func ui32(x, y chan uint32) {
// s390x:"CLRJ\t[$](2|4), R[0-9]+, R[0-9]+, "
for <-x > <-y {
dummy()
}
// s390x:"CL?RJ\t[$]6, R[0-9]+, R[0-9]+, "
for <-x != <-y {
dummy()
}
}
// Unsigned 32-bit comparison with 8-bit immediate.
func ui32x8() {
// s390x:"CLIJ\t[$]4, R[0-9]+, [$]128, "
for i := uint32(0); i < 128; i++ {
dummy()
}
// s390x:"CLIJ\t[$]12, R[0-9]+, [$]255, "
for i := uint32(0); i < 256; i++ {
dummy()
}
// s390x:"CLIJ\t[$]2, R[0-9]+, [$]255, "
for i := uint32(0); i >= 256; i-- {
dummy()
}
// s390x:"CLIJ\t[$]2, R[0-9]+, [$]0, "
for i := uint32(1024); i > 0; i-- {
dummy()
}
}
// Signed 64-bit comparison with unsigned 8-bit immediate.
func si64xu8(x chan int64) {
// s390x:"CLGIJ\t[$]8, R[0-9]+, [$]128, "
for <-x == 128 {
dummy()
}
// s390x:"CLGIJ\t[$]6, R[0-9]+, [$]255, "
for <-x != 255 {
dummy()
}
}
// Signed 32-bit comparison with unsigned 8-bit immediate.
func si32xu8(x chan int32) {
// s390x:"CLIJ\t[$]8, R[0-9]+, [$]255, "
for <-x == 255 {
dummy()
}
// s390x:"CLIJ\t[$]6, R[0-9]+, [$]128, "
for <-x != 128 {
dummy()
}
}
// Unsigned 64-bit comparison with signed 8-bit immediate.
func ui64xu8(x chan uint64) {
// s390x:"CGIJ\t[$]8, R[0-9]+, [$]-1, "
for <-x == ^uint64(0) {
dummy()
}
// s390x:"CGIJ\t[$]6, R[0-9]+, [$]-128, "
for <-x != ^uint64(127) {
dummy()
}
}
// Unsigned 32-bit comparison with signed 8-bit immediate.
func ui32xu8(x chan uint32) {
// s390x:"CIJ\t[$]8, R[0-9]+, [$]-128, "
for <-x == ^uint32(127) {
dummy()
}
// s390x:"CIJ\t[$]6, R[0-9]+, [$]-1, "
for <-x != ^uint32(0) {
dummy()
}
}
// Signed 64-bit comparison with 1/-1 to comparison with 0.
func si64x0(x chan int64) {
// riscv64:"BGTZ"
for <-x >= 1 {
dummy()
}
// riscv64:"BLEZ"
for <-x < 1 {
dummy()
}
// riscv64:"BLTZ"
for <-x <= -1 {
dummy()
}
// riscv64:"BGEZ"
for <-x > -1 {
dummy()
}
}
// Unsigned 64-bit comparison with 1 to comparison with 0.
func ui64x0(x chan uint64) {
// riscv64:"BNEZ"
for <-x >= 1 {
dummy()
}
// riscv64:"BEQZ"
for <-x < 1 {
dummy()
}
}