go/test/armimm.go
Ben Shi 6897030fe3 cmd/internal/obj: continue to optimize ARM's constant pool
Both Keith's https://go-review.googlesource.com/c/41612/ and
Ben's https://go-review.googlesource.com/c/41679/ optimized ARM's
constant pool, but neither was complete.

First, BIC was forgotten.
1. "BIC $0xff00ff00, Reg" can be optimized to
   "BIC $0xff000000, Reg
    BIC $0x0000ff00, Reg"
2. "BIC $0xffff00ff, Reg" can be optimized to
   "AND $0x0000ff00, Reg"
3. "AND $0xffff00ff, Reg" can be optimized to
   "BIC $0x0000ff00, Reg"

Second, breaking a non-ARMImmRot constant into the difference of two
ARMImmRots was left as a TODO.
1. "ADD $0x00fffff0, Reg" can be optimized to
   "ADD $0x01000000, Reg
    SUB $0x00000010, Reg"
2. "SUB $0x00fffff0, Reg" can be optimized to
   "SUB $0x01000000, Reg
    ADD $0x00000010, Reg"

This patch fixes them and issue #19844.

The go1 benchmark shows improvements.

name                     old time/op    new time/op    delta
BinaryTree17-4              41.4s ± 1%     41.7s ± 1%  +0.54%  (p=0.000 n=50+49)
Fannkuch11-4                24.7s ± 1%     25.1s ± 0%  +1.70%  (p=0.000 n=50+49)
FmtFprintfEmpty-4           853ns ± 1%     852ns ± 1%    ~     (p=0.833 n=50+50)
FmtFprintfString-4         1.33µs ± 1%    1.33µs ± 1%    ~     (p=0.163 n=50+50)
FmtFprintfInt-4            1.40µs ± 1%    1.40µs ± 0%    ~     (p=0.293 n=50+35)
FmtFprintfIntInt-4         2.09µs ± 1%    2.08µs ± 1%  -0.39%  (p=0.000 n=50+49)
FmtFprintfPrefixedInt-4    2.43µs ± 1%    2.43µs ± 1%    ~     (p=0.552 n=50+50)
FmtFprintfFloat-4          4.57µs ± 1%    4.42µs ± 1%  -3.18%  (p=0.000 n=50+50)
FmtManyArgs-4              8.62µs ± 1%    8.52µs ± 0%  -1.08%  (p=0.000 n=50+50)
GobDecode-4                 101ms ± 1%     101ms ± 2%  +0.45%  (p=0.001 n=49+49)
GobEncode-4                90.7ms ± 1%    91.1ms ± 2%  +0.51%  (p=0.001 n=50+50)
Gzip-4                      4.23s ± 1%     4.21s ± 1%  -0.62%  (p=0.000 n=50+50)
Gunzip-4                    623ms ± 1%     619ms ± 0%  -0.63%  (p=0.000 n=50+42)
HTTPClientServer-4          721µs ± 5%     683µs ± 3%  -5.25%  (p=0.000 n=50+47)
JSONEncode-4                251ms ± 1%     253ms ± 1%  +0.54%  (p=0.000 n=49+50)
JSONDecode-4                941ms ± 1%     944ms ± 1%  +0.30%  (p=0.001 n=49+50)
Mandelbrot200-4            49.3ms ± 1%    49.3ms ± 0%    ~     (p=0.918 n=50+48)
GoParse-4                  47.1ms ± 1%    47.2ms ± 1%  +0.18%  (p=0.025 n=50+50)
RegexpMatchEasy0_32-4      1.23µs ± 1%    1.24µs ± 1%  +0.30%  (p=0.000 n=49+50)
RegexpMatchEasy0_1K-4      7.74µs ± 7%    7.76µs ± 5%    ~     (p=0.888 n=50+50)
RegexpMatchEasy1_32-4      1.32µs ± 1%    1.32µs ± 1%  +0.23%  (p=0.003 n=50+50)
RegexpMatchEasy1_1K-4      10.6µs ± 2%    10.5µs ± 3%  -1.29%  (p=0.000 n=49+50)
RegexpMatchMedium_32-4     2.19µs ± 1%    2.10µs ± 1%  -3.79%  (p=0.000 n=49+49)
RegexpMatchMedium_1K-4      544µs ± 0%     545µs ± 0%    ~     (p=0.123 n=41+50)
RegexpMatchHard_32-4       28.8µs ± 0%    28.8µs ± 1%    ~     (p=0.580 n=46+50)
RegexpMatchHard_1K-4        863µs ± 1%     865µs ± 1%  +0.31%  (p=0.027 n=47+50)
Revcomp-4                  82.2ms ± 2%    82.3ms ± 2%    ~     (p=0.894 n=48+49)
Template-4                  1.06s ± 1%     1.04s ± 1%  -1.18%  (p=0.000 n=50+49)
TimeParse-4                7.25µs ± 1%    7.35µs ± 0%  +1.48%  (p=0.000 n=50+50)
TimeFormat-4               13.3µs ± 1%    13.2µs ± 1%  -0.13%  (p=0.007 n=50+50)
[Geo mean]                  736µs          733µs       -0.37%

name                     old speed      new speed      delta
GobDecode-4              7.60MB/s ± 1%  7.56MB/s ± 2%  -0.46%  (p=0.001 n=49+49)
GobEncode-4              8.47MB/s ± 1%  8.42MB/s ± 2%  -0.50%  (p=0.001 n=50+50)
Gzip-4                   4.58MB/s ± 1%  4.61MB/s ± 1%  +0.59%  (p=0.000 n=50+50)
Gunzip-4                 31.2MB/s ± 1%  31.4MB/s ± 0%  +0.63%  (p=0.000 n=50+42)
JSONEncode-4             7.73MB/s ± 1%  7.69MB/s ± 1%  -0.53%  (p=0.000 n=49+50)
JSONDecode-4             2.06MB/s ± 1%  2.06MB/s ± 1%    ~     (p=0.052 n=44+50)
GoParse-4                1.23MB/s ± 0%  1.23MB/s ± 2%    ~     (p=0.526 n=26+50)
RegexpMatchEasy0_32-4    25.9MB/s ± 1%  25.9MB/s ± 1%  -0.30%  (p=0.000 n=49+50)
RegexpMatchEasy0_1K-4     132MB/s ± 7%   132MB/s ± 6%    ~     (p=0.885 n=50+50)
RegexpMatchEasy1_32-4    24.2MB/s ± 1%  24.1MB/s ± 1%  -0.22%  (p=0.003 n=50+50)
RegexpMatchEasy1_1K-4    96.4MB/s ± 2%  97.8MB/s ± 3%  +1.36%  (p=0.000 n=50+50)
RegexpMatchMedium_32-4    460kB/s ± 0%   476kB/s ± 1%  +3.43%  (p=0.000 n=49+50)
RegexpMatchMedium_1K-4   1.88MB/s ± 0%  1.88MB/s ± 0%    ~     (all equal)
RegexpMatchHard_32-4     1.11MB/s ± 0%  1.11MB/s ± 1%  +0.34%  (p=0.000 n=45+50)
RegexpMatchHard_1K-4     1.19MB/s ± 1%  1.18MB/s ± 1%  -0.34%  (p=0.033 n=50+50)
Revcomp-4                30.9MB/s ± 2%  30.9MB/s ± 2%    ~     (p=0.894 n=48+49)
Template-4               1.84MB/s ± 1%  1.86MB/s ± 2%  +1.19%  (p=0.000 n=48+50)
[Geo mean]               6.63MB/s       6.65MB/s       +0.26%


Fixes #19844.

Change-Id: I5ad16cc0b29267bb4579aca3dcc10a0b8ade1aa4
Reviewed-on: https://go-review.googlesource.com/42430
Run-TryBot: Cherry Zhang <cherryyz@google.com>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Cherry Zhang <cherryyz@google.com>
2017-05-11 13:53:54 +00:00

179 lines
3.9 KiB
Go

// run
// Copyright 2017 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// This file tests the splitting of constants into
// multiple immediates on arm.
package main
import "fmt"
// Untyped constants used as operands by the helpers below.
//
// The "a" values are built from separate 8-bit fields (for example
// 0x00aa00dd == 0x00aa0000 + 0x000000dd), while the "s" values contain a
// long run of one bits (for example 0x00ffff00 == 0x01000000 - 0x00000100).
// NOTE(review): presumably "a" stands for a split into added immediates and
// "s" for a split into subtracted immediates; the file does not say so.
const (
	c32a = 0x00aa00dd
	c32s = 0x00ffff00
	c64a = 0x00aa00dd55000066
	c64s = 0x00ffff00004fff00
)
// add32a returns x + c32a with wrapping uint32 arithmetic.
//
// The noinline directive keeps calls from being inlined, presumably so the
// constant addition is compiled on its own rather than folded at the call site.
//
//go:noinline
func add32a(x uint32) uint32 {
	x += c32a
	return x
}
// add32s returns x + c32s with wrapping uint32 arithmetic.
//
// The noinline directive keeps calls from being inlined, presumably so the
// constant addition is compiled on its own rather than folded at the call site.
//
//go:noinline
func add32s(x uint32) uint32 {
	x += c32s
	return x
}
// sub32a returns x - c32a with wrapping uint32 arithmetic.
//
// The noinline directive keeps calls from being inlined, presumably so the
// constant subtraction is compiled on its own rather than folded at the call
// site.
//
//go:noinline
func sub32a(x uint32) uint32 {
	x -= c32a
	return x
}
// sub32s returns x - c32s with wrapping uint32 arithmetic.
//
// The noinline directive keeps calls from being inlined, presumably so the
// constant subtraction is compiled on its own rather than folded at the call
// site.
//
//go:noinline
func sub32s(x uint32) uint32 {
	x -= c32s
	return x
}
// or32 returns the bitwise OR of x and c32a.
//
// The noinline directive keeps calls from being inlined, presumably so the
// constant operation is compiled on its own rather than folded at the call
// site.
//
//go:noinline
func or32(x uint32) uint32 {
	x |= c32a
	return x
}
// xor32 returns the bitwise XOR of x and c32a.
//
// The noinline directive keeps calls from being inlined, presumably so the
// constant operation is compiled on its own rather than folded at the call
// site.
//
//go:noinline
func xor32(x uint32) uint32 {
	x ^= c32a
	return x
}
// subr32a returns c32a - x (constant minus argument) with wrapping uint32
// arithmetic.
//
// The noinline directive keeps calls from being inlined, presumably so the
// reversed subtraction is compiled on its own rather than folded at the call
// site.
//
//go:noinline
func subr32a(x uint32) uint32 {
	diff := c32a - x
	return diff
}
// subr32s returns c32s - x (constant minus argument) with wrapping uint32
// arithmetic.
//
// The noinline directive keeps calls from being inlined, presumably so the
// reversed subtraction is compiled on its own rather than folded at the call
// site.
//
//go:noinline
func subr32s(x uint32) uint32 {
	diff := c32s - x
	return diff
}
// bic32 returns x with every bit that is set in c32a cleared (x AND NOT c32a).
//
// The noinline directive keeps calls from being inlined, presumably so the
// constant bit-clear is compiled on its own rather than folded at the call
// site.
//
//go:noinline
func bic32(x uint32) uint32 {
	x &^= c32a
	return x
}
// add64a returns x + c64a with wrapping uint64 arithmetic.
//
// The noinline directive keeps calls from being inlined, presumably so the
// constant addition is compiled on its own rather than folded at the call site.
//
//go:noinline
func add64a(x uint64) uint64 {
	x += c64a
	return x
}
// add64s returns x + c64s with wrapping uint64 arithmetic.
//
// The noinline directive keeps calls from being inlined, presumably so the
// constant addition is compiled on its own rather than folded at the call site.
//
//go:noinline
func add64s(x uint64) uint64 {
	x += c64s
	return x
}
// sub64a returns x - c64a with wrapping uint64 arithmetic.
//
// The noinline directive keeps calls from being inlined, presumably so the
// constant subtraction is compiled on its own rather than folded at the call
// site.
//
//go:noinline
func sub64a(x uint64) uint64 {
	x -= c64a
	return x
}
// sub64s returns x - c64s with wrapping uint64 arithmetic.
//
// The noinline directive keeps calls from being inlined, presumably so the
// constant subtraction is compiled on its own rather than folded at the call
// site.
//
//go:noinline
func sub64s(x uint64) uint64 {
	x -= c64s
	return x
}
// or64 returns the bitwise OR of x and c64a.
//
// The noinline directive keeps calls from being inlined, presumably so the
// constant operation is compiled on its own rather than folded at the call
// site.
//
//go:noinline
func or64(x uint64) uint64 {
	x |= c64a
	return x
}
// xor64 returns the bitwise XOR of x and c64a.
//
// The noinline directive keeps calls from being inlined, presumably so the
// constant operation is compiled on its own rather than folded at the call
// site.
//
//go:noinline
func xor64(x uint64) uint64 {
	x ^= c64a
	return x
}
// subr64a returns c64a - x (constant minus argument) with wrapping uint64
// arithmetic.
//
// The noinline directive keeps calls from being inlined, presumably so the
// reversed subtraction is compiled on its own rather than folded at the call
// site.
//
//go:noinline
func subr64a(x uint64) uint64 {
	diff := c64a - x
	return diff
}
// subr64s returns c64s - x (constant minus argument) with wrapping uint64
// arithmetic.
//
// The noinline directive keeps calls from being inlined, presumably so the
// reversed subtraction is compiled on its own rather than folded at the call
// site.
//
//go:noinline
func subr64s(x uint64) uint64 {
	diff := c64s - x
	return diff
}
// bic64 returns x with every bit that is set in c64a cleared (x AND NOT c64a).
//
// The noinline directive keeps calls from being inlined, presumably so the
// constant bit-clear is compiled on its own rather than folded at the call
// site.
//
//go:noinline
func bic64(x uint64) uint64 {
	x &^= c64a
	return x
}
// Note: x-c gets rewritten to x+(-c), so SUB and SBC are not directly
// testable while that rewrite rule is enabled. The author disabled the
// rewrite rule locally before running this test.
// main runs the 32-bit checks and then the 64-bit checks. Either one panics
// on the first mismatch, so returning normally means every check passed.
func main() {
	for _, check := range []func(){test32, test64} {
		check()
	}
}
// test32 runs every 32-bit helper on one input and compares the result with
// the same expression evaluated inline on a local variable. It panics on the
// first mismatch with a message naming the helper, the input, the value
// returned and the value expected.
func test32() {
	a := uint32(0x11111111)

	// Checks run in table order; the first failing entry panics.
	checks := []struct {
		format string              // panic message template for this helper
		want   uint32              // expected result, computed inline
		fn     func(uint32) uint32 // out-of-line helper under test
	}{
		{"add32a(%x) = %x, want %x", a + c32a, add32a},
		{"add32s(%x) = %x, want %x", a + c32s, add32s},
		{"sub32a(%x) = %x, want %x", a - c32a, sub32a},
		{"sub32s(%x) = %x, want %x", a - c32s, sub32s},
		{"or32(%x) = %x, want %x", a | c32a, or32},
		{"xor32(%x) = %x, want %x", a ^ c32a, xor32},
		{"subr32a(%x) = %x, want %x", c32a - a, subr32a},
		{"subr32s(%x) = %x, want %x", c32s - a, subr32s},
		{"bic32(%x) = %x, want %x", a &^ c32a, bic32},
	}
	for _, c := range checks {
		if got := c.fn(a); got != c.want {
			panic(fmt.Sprintf(c.format, a, got, c.want))
		}
	}
}
// test64 runs every 64-bit helper on one input and compares the result with
// the same expression evaluated inline on a local variable. It panics on the
// first mismatch with a message naming the helper, the input, the value
// returned and the value expected.
func test64() {
	a := uint64(0x1111111111111111)

	// Checks run in table order; the first failing entry panics.
	checks := []struct {
		format string              // panic message template for this helper
		want   uint64              // expected result, computed inline
		fn     func(uint64) uint64 // out-of-line helper under test
	}{
		{"add64a(%x) = %x, want %x", a + c64a, add64a},
		{"add64s(%x) = %x, want %x", a + c64s, add64s},
		{"sub64a(%x) = %x, want %x", a - c64a, sub64a},
		{"sub64s(%x) = %x, want %x", a - c64s, sub64s},
		{"or64(%x) = %x, want %x", a | c64a, or64},
		{"xor64(%x) = %x, want %x", a ^ c64a, xor64},
		{"subr64a(%x) = %x, want %x", c64a - a, subr64a},
		{"subr64s(%x) = %x, want %x", c64s - a, subr64s},
		{"bic64(%x) = %x, want %x", a &^ c64a, bic64},
	}
	for _, c := range checks {
		if got := c.fn(a); got != c.want {
			panic(fmt.Sprintf(c.format, a, got, c.want))
		}
	}
}