cmd/compile: generic SSA rules for simplifying 2 and 3 operand integer arithmetic expressions

This applies the following generic integer addition/subtraction transformations:

x - (x + y) = -y
(x - y) - x = -y
y + (x - y) = x
y + (z + (x - y)) = x + z

There are over 40 unique functions matching in Go. This hits 2 funcs in the runtime itself:

runtime.stackfree()
runtime.runqdrain()

Go binary size reduced by 0.05% on Linux x86_64.

StackCopy bench (perflocked Cascade Lake x86):

name                old time/op  new time/op  delta
StackCopyPtr-8      87.3ms ± 1%  86.9ms ± 0%  -0.45%  (p=0.000 n=20+20)
StackCopy-8         77.6ms ± 1%  77.0ms ± 0%  -0.76%  (p=0.000 n=20+20)
StackCopyNoCache-8  2.28ms ± 2%  2.26ms ± 2%  -0.93%  (p=0.008 n=19+20)

test/bench/go1 benchmarks (perflocked Cascade Lake x86):

name                     old time/op    new time/op    delta
BinaryTree17-8              1.88s ± 1%     1.88s ± 0%    ~     (p=0.373 n=15+12)
Fannkuch11-8                2.31s ± 0%     2.35s ± 0%  +1.52%  (p=0.000 n=15+14)
FmtFprintfEmpty-8          26.6ns ± 0%    26.6ns ± 0%    ~     (p=0.081 n=14+13)
FmtFprintfString-8         48.6ns ± 0%    50.0ns ± 0%  +2.86%  (p=0.000 n=15+14)
FmtFprintfInt-8            56.9ns ± 0%    54.8ns ± 0%  -3.70%  (p=0.000 n=15+15)
FmtFprintfIntInt-8         90.4ns ± 0%    88.8ns ± 0%  -1.78%  (p=0.000 n=15+15)
FmtFprintfPrefixedInt-8     104ns ± 0%     104ns ± 0%    ~     (p=0.905 n=14+13)
FmtFprintfFloat-8           148ns ± 0%     144ns ± 0%  -2.19%  (p=0.000 n=14+15)
FmtManyArgs-8               389ns ± 0%     390ns ± 0%  +0.35%  (p=0.000 n=12+15)
GobDecode-8                3.90ms ± 1%    3.88ms ± 0%  -0.49%  (p=0.000 n=15+14)
GobEncode-8                2.73ms ± 0%    2.73ms ± 0%    ~     (p=0.425 n=15+14)
Gzip-8                      169ms ± 0%     168ms ± 0%  -0.52%  (p=0.000 n=13+13)
Gunzip-8                   24.7ms ± 0%    24.8ms ± 0%  +0.61%  (p=0.000 n=15+15)
HTTPClientServer-8         60.5µs ± 6%    60.4µs ± 7%    ~     (p=0.595 n=15+15)
JSONEncode-8               6.97ms ± 1%    6.93ms ± 0%  -0.69%  (p=0.000 n=14+14)
JSONDecode-8               31.2ms ± 1%    30.8ms ± 1%  -1.27%  (p=0.000 n=14+14)
Mandelbrot200-8            3.87ms ± 0%    3.87ms ± 0%    ~     (p=0.652 n=15+14)
GoParse-8                  2.65ms ± 2%    2.64ms ± 1%    ~     (p=0.202 n=15+15)
RegexpMatchEasy0_32-8      45.1ns ± 0%    45.9ns ± 0%  +1.68%  (p=0.000 n=14+15)
RegexpMatchEasy0_1K-8       140ns ± 0%     139ns ± 0%  -0.44%  (p=0.000 n=15+14)
RegexpMatchEasy1_32-8      40.9ns ± 3%    40.5ns ± 0%  -0.88%  (p=0.000 n=15+13)
RegexpMatchEasy1_1K-8       215ns ± 1%     220ns ± 1%  +2.27%  (p=0.000 n=15+15)
RegexpMatchMedium_32-8      783ns ± 7%     738ns ± 0%    ~     (p=0.361 n=15+15)
RegexpMatchMedium_1K-8     24.1µs ± 6%    23.4µs ± 6%  -2.94%  (p=0.004 n=15+15)
RegexpMatchHard_32-8       1.10µs ± 1%    1.09µs ± 1%  -0.40%  (p=0.006 n=15+14)
RegexpMatchHard_1K-8       33.0µs ± 0%    33.0µs ± 0%    ~     (p=0.535 n=12+14)
Revcomp-8                   354ms ± 0%     353ms ± 0%  -0.23%  (p=0.002 n=15+13)
Template-8                 42.0ms ± 1%    41.8ms ± 2%  -0.37%  (p=0.023 n=14+15)
TimeParse-8                 181ns ± 0%     180ns ± 1%  -0.18%  (p=0.014 n=12+13)
TimeFormat-8                240ns ± 0%     242ns ± 1%  +0.69%  (p=0.000 n=12+15)
[Geo mean]                 35.2µs         35.1µs       -0.43%

name                     old speed      new speed      delta
GobDecode-8               197MB/s ± 1%   198MB/s ± 0%  +0.49%  (p=0.000 n=15+14)
GobEncode-8               281MB/s ± 0%   281MB/s ± 0%    ~     (p=0.419 n=15+14)
Gzip-8                    115MB/s ± 0%   115MB/s ± 0%  +0.52%  (p=0.000 n=13+13)
Gunzip-8                  786MB/s ± 0%   781MB/s ± 0%  -0.60%  (p=0.000 n=15+15)
JSONEncode-8              278MB/s ± 1%   280MB/s ± 0%  +0.69%  (p=0.000 n=14+14)
JSONDecode-8             62.3MB/s ± 1%  63.1MB/s ± 1%  +1.29%  (p=0.000 n=14+14)
GoParse-8                21.9MB/s ± 2%  22.0MB/s ± 1%    ~     (p=0.205 n=15+15)
RegexpMatchEasy0_32-8     709MB/s ± 0%   697MB/s ± 0%  -1.65%  (p=0.000 n=14+15)
RegexpMatchEasy0_1K-8    7.34GB/s ± 0%  7.37GB/s ± 0%  +0.43%  (p=0.000 n=15+15)
RegexpMatchEasy1_32-8     783MB/s ± 2%   790MB/s ± 0%  +0.88%  (p=0.000 n=15+13)
RegexpMatchEasy1_1K-8    4.77GB/s ± 1%  4.66GB/s ± 1%  -2.23%  (p=0.000 n=15+15)
RegexpMatchMedium_32-8   41.0MB/s ± 7%  43.3MB/s ± 0%    ~     (p=0.360 n=15+15)
RegexpMatchMedium_1K-8   42.5MB/s ± 6%  43.8MB/s ± 6%  +3.07%  (p=0.004 n=15+15)
RegexpMatchHard_32-8     29.2MB/s ± 1%  29.3MB/s ± 1%  +0.41%  (p=0.006 n=15+14)
RegexpMatchHard_1K-8     31.1MB/s ± 0%  31.1MB/s ± 0%    ~     (p=0.495 n=12+14)
Revcomp-8                 718MB/s ± 0%   720MB/s ± 0%  +0.23%  (p=0.002 n=15+13)
Template-8               46.3MB/s ± 1%  46.4MB/s ± 2%  +0.38%  (p=0.021 n=14+15)
[Geo mean]                205MB/s        206MB/s       +0.57%

Change-Id: Ibd1afdf8b6c0b08087dcc3acd8f943637eb95ac0
Reviewed-on: https://go-review.googlesource.com/c/go/+/344930
Reviewed-by: Keith Randall <khr@golang.org>
Run-TryBot: Keith Randall <khr@golang.org>
TryBot-Result: Go Bot <gobot@golang.org>
Trust: Josh Bleecher Snyder <josharian@gmail.com>
This commit is contained in:
Jake Ciolek 2021-08-25 12:36:17 +02:00 committed by Keith Randall
parent 5baf60d472
commit 6cf1d5d0fa
3 changed files with 356 additions and 0 deletions

View file

@ -590,6 +590,10 @@
// simplifications often used for lengths. e.g. len(s[i:i+5])==5
(Sub(64|32|16|8) (Add(64|32|16|8) x y) x) => y
(Sub(64|32|16|8) (Add(64|32|16|8) x y) y) => x
(Sub(64|32|16|8) (Sub(64|32|16|8) x y) x) => (Neg(64|32|16|8) y)
(Sub(64|32|16|8) x (Add(64|32|16|8) x y)) => (Neg(64|32|16|8) y)
(Add(64|32|16|8) x (Sub(64|32|16|8) y x)) => y
(Add(64|32|16|8) x (Add(64|32|16|8) y (Sub(64|32|16|8) z x))) => (Add(64|32|16|8) y z)
// basic phi simplifications
(Phi (Const8 [c]) (Const8 [c])) => (Const8 [c])

View file

@ -533,6 +533,52 @@ func rewriteValuegeneric_OpAdd16(v *Value) bool {
}
break
}
// match: (Add16 x (Sub16 y x))
// result: y
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x := v_0
if v_1.Op != OpSub16 {
continue
}
_ = v_1.Args[1]
y := v_1.Args[0]
if x != v_1.Args[1] {
continue
}
v.copyOf(y)
return true
}
break
}
// match: (Add16 x (Add16 y (Sub16 z x)))
// result: (Add16 y z)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x := v_0
if v_1.Op != OpAdd16 {
continue
}
_ = v_1.Args[1]
v_1_0 := v_1.Args[0]
v_1_1 := v_1.Args[1]
for _i1 := 0; _i1 <= 1; _i1, v_1_0, v_1_1 = _i1+1, v_1_1, v_1_0 {
y := v_1_0
if v_1_1.Op != OpSub16 {
continue
}
_ = v_1_1.Args[1]
z := v_1_1.Args[0]
if x != v_1_1.Args[1] {
continue
}
v.reset(OpAdd16)
v.AddArg2(y, z)
return true
}
}
break
}
// match: (Add16 (Add16 i:(Const16 <t>) z) x)
// cond: (z.Op != OpConst16 && x.Op != OpConst16)
// result: (Add16 i (Add16 <t> z x))
@ -732,6 +778,52 @@ func rewriteValuegeneric_OpAdd32(v *Value) bool {
}
break
}
// match: (Add32 x (Sub32 y x))
// result: y
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x := v_0
if v_1.Op != OpSub32 {
continue
}
_ = v_1.Args[1]
y := v_1.Args[0]
if x != v_1.Args[1] {
continue
}
v.copyOf(y)
return true
}
break
}
// match: (Add32 x (Add32 y (Sub32 z x)))
// result: (Add32 y z)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x := v_0
if v_1.Op != OpAdd32 {
continue
}
_ = v_1.Args[1]
v_1_0 := v_1.Args[0]
v_1_1 := v_1.Args[1]
for _i1 := 0; _i1 <= 1; _i1, v_1_0, v_1_1 = _i1+1, v_1_1, v_1_0 {
y := v_1_0
if v_1_1.Op != OpSub32 {
continue
}
_ = v_1_1.Args[1]
z := v_1_1.Args[0]
if x != v_1_1.Args[1] {
continue
}
v.reset(OpAdd32)
v.AddArg2(y, z)
return true
}
}
break
}
// match: (Add32 (Add32 i:(Const32 <t>) z) x)
// cond: (z.Op != OpConst32 && x.Op != OpConst32)
// result: (Add32 i (Add32 <t> z x))
@ -958,6 +1050,52 @@ func rewriteValuegeneric_OpAdd64(v *Value) bool {
}
break
}
// match: (Add64 x (Sub64 y x))
// result: y
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x := v_0
if v_1.Op != OpSub64 {
continue
}
_ = v_1.Args[1]
y := v_1.Args[0]
if x != v_1.Args[1] {
continue
}
v.copyOf(y)
return true
}
break
}
// match: (Add64 x (Add64 y (Sub64 z x)))
// result: (Add64 y z)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x := v_0
if v_1.Op != OpAdd64 {
continue
}
_ = v_1.Args[1]
v_1_0 := v_1.Args[0]
v_1_1 := v_1.Args[1]
for _i1 := 0; _i1 <= 1; _i1, v_1_0, v_1_1 = _i1+1, v_1_1, v_1_0 {
y := v_1_0
if v_1_1.Op != OpSub64 {
continue
}
_ = v_1_1.Args[1]
z := v_1_1.Args[0]
if x != v_1_1.Args[1] {
continue
}
v.reset(OpAdd64)
v.AddArg2(y, z)
return true
}
}
break
}
// match: (Add64 (Add64 i:(Const64 <t>) z) x)
// cond: (z.Op != OpConst64 && x.Op != OpConst64)
// result: (Add64 i (Add64 <t> z x))
@ -1184,6 +1322,52 @@ func rewriteValuegeneric_OpAdd8(v *Value) bool {
}
break
}
// match: (Add8 x (Sub8 y x))
// result: y
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x := v_0
if v_1.Op != OpSub8 {
continue
}
_ = v_1.Args[1]
y := v_1.Args[0]
if x != v_1.Args[1] {
continue
}
v.copyOf(y)
return true
}
break
}
// match: (Add8 x (Add8 y (Sub8 z x)))
// result: (Add8 y z)
for {
for _i0 := 0; _i0 <= 1; _i0, v_0, v_1 = _i0+1, v_1, v_0 {
x := v_0
if v_1.Op != OpAdd8 {
continue
}
_ = v_1.Args[1]
v_1_0 := v_1.Args[0]
v_1_1 := v_1.Args[1]
for _i1 := 0; _i1 <= 1; _i1, v_1_0, v_1_1 = _i1+1, v_1_1, v_1_0 {
y := v_1_0
if v_1_1.Op != OpSub8 {
continue
}
_ = v_1_1.Args[1]
z := v_1_1.Args[0]
if x != v_1_1.Args[1] {
continue
}
v.reset(OpAdd8)
v.AddArg2(y, z)
return true
}
}
break
}
// match: (Add8 (Add8 i:(Const8 <t>) z) x)
// cond: (z.Op != OpConst8 && x.Op != OpConst8)
// result: (Add8 i (Add8 <t> z x))
@ -22590,6 +22774,42 @@ func rewriteValuegeneric_OpSub16(v *Value) bool {
}
break
}
// match: (Sub16 (Sub16 x y) x)
// result: (Neg16 y)
for {
if v_0.Op != OpSub16 {
break
}
y := v_0.Args[1]
x := v_0.Args[0]
if x != v_1 {
break
}
v.reset(OpNeg16)
v.AddArg(y)
return true
}
// match: (Sub16 x (Add16 x y))
// result: (Neg16 y)
for {
x := v_0
if v_1.Op != OpAdd16 {
break
}
_ = v_1.Args[1]
v_1_0 := v_1.Args[0]
v_1_1 := v_1.Args[1]
for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 {
if x != v_1_0 {
continue
}
y := v_1_1
v.reset(OpNeg16)
v.AddArg(y)
return true
}
break
}
// match: (Sub16 x (Sub16 i:(Const16 <t>) z))
// cond: (z.Op != OpConst16 && x.Op != OpConst16)
// result: (Sub16 (Add16 <t> x z) i)
@ -22869,6 +23089,42 @@ func rewriteValuegeneric_OpSub32(v *Value) bool {
}
break
}
// match: (Sub32 (Sub32 x y) x)
// result: (Neg32 y)
for {
if v_0.Op != OpSub32 {
break
}
y := v_0.Args[1]
x := v_0.Args[0]
if x != v_1 {
break
}
v.reset(OpNeg32)
v.AddArg(y)
return true
}
// match: (Sub32 x (Add32 x y))
// result: (Neg32 y)
for {
x := v_0
if v_1.Op != OpAdd32 {
break
}
_ = v_1.Args[1]
v_1_0 := v_1.Args[0]
v_1_1 := v_1.Args[1]
for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 {
if x != v_1_0 {
continue
}
y := v_1_1
v.reset(OpNeg32)
v.AddArg(y)
return true
}
break
}
// match: (Sub32 x (Sub32 i:(Const32 <t>) z))
// cond: (z.Op != OpConst32 && x.Op != OpConst32)
// result: (Sub32 (Add32 <t> x z) i)
@ -23172,6 +23428,42 @@ func rewriteValuegeneric_OpSub64(v *Value) bool {
}
break
}
// match: (Sub64 (Sub64 x y) x)
// result: (Neg64 y)
for {
if v_0.Op != OpSub64 {
break
}
y := v_0.Args[1]
x := v_0.Args[0]
if x != v_1 {
break
}
v.reset(OpNeg64)
v.AddArg(y)
return true
}
// match: (Sub64 x (Add64 x y))
// result: (Neg64 y)
for {
x := v_0
if v_1.Op != OpAdd64 {
break
}
_ = v_1.Args[1]
v_1_0 := v_1.Args[0]
v_1_1 := v_1.Args[1]
for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 {
if x != v_1_0 {
continue
}
y := v_1_1
v.reset(OpNeg64)
v.AddArg(y)
return true
}
break
}
// match: (Sub64 x (Sub64 i:(Const64 <t>) z))
// cond: (z.Op != OpConst64 && x.Op != OpConst64)
// result: (Sub64 (Add64 <t> x z) i)
@ -23475,6 +23767,42 @@ func rewriteValuegeneric_OpSub8(v *Value) bool {
}
break
}
// match: (Sub8 (Sub8 x y) x)
// result: (Neg8 y)
for {
if v_0.Op != OpSub8 {
break
}
y := v_0.Args[1]
x := v_0.Args[0]
if x != v_1 {
break
}
v.reset(OpNeg8)
v.AddArg(y)
return true
}
// match: (Sub8 x (Add8 x y))
// result: (Neg8 y)
for {
x := v_0
if v_1.Op != OpAdd8 {
break
}
_ = v_1.Args[1]
v_1_0 := v_1.Args[0]
v_1_1 := v_1.Args[1]
for _i0 := 0; _i0 <= 1; _i0, v_1_0, v_1_1 = _i0+1, v_1_1, v_1_0 {
if x != v_1_0 {
continue
}
y := v_1_1
v.reset(OpNeg8)
v.AddArg(y)
return true
}
break
}
// match: (Sub8 x (Sub8 i:(Const8 <t>) z))
// cond: (z.Op != OpConst8 && x.Op != OpConst8)
// result: (Sub8 (Add8 <t> x z) i)

View file

@ -84,6 +84,30 @@ func NegAddFromConstNeg(a int) int {
return c
}
// SubSubNegSimplify returns (a - b) - a, which is algebraically -b.
// The annotation inside the body names the amd64 instruction the
// test expects for that expression (presumably checked by the codegen
// test harness; it must stay directly above the expression).
func SubSubNegSimplify(a, b int) int {
	// amd64:"NEGQ"
	negated := (a - b) - a
	return negated
}
// SubAddSimplify returns a + (b - a), which is algebraically b.
// The annotation inside the body lists amd64 instructions with a
// leading '-' (presumably meaning they must not appear for that
// expression; it must stay directly above the expression).
func SubAddSimplify(a, b int) int {
	// amd64:-"SUBQ",-"ADDQ"
	passthrough := a + (b - a)
	return passthrough
}
// SubAddNegSimplify returns a - (b + a), which is algebraically -b.
// The annotation inside the body names NEGQ and lists ADDQ and SUBQ
// with a leading '-' (presumably: NEGQ expected, the others absent;
// it must stay directly above the expression).
func SubAddNegSimplify(a, b int) int {
	// amd64:"NEGQ",-"ADDQ",-"SUBQ"
	negated := a - (b + a)
	return negated
}
// AddAddSubSimplify returns a + (b + (c - a)), which is algebraically
// b + c. The annotation inside the body lists SUBQ with a leading '-'
// (presumably meaning no subtraction may remain for that expression;
// it must stay directly above the expression).
func AddAddSubSimplify(a, b, c int) int {
	// amd64:-"SUBQ"
	sum := a + (b + (c - a))
	return sum
}
// -------------------- //
// Multiplication //
// -------------------- //