From 359f44910f470578b2aed43f057b6fa8b4b41bcc Mon Sep 17 00:00:00 2001
From: Pat Gavlin
Date: Fri, 26 Mar 2021 17:48:42 +0000
Subject: [PATCH] cmd/compile: fix long RMW bit operations on AMD64
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Under certain circumstances, the existing rules for bit operations can
produce code that writes beyond its intended bounds. For example,
consider the following code:

    func repro(b []byte, addr, bit int32) {
        _ = b[3]
        v := uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 | 1<<(bit&31)
        b[0] = byte(v)
        b[1] = byte(v >> 8)
        b[2] = byte(v >> 16)
        b[3] = byte(v >> 24)
    }

Roughly speaking:

1. The expression `1 << (bit & 31)` is rewritten into `(SHLL 1 bit)`
2. The expression `uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 |
   uint32(b[3])<<24` is rewritten into `(MOVLload &b[0])`
3. The statements `b[0] = byte(v) ... b[3] = byte(v >> 24)` are
   rewritten into `(MOVLstore &b[0], v)`
4. `(ORL (SHLL 1, bit) (MOVLload &b[0]))` is rewritten into
   `(BTSL (MOVLload &b[0]) bit)`. This is a valid transformation because
   the destination is a register: in this case, the bit offset is masked
   by the number of bits in the destination register. This is identical
   to the masking performed by `SHL`.
5. `(MOVLstore &b[0] (BTSL (MOVLload &b[0]) bit))` is rewritten into
   `(BTSLmodify &b[0] bit)`. This is an invalid transformation because
   the destination is memory: in this case, the bit offset is not
   masked, and the chosen instruction may write outside its intended
   32-bit location.

These changes fix the invalid rewrite performed in step (5) by
explicitly masking the bit offset operand to `BT(S|R|C)(L|Q)modify`. In
the example above, the adjusted rules produce
`(BTSLmodify &b[0] (ANDLconst [31] bit))` in step (5).

These changes also add several new rules to rewrite bit sets, toggles,
and clears that are rooted at `(OR|XOR|AND)(L|Q)modify` operators into
appropriate `BT(S|R|C)(L|Q)modify` operators. These rules catch cases
where `MOV(L|Q)store ((OR|XOR|AND)(L|Q) ...)` is rewritten to
`(OR|XOR|AND)(L|Q)modify` before the `(OR|XOR|AND)(L|Q) ...` can be
rewritten to `BT(S|R|C)(L|Q) ...`.
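For illustration only (this sketch is not part of the change, and the
names in it are invented), the semantic difference between the two
forms can be modeled directly in Go. With bit = 32, the masked,
register-like form modifies bit 0 of the same 32-bit word, while the
unmasked memory form lands in the following word:

    package main

    import "fmt"

    func main() {
        var words [2]uint32
        bit := int32(32)

        // Register-destination BTSL: the offset is masked to 5 bits,
        // exactly like the shift in 1 << (bit & 31).
        reg := words[0] | 1<<(uint(bit)&31)

        // Unmasked memory-destination BTSL: bit 32 of words[0] is
        // bit 0 of words[1], so the write escapes the intended word.
        words[bit/32] |= 1 << (uint(bit) % 32)

        fmt.Println(reg, words) // 1 [0 1]
    }

The first result stays within words[0]; the second modifies words[1],
which mirrors the out-of-bounds write described in step (5).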
Overall, compilecmp reports small improvements in code size on
darwin/amd64 when the changes to the compiler itself are excluded:

file                                before    after     Δ      %
runtime.s                           536464    536412    -52    -0.010%
bytes.s                             32629     32593     -36    -0.110%
strings.s                           44565     44529     -36    -0.081%
os/signal.s                         7967      7959      -8     -0.100%
cmd/vendor/golang.org/x/sys/unix.s  81686     81678     -8     -0.010%
math/big.s                          188235    188253    +18    +0.010%
cmd/link/internal/loader.s          89295     89056     -239   -0.268%
cmd/link/internal/ld.s              633551    633232    -319   -0.050%
cmd/link/internal/arm.s             18934     18928     -6     -0.032%
cmd/link/internal/arm64.s           31814     31801     -13    -0.041%
cmd/link/internal/riscv64.s         7347      7345      -2     -0.027%
cmd/compile/internal/ssa.s          4029173   4033066   +3893  +0.097%
total                               21298280  21301472  +3192  +0.015%

Change-Id: I2e560548b515865129e1724e150e30540e9d29ce
GitHub-Last-Rev: 9a42bd29a55b3917651aecab6932074df96535ae
GitHub-Pull-Request: golang/go#45242
Reviewed-on: https://go-review.googlesource.com/c/go/+/304869
Reviewed-by: Keith Randall
Trust: Josh Bleecher Snyder
---
 src/cmd/compile/internal/ssa/gen/AMD64.rules |  24 +-
 src/cmd/compile/internal/ssa/gen/AMD64Ops.go |   5 +
 src/cmd/compile/internal/ssa/rewriteAMD64.go | 229 +++++++++++++++++--
 test/codegen/bits.go                         |  10 +-
 test/fixedbugs/issue45242.go                 |  28 +++
 5 files changed, 272 insertions(+), 24 deletions(-)
 create mode 100644 test/fixedbugs/issue45242.go

diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index 7dc381bd81..98cd865182 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -624,6 +624,14 @@
 // Recognize bit setting (a |= 1<<b) and toggling (a ^= 1<<b)
 (OR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTS(Q|L) x y)
 (XOR(Q|L) (SHL(Q|L) (MOV(Q|L)const [1]) y) x) => (BTC(Q|L) x y)
+(ORLmodify [off] {sym} ptr s:(SHLL (MOVLconst [1]) x) mem) =>
+  (BTSLmodify [off] {sym} ptr (ANDLconst [31] x) mem)
+(ORQmodify [off] {sym} ptr s:(SHLQ (MOVQconst [1]) x) mem) =>
+  (BTSQmodify [off] {sym} ptr (ANDQconst [63] x) mem)
+(XORLmodify [off] {sym} ptr s:(SHLL (MOVLconst [1]) x) mem) =>
+  (BTCLmodify [off] {sym} ptr (ANDLconst [31] x) mem)
+(XORQmodify [off] {sym} ptr s:(SHLQ (MOVQconst [1]) x) mem) =>
+  (BTCQmodify [off] {sym} ptr (ANDQconst [63] x) mem)

 // Convert ORconst into BTS, if the code gets smaller, with boundary being
 // (ORL $40,AX is 3 bytes, ORL $80,AX is 6 bytes).
@@ -646,6 +654,10 @@
   => (BTRQconst [int8(log64(^c))] x)
 (ANDL (MOVLconst [c]) x) && isUint32PowerOfTwo(int64(^c)) && uint64(^c) >= 128
   => (BTRLconst [int8(log32(^c))] x)
+(ANDLmodify [off] {sym} ptr (NOTL s:(SHLL (MOVLconst [1]) x)) mem) =>
+  (BTRLmodify [off] {sym} ptr (ANDLconst [31] x) mem)
+(ANDQmodify [off] {sym} ptr (NOTQ s:(SHLQ (MOVQconst [1]) x)) mem) =>
+  (BTRQmodify [off] {sym} ptr (ANDQconst [63] x) mem)

 // Special-case bit patterns on first/last bit.
 // generic.rules changes ANDs of high-part/low-part masks into a couple of shifts,
@@ -2064,11 +2076,15 @@
 ((ADD|SUB|MUL|DIV)SD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) => ((ADD|SUB|MUL|DIV)SDload x [off] {sym} ptr mem)
 ((ADD|SUB|MUL|DIV)SS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) => ((ADD|SUB|MUL|DIV)SSload x [off] {sym} ptr mem)
 (MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) => ((ADD|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
-(MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) =>
-  ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Lmodify [off] {sym} ptr x mem)
+(MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) =>
+  ((ADD|SUB|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
+(MOVLstore {sym} [off] ptr y:((BTC|BTR|BTS)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) =>
+  ((BTC|BTR|BTS)Lmodify [off] {sym} ptr (ANDLconst [31] x) mem)
 (MOVQstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Qload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) => ((ADD|AND|OR|XOR)Qmodify [off] {sym} ptr x mem)
-(MOVQstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Q l:(MOVQload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) =>
-  ((ADD|SUB|AND|OR|XOR|BTC|BTR|BTS)Qmodify [off] {sym} ptr x mem)
+(MOVQstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)Q l:(MOVQload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) =>
+  ((ADD|SUB|AND|OR|XOR)Qmodify [off] {sym} ptr x mem)
+(MOVQstore {sym} [off] ptr y:((BTC|BTR|BTS)Q l:(MOVQload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) =>
+  ((BTC|BTR|BTS)Qmodify [off] {sym} ptr (ANDQconst [63] x) mem)

 // Merge ADDQconst and LEAQ into atomic loads.
 (MOV(Q|L|B)atomicload [off1] {sym} (ADDQconst [off2] ptr) mem) && is32Bit(int64(off1)+int64(off2)) =>
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
index 6c3fe1d192..af53cc4f9d 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
@@ -363,6 +363,11 @@ func init() {
 {name: "BTSQconst", argLength: 1, reg: gp11, asm: "BTSQ", resultInArg0: true, clobberFlags: true, aux: "Int8"}, // set bit auxint in arg0, 0 <= auxint < 64

 // direct bit operation on memory operand
+ //
+ // Note that these operations do not mask the bit offset (arg1), and will write beyond their expected
+ // bounds if that argument is larger than 64/32 (for BT*Q and BT*L, respectively). If the compiler
+ // cannot prove that arg1 is in range, it must be explicitly masked (see e.g. the patterns that produce
+ // BT*modify from (MOVstore (BT* (MOVLload ptr mem) x) mem)).
{name: "BTCQmodify", argLength: 3, reg: gpstore, asm: "BTCQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // complement bit arg1 in 64-bit arg0+auxint+aux, arg2=mem {name: "BTCLmodify", argLength: 3, reg: gpstore, asm: "BTCL", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // complement bit arg1 in 32-bit arg0+auxint+aux, arg2=mem {name: "BTSQmodify", argLength: 3, reg: gpstore, asm: "BTSQ", aux: "SymOff", typ: "Mem", clobberFlags: true, faultOnNilArg0: true, symEffect: "Read,Write"}, // set bit arg1 in 64-bit arg0+auxint+aux, arg2=mem diff --git a/src/cmd/compile/internal/ssa/rewriteAMD64.go b/src/cmd/compile/internal/ssa/rewriteAMD64.go index 36f872d0c4..ce94fdb952 100644 --- a/src/cmd/compile/internal/ssa/rewriteAMD64.go +++ b/src/cmd/compile/internal/ssa/rewriteAMD64.go @@ -3001,6 +3001,36 @@ func rewriteValueAMD64_OpAMD64ANDLmodify(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] + b := v.Block + // match: (ANDLmodify [off] {sym} ptr (NOTL s:(SHLL (MOVLconst [1]) x)) mem) + // result: (BTRLmodify [off] {sym} ptr (ANDLconst [31] x) mem) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64NOTL { + break + } + s := v_1.Args[0] + if s.Op != OpAMD64SHLL { + break + } + t := s.Type + x := s.Args[1] + s_0 := s.Args[0] + if s_0.Op != OpAMD64MOVLconst || auxIntToInt32(s_0.AuxInt) != 1 { + break + } + mem := v_2 + v.reset(OpAMD64BTRLmodify) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64ANDLconst, t) + v0.AuxInt = int32ToAuxInt(31) + v0.AddArg(x) + v.AddArg3(ptr, v0, mem) + return true + } // match: (ANDLmodify [off1] {sym} (ADDQconst [off2] base) val mem) // cond: is32Bit(int64(off1)+int64(off2)) // result: (ANDLmodify [off1+off2] {sym} base val mem) @@ -3380,6 +3410,36 @@ func rewriteValueAMD64_OpAMD64ANDQmodify(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] + b := v.Block + // match: (ANDQmodify [off] {sym} ptr (NOTQ s:(SHLQ (MOVQconst [1]) x)) mem) + // result: (BTRQmodify [off] {sym} ptr (ANDQconst [63] x) mem) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpAMD64NOTQ { + break + } + s := v_1.Args[0] + if s.Op != OpAMD64SHLQ { + break + } + t := s.Type + x := s.Args[1] + s_0 := s.Args[0] + if s_0.Op != OpAMD64MOVQconst || auxIntToInt64(s_0.AuxInt) != 1 { + break + } + mem := v_2 + v.reset(OpAMD64BTRQmodify) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64ANDQconst, t) + v0.AuxInt = int32ToAuxInt(63) + v0.AddArg(x) + v.AddArg3(ptr, v0, mem) + return true + } // match: (ANDQmodify [off1] {sym} (ADDQconst [off2] base) val mem) // cond: is32Bit(int64(off1)+int64(off2)) // result: (ANDQmodify [off1+off2] {sym} base val mem) @@ -12750,9 +12810,9 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool { } break } - // match: (MOVLstore {sym} [off] ptr y:(BTCL l:(MOVLload [off] {sym} ptr mem) x) mem) + // match: (MOVLstore {sym} [off] ptr y:(BTCL l:(MOVLload [off] {sym} ptr mem) x) mem) // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) - // result: (BTCLmodify [off] {sym} ptr x mem) + // result: (BTCLmodify [off] {sym} ptr (ANDLconst [31] x) mem) for { off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) @@ -12761,6 +12821,7 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool { if y.Op != OpAMD64BTCL { break } + t := y.Type x := y.Args[1] l := y.Args[0] if l.Op != 
OpAMD64MOVLload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { @@ -12773,12 +12834,15 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool { v.reset(OpAMD64BTCLmodify) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) + v0 := b.NewValue0(l.Pos, OpAMD64ANDLconst, t) + v0.AuxInt = int32ToAuxInt(31) + v0.AddArg(x) + v.AddArg3(ptr, v0, mem) return true } - // match: (MOVLstore {sym} [off] ptr y:(BTRL l:(MOVLload [off] {sym} ptr mem) x) mem) + // match: (MOVLstore {sym} [off] ptr y:(BTRL l:(MOVLload [off] {sym} ptr mem) x) mem) // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) - // result: (BTRLmodify [off] {sym} ptr x mem) + // result: (BTRLmodify [off] {sym} ptr (ANDLconst [31] x) mem) for { off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) @@ -12787,6 +12851,7 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool { if y.Op != OpAMD64BTRL { break } + t := y.Type x := y.Args[1] l := y.Args[0] if l.Op != OpAMD64MOVLload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { @@ -12799,12 +12864,15 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool { v.reset(OpAMD64BTRLmodify) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) + v0 := b.NewValue0(l.Pos, OpAMD64ANDLconst, t) + v0.AuxInt = int32ToAuxInt(31) + v0.AddArg(x) + v.AddArg3(ptr, v0, mem) return true } - // match: (MOVLstore {sym} [off] ptr y:(BTSL l:(MOVLload [off] {sym} ptr mem) x) mem) + // match: (MOVLstore {sym} [off] ptr y:(BTSL l:(MOVLload [off] {sym} ptr mem) x) mem) // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) - // result: (BTSLmodify [off] {sym} ptr x mem) + // result: (BTSLmodify [off] {sym} ptr (ANDLconst [31] x) mem) for { off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) @@ -12813,6 +12881,7 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool { if y.Op != OpAMD64BTSL { break } + t := y.Type x := y.Args[1] l := y.Args[0] if l.Op != OpAMD64MOVLload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { @@ -12825,7 +12894,10 @@ func rewriteValueAMD64_OpAMD64MOVLstore(v *Value) bool { v.reset(OpAMD64BTSLmodify) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) + v0 := b.NewValue0(l.Pos, OpAMD64ANDLconst, t) + v0.AuxInt = int32ToAuxInt(31) + v0.AddArg(x) + v.AddArg3(ptr, v0, mem) return true } // match: (MOVLstore [off] {sym} ptr a:(ADDLconst [c] l:(MOVLload [off] {sym} ptr2 mem)) mem) @@ -13566,6 +13638,7 @@ func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] + b := v.Block // match: (MOVQstore [off1] {sym} (ADDQconst [off2] ptr) val mem) // cond: is32Bit(int64(off1)+int64(off2)) // result: (MOVQstore [off1+off2] {sym} ptr val mem) @@ -13931,9 +14004,9 @@ func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool { } break } - // match: (MOVQstore {sym} [off] ptr y:(BTCQ l:(MOVQload [off] {sym} ptr mem) x) mem) + // match: (MOVQstore {sym} [off] ptr y:(BTCQ l:(MOVQload [off] {sym} ptr mem) x) mem) // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) - // result: (BTCQmodify [off] {sym} ptr x mem) + // result: (BTCQmodify [off] {sym} ptr (ANDQconst [63] x) mem) for { off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) @@ -13942,6 +14015,7 @@ func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool { if y.Op != OpAMD64BTCQ { break } + t := y.Type x := y.Args[1] l := y.Args[0] if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { @@ -13954,12 +14028,15 @@ func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool { 
v.reset(OpAMD64BTCQmodify) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) + v0 := b.NewValue0(l.Pos, OpAMD64ANDQconst, t) + v0.AuxInt = int32ToAuxInt(63) + v0.AddArg(x) + v.AddArg3(ptr, v0, mem) return true } - // match: (MOVQstore {sym} [off] ptr y:(BTRQ l:(MOVQload [off] {sym} ptr mem) x) mem) + // match: (MOVQstore {sym} [off] ptr y:(BTRQ l:(MOVQload [off] {sym} ptr mem) x) mem) // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) - // result: (BTRQmodify [off] {sym} ptr x mem) + // result: (BTRQmodify [off] {sym} ptr (ANDQconst [63] x) mem) for { off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) @@ -13968,6 +14045,7 @@ func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool { if y.Op != OpAMD64BTRQ { break } + t := y.Type x := y.Args[1] l := y.Args[0] if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { @@ -13980,12 +14058,15 @@ func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool { v.reset(OpAMD64BTRQmodify) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) + v0 := b.NewValue0(l.Pos, OpAMD64ANDQconst, t) + v0.AuxInt = int32ToAuxInt(63) + v0.AddArg(x) + v.AddArg3(ptr, v0, mem) return true } - // match: (MOVQstore {sym} [off] ptr y:(BTSQ l:(MOVQload [off] {sym} ptr mem) x) mem) + // match: (MOVQstore {sym} [off] ptr y:(BTSQ l:(MOVQload [off] {sym} ptr mem) x) mem) // cond: y.Uses==1 && l.Uses==1 && clobber(y, l) - // result: (BTSQmodify [off] {sym} ptr x mem) + // result: (BTSQmodify [off] {sym} ptr (ANDQconst [63] x) mem) for { off := auxIntToInt32(v.AuxInt) sym := auxToSym(v.Aux) @@ -13994,6 +14075,7 @@ func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool { if y.Op != OpAMD64BTSQ { break } + t := y.Type x := y.Args[1] l := y.Args[0] if l.Op != OpAMD64MOVQload || auxIntToInt32(l.AuxInt) != off || auxToSym(l.Aux) != sym { @@ -14006,7 +14088,10 @@ func rewriteValueAMD64_OpAMD64MOVQstore(v *Value) bool { v.reset(OpAMD64BTSQmodify) v.AuxInt = int32ToAuxInt(off) v.Aux = symToAux(sym) - v.AddArg3(ptr, x, mem) + v0 := b.NewValue0(l.Pos, OpAMD64ANDQconst, t) + v0.AuxInt = int32ToAuxInt(63) + v0.AddArg(x) + v.AddArg3(ptr, v0, mem) return true } // match: (MOVQstore [off] {sym} ptr a:(ADDQconst [c] l:(MOVQload [off] {sym} ptr2 mem)) mem) @@ -18391,6 +18476,33 @@ func rewriteValueAMD64_OpAMD64ORLmodify(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] + b := v.Block + // match: (ORLmodify [off] {sym} ptr s:(SHLL (MOVLconst [1]) x) mem) + // result: (BTSLmodify [off] {sym} ptr (ANDLconst [31] x) mem) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + s := v_1 + if s.Op != OpAMD64SHLL { + break + } + t := s.Type + x := s.Args[1] + s_0 := s.Args[0] + if s_0.Op != OpAMD64MOVLconst || auxIntToInt32(s_0.AuxInt) != 1 { + break + } + mem := v_2 + v.reset(OpAMD64BTSLmodify) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64ANDLconst, t) + v0.AuxInt = int32ToAuxInt(31) + v0.AddArg(x) + v.AddArg3(ptr, v0, mem) + return true + } // match: (ORLmodify [off1] {sym} (ADDQconst [off2] base) val mem) // cond: is32Bit(int64(off1)+int64(off2)) // result: (ORLmodify [off1+off2] {sym} base val mem) @@ -20066,6 +20178,33 @@ func rewriteValueAMD64_OpAMD64ORQmodify(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] + b := v.Block + // match: (ORQmodify [off] {sym} ptr s:(SHLQ (MOVQconst [1]) x) mem) + // result: (BTSQmodify [off] {sym} ptr (ANDQconst [63] x) mem) + for { + off := auxIntToInt32(v.AuxInt) + sym := 
auxToSym(v.Aux) + ptr := v_0 + s := v_1 + if s.Op != OpAMD64SHLQ { + break + } + t := s.Type + x := s.Args[1] + s_0 := s.Args[0] + if s_0.Op != OpAMD64MOVQconst || auxIntToInt64(s_0.AuxInt) != 1 { + break + } + mem := v_2 + v.reset(OpAMD64BTSQmodify) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64ANDQconst, t) + v0.AuxInt = int32ToAuxInt(63) + v0.AddArg(x) + v.AddArg3(ptr, v0, mem) + return true + } // match: (ORQmodify [off1] {sym} (ADDQconst [off2] base) val mem) // cond: is32Bit(int64(off1)+int64(off2)) // result: (ORQmodify [off1+off2] {sym} base val mem) @@ -28155,6 +28294,33 @@ func rewriteValueAMD64_OpAMD64XORLmodify(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] + b := v.Block + // match: (XORLmodify [off] {sym} ptr s:(SHLL (MOVLconst [1]) x) mem) + // result: (BTCLmodify [off] {sym} ptr (ANDLconst [31] x) mem) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + s := v_1 + if s.Op != OpAMD64SHLL { + break + } + t := s.Type + x := s.Args[1] + s_0 := s.Args[0] + if s_0.Op != OpAMD64MOVLconst || auxIntToInt32(s_0.AuxInt) != 1 { + break + } + mem := v_2 + v.reset(OpAMD64BTCLmodify) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64ANDLconst, t) + v0.AuxInt = int32ToAuxInt(31) + v0.AddArg(x) + v.AddArg3(ptr, v0, mem) + return true + } // match: (XORLmodify [off1] {sym} (ADDQconst [off2] base) val mem) // cond: is32Bit(int64(off1)+int64(off2)) // result: (XORLmodify [off1+off2] {sym} base val mem) @@ -28523,6 +28689,33 @@ func rewriteValueAMD64_OpAMD64XORQmodify(v *Value) bool { v_2 := v.Args[2] v_1 := v.Args[1] v_0 := v.Args[0] + b := v.Block + // match: (XORQmodify [off] {sym} ptr s:(SHLQ (MOVQconst [1]) x) mem) + // result: (BTCQmodify [off] {sym} ptr (ANDQconst [63] x) mem) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + s := v_1 + if s.Op != OpAMD64SHLQ { + break + } + t := s.Type + x := s.Args[1] + s_0 := s.Args[0] + if s_0.Op != OpAMD64MOVQconst || auxIntToInt64(s_0.AuxInt) != 1 { + break + } + mem := v_2 + v.reset(OpAMD64BTCQmodify) + v.AuxInt = int32ToAuxInt(off) + v.Aux = symToAux(sym) + v0 := b.NewValue0(v.Pos, OpAMD64ANDQconst, t) + v0.AuxInt = int32ToAuxInt(63) + v0.AddArg(x) + v.AddArg3(ptr, v0, mem) + return true + } // match: (XORQmodify [off1] {sym} (ADDQconst [off2] base) val mem) // cond: is32Bit(int64(off1)+int64(off2)) // result: (XORQmodify [off1+off2] {sym} base val mem) diff --git a/test/codegen/bits.go b/test/codegen/bits.go index 806dad13c8..d41383f42c 100644 --- a/test/codegen/bits.go +++ b/test/codegen/bits.go @@ -262,8 +262,8 @@ func bitcompl32(a, b uint32) (n uint32) { return n } -// check direct operation on memory with constant source -func bitOpOnMem(a []uint32) { +// check direct operation on memory with constant and shifted constant sources +func bitOpOnMem(a []uint32, b, c, d uint32) { // amd64:`ANDL\s[$]200,\s\([A-Z]+\)` a[0] &= 200 // amd64:`ORL\s[$]220,\s4\([A-Z]+\)` @@ -276,6 +276,12 @@ func bitOpOnMem(a []uint32) { a[4] |= 0x4000 // amd64:`BTCL\s[$]13,\s20\([A-Z]+\)`,-`XORL` a[5] ^= 0x2000 + // amd64:`BTRL\s[A-Z]+,\s24\([A-Z]+\)` + a[6] &^= 1 << (b & 31) + // amd64:`BTSL\s[A-Z]+,\s28\([A-Z]+\)` + a[7] |= 1 << (c & 31) + // amd64:`BTCL\s[A-Z]+,\s32\([A-Z]+\)` + a[8] ^= 1 << (d & 31) } func bitcheckMostNegative(b uint8) bool { diff --git a/test/fixedbugs/issue45242.go b/test/fixedbugs/issue45242.go new file mode 100644 index 0000000000..b99722120d --- /dev/null +++ 
b/test/fixedbugs/issue45242.go
@@ -0,0 +1,28 @@
+// run
+
+// Copyright 2021 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package main
+
+import "fmt"
+
+//go:noinline
+func repro(b []byte, bit int32) {
+    _ = b[3]
+    v := uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24 | 1<<(bit&31)
+    b[0] = byte(v)
+    b[1] = byte(v >> 8)
+    b[2] = byte(v >> 16)
+    b[3] = byte(v >> 24)
+}
+
+func main() {
+    var b [8]byte
+    repro(b[:], 32)
+    want := [8]byte{1, 0, 0, 0, 0, 0, 0, 0}
+    if b != want {
+        panic(fmt.Sprintf("got %v, want %v\n", b, want))
+    }
+}
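The new test only exercises the 32-bit (BTSLmodify) path. A 64-bit
analogue might look like the sketch below. This is illustrative only:
it is not part of the change, the name repro64 is invented, and whether
the compiler merges these particular loads and stores into a single
MOVQload/MOVQstore (and therefore reaches the BTSQmodify rules) is an
assumption rather than something the patch asserts.

    // Hypothetical 64-bit analogue of repro; not part of this change.
    package main

    import "fmt"

    //go:noinline
    func repro64(b []byte, bit int64) {
        _ = b[7]
        v := uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
            uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56 |
            1<<(bit&63)
        b[0] = byte(v)
        b[1] = byte(v >> 8)
        b[2] = byte(v >> 16)
        b[3] = byte(v >> 24)
        b[4] = byte(v >> 32)
        b[5] = byte(v >> 40)
        b[6] = byte(v >> 48)
        b[7] = byte(v >> 56)
    }

    func main() {
        var b [16]byte
        repro64(b[:], 64) // 64 & 63 == 0, so only bit 0 of the first quadword should be set
        want := [16]byte{1}
        if b != want {
            panic(fmt.Sprintf("got %v, want %v", b, want))
        }
    }

With the masked rewrite only b[0] is set; an unmasked BTSQmodify would
instead have set bit 0 of b[8].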