mirror of
https://github.com/golang/go
synced 2024-11-02 11:50:30 +00:00
hash/crc32: improve performance of ppc64SlicingUpdateBy8 on ppc64le
Reduce the number of instructions in the short loop of ppc64SlicingUpdateBy8 function by combining MOVWZ and SRD into a SRD with appropriate parameters performing the same operation and remove MOVWZ R7,R7 from the loop This change produces the following improvements on POWER9. None of the other tests regress. Improvments on other POWERPC platforms similar. name old time/op new time/op delta CRC32/poly=IEEE/size=15/align=0 80.5ns ± 0% 70.6ns ± 0% -12% CRC32/poly=IEEE/size=15/align=1 80.5ns ± 0% 70.6ns ± 0% -12% CRC32/poly=IEEE/size=512/align=1 151ns ± 0% 139ns ± 0% -7% CRC32/poly=IEEE/size=1kB/align=1 167ns ± 0% 155ns ± 0% -7% CRC32/poly=Castagnoli/size=15/align=0 80.2ns ± 0% 70.5ns ± 0% -12% CRC32/poly=Castagnoli/size=15/align=1 80.2ns ± 0% 70.5ns ± 0% -12% CRC32/poly=Castagnoli/size=512/align=1 150ns ± 0% 139ns ± 0% -7% CRC32/poly=Castagnoli/size=1kB/align=1 166ns ± 0% 155ns ± 0% -6% Change-Id: I424709041c30d1c637b595d0845e3ae78dc3e0a0 Reviewed-on: https://go-review.googlesource.com/c/go/+/350989 Reviewed-by: Lynn Boger <laboger@linux.vnet.ibm.com> Reviewed-by: Cherry Mui <cherryyz@google.com> Run-TryBot: Lynn Boger <laboger@linux.vnet.ibm.com> TryBot-Result: Go Bot <gobot@golang.org>
This commit is contained in:
parent
74ba70be68
commit
e128749be8
1 changed files with 2 additions and 4 deletions
|
@ -112,19 +112,17 @@ loop:
|
|||
ANDCC $7,R6,R8 // any leftover bytes
|
||||
BEQ done // none --> done
|
||||
MOVD R8,CTR // byte count
|
||||
|
||||
PCALIGN $16 // align short loop
|
||||
short:
|
||||
MOVBZ 0(R5),R8 // get v
|
||||
MOVBZ R7,R9 // byte(crc) -> R8 BE vs LE?
|
||||
MOVWZ R7,R14
|
||||
SRD $8,R14,R14 // crc>>8
|
||||
SRD $8,R7,R14 // crc>>8
|
||||
XOR R8,R9,R8 // byte(crc)^v -> R8
|
||||
ADD $1,R5 // ptr to next v
|
||||
SLD $2,R8 // convert index-> bytes
|
||||
ADD R8,R4,R9 // &tab[byte(crc)^v]
|
||||
MOVWZ 0(R9),R10 // tab[byte(crc)^v]
|
||||
XOR R10,R14,R7 // loop crc in R7
|
||||
MOVWZ R7,R7 // 32 bits
|
||||
BC 16,0,short
|
||||
done:
|
||||
NOR R7,R7,R7 // ^crc
|
||||
|
|
Loading…
Reference in a new issue