hash/fnv: use bits.Mul64 for 128-bit hash

Replace the 128-bit multiplication in 4 parts with bits.Mul64
and two single-width multiplications.  This simplifies the code
and increases throughput by ~50% on amd64.

name         old time/op   new time/op   delta
Fnv128KB-4    9.64µs ± 0%   6.09µs ± 0%  -36.89%  (p=0.016 n=4+5)
Fnv128aKB-4   9.11µs ± 0%   6.17µs ± 5%  -32.32%  (p=0.008 n=5+5)

name         old speed     new speed     delta
Fnv128KB-4   106MB/s ± 0%  168MB/s ± 0%  +58.44%  (p=0.016 n=4+5)
Fnv128aKB-4  112MB/s ± 0%  166MB/s ± 5%  +47.85%  (p=0.008 n=5+5)

Change-Id: Id752f2a20ea3de23a41e08db89eecf2bb60b7e6d
Reviewed-on: https://go-review.googlesource.com/c/133936
Run-TryBot: Matt Layher <mdlayher@gmail.com>
Run-TryBot: Robert Griesemer <gri@golang.org>
TryBot-Result: Gobot Gobot <gobot@golang.org>
Reviewed-by: Matt Layher <mdlayher@gmail.com>
Reviewed-by: Robert Griesemer <gri@golang.org>
This commit is contained in:
Brian Kessler 2018-09-06 12:26:31 -06:00 committed by Robert Griesemer
parent ba69df63eb
commit 1ccb66d1ef

View file

@ -15,6 +15,7 @@ package fnv
import (
"errors"
"hash"
"math/bits"
)
type (
@ -137,18 +138,12 @@ func (s *sum64a) Write(data []byte) (int, error) {
func (s *sum128) Write(data []byte) (int, error) {
for _, c := range data {
// Compute the multiplication in 4 parts to simplify carrying
s1l := (s[1] & 0xffffffff) * prime128Lower
s1h := (s[1] >> 32) * prime128Lower
s0l := (s[0]&0xffffffff)*prime128Lower + (s[1]&0xffffffff)<<prime128Shift
s0h := (s[0]>>32)*prime128Lower + (s[1]>>32)<<prime128Shift
// Carries
s1h += s1l >> 32
s0l += s1h >> 32
s0h += s0l >> 32
// Compute the multiplication
s0, s1 := bits.Mul64(prime128Lower, s[1])
s0 += s[1]<<prime128Shift + prime128Lower*s[0]
// Update the values
s[1] = (s1l & 0xffffffff) + (s1h << 32)
s[0] = (s0l & 0xffffffff) + (s0h << 32)
s[1] = s1
s[0] = s0
s[1] ^= uint64(c)
}
return len(data), nil
@ -157,18 +152,12 @@ func (s *sum128) Write(data []byte) (int, error) {
func (s *sum128a) Write(data []byte) (int, error) {
for _, c := range data {
s[1] ^= uint64(c)
// Compute the multiplication in 4 parts to simplify carrying
s1l := (s[1] & 0xffffffff) * prime128Lower
s1h := (s[1] >> 32) * prime128Lower
s0l := (s[0]&0xffffffff)*prime128Lower + (s[1]&0xffffffff)<<prime128Shift
s0h := (s[0]>>32)*prime128Lower + (s[1]>>32)<<prime128Shift
// Carries
s1h += s1l >> 32
s0l += s1h >> 32
s0h += s0l >> 32
// Compute the multiplication
s0, s1 := bits.Mul64(prime128Lower, s[1])
s0 += s[1]<<prime128Shift + prime128Lower*s[0]
// Update the values
s[1] = (s1l & 0xffffffff) + (s1h << 32)
s[0] = (s0l & 0xffffffff) + (s0h << 32)
s[1] = s1
s[0] = s0
}
return len(data), nil
}