crypto/rsa: use R*R multiplication to get into the Montgomery domain

This is faster than the current code because computing RR involves
one more shiftIn and using it involves an extra multiplication, but each
exponentiation was doing montgomeryRepresentation twice, once for x and
once for 1, and now they share the RR precomputation.

More importantly, it allows precomputing the value and attaching it to
the private key in a future CL.

name                    old time/op  new time/op  delta
DecryptPKCS1v15/2048-8  1.46ms ± 0%  1.40ms ± 7%   -3.69%  (p=0.003 n=10+9)
DecryptPKCS1v15/3072-8  4.23ms ± 0%  4.13ms ± 4%   -2.36%  (p=0.004 n=9+9)
DecryptPKCS1v15/4096-8  9.42ms ± 0%  9.08ms ± 3%   -3.69%  (p=0.000 n=9+10)
EncryptPKCS1v15/2048-8   221µs ± 0%   137µs ± 1%  -37.91%  (p=0.000 n=9+10)
DecryptOAEP/2048-8      1.46ms ± 0%  1.39ms ± 1%   -4.97%  (p=0.000 n=9+10)
EncryptOAEP/2048-8       221µs ± 0%   138µs ± 0%  -37.71%  (p=0.000 n=8+10)
SignPKCS1v15/2048-8     1.68ms ± 0%  1.53ms ± 1%   -8.85%  (p=0.000 n=9+10)
VerifyPKCS1v15/2048-8    220µs ± 0%   137µs ± 1%  -37.84%  (p=0.000 n=9+10)
SignPSS/2048-8          1.68ms ± 0%  1.52ms ± 1%   -9.16%  (p=0.000 n=8+8)
VerifyPSS/2048-8         234µs ±12%   138µs ± 1%  -40.87%  (p=0.000 n=10+9)

Change-Id: I6c650bad9019765d793fd37a529ca186cf1eeef7
Reviewed-on: https://go-review.googlesource.com/c/go/+/445019
Reviewed-by: Roland Shoemaker <roland@golang.org>
Reviewed-by: Michael Knyszek <mknyszek@google.com>
TryBot-Result: Gopher Robot <gobot@golang.org>
Run-TryBot: Filippo Valsorda <filippo@golang.org>
This commit is contained in:
Filippo Valsorda 2022-10-23 19:13:04 +02:00
parent 5aa6313e58
commit 72d2c4c635

View file

@ -301,6 +301,21 @@ type modulus struct {
nat *nat
leading int // number of leading zeros in the modulus
m0inv uint // -nat.limbs[0]⁻¹ mod _W
RR *nat // R*R for montgomeryRepresentation
}
// rr returns R*R with R = 2^(_W * n) and n = len(m.nat.limbs).
func rr(m *modulus) *nat {
rr := new(nat).expandFor(m)
// R*R is 2^(2 * _W * n). We can safely get 2^(_W * (n - 1)) by setting the
// most significant limb to 1. We then get to R*R by shifting left by _W
// n + 1 times.
n := len(rr.limbs)
rr.limbs[n-1] = 1
for i := n - 1; i < 2*n; i++ {
rr.shiftIn(0, m) // x = x * 2^_W mod m
}
return rr
}
// minusInverseModW computes -x⁻¹ mod _W with x odd.
@ -335,6 +350,7 @@ func modulusFromNat(nat *nat) *modulus {
m.nat.limbs = m.nat.limbs[:size]
m.leading = _W - bitLen(m.nat.limbs[size-1])
m.m0inv = minusInverseModW(m.nat.limbs[0])
m.RR = rr(m)
return m
}
@ -510,10 +526,23 @@ func (x *nat) modAdd(y *nat, m *modulus) *nat {
//
// This assumes that x is already reduced mod m.
func (x *nat) montgomeryRepresentation(m *modulus) *nat {
for i := 0; i < len(m.nat.limbs); i++ {
x.shiftIn(0, m) // x = x * 2^_W mod m
}
return x
// A Montgomery multiplication (which computes a * b / R) by R * R works out
// to a multiplication by R, which takes the value out of the Montgomery domain.
return x.montgomeryMul(x.clone(), m.RR, m)
}
// montgomeryReduction calculates x = x / R mod m, with R = 2^(_W * n) and
// n = len(m.nat.limbs).
//
// This assumes that x is already reduced mod m.
func (x *nat) montgomeryReduction(m *modulus) *nat {
// By Montgomery multiplying with 1 not in Montgomery representation, we
// convert out back from Montgomery representation, because it works out to
// dividing by R.
t0 := x.clone()
t1 := new(nat).expandFor(m)
t1.limbs[0] = 1
return x.montgomeryMul(t0, t1, m)
}
// montgomeryMul calculates d = a * b / R mod m, with R = 2^(_W * n) and
@ -614,13 +643,5 @@ func (out *nat) exp(x *nat, e []byte, m *modulus) *nat {
}
}
// By Montgomery multiplying with 1 not in Montgomery representation, we
// convert out back from Montgomery representation, because it works out to
// dividing by R.
t0.assign(yes, out)
t1.resetFor(m)
t1.limbs[0] = 1
out.montgomeryMul(t0, t1, m)
return out
return out.montgomeryReduction(m)
}