mirror of
https://github.com/golang/go
synced 2024-11-05 18:36:08 +00:00
big: potential bug fix, cleanups
- implemented setWord, use it where setUint64 is wrong - divLarge: use fast mulWW, divWW; implemented mulWW, divWW - better assembly code for addMulVVW R=rsc CC=golang-dev https://golang.org/cl/1258042
This commit is contained in:
parent
5b8117b98a
commit
a688eb6ee4
4 changed files with 61 additions and 19 deletions
|
@ -56,6 +56,7 @@ func subWW_g(x, y, c Word) (z1, z0 Word) {
|
|||
|
||||
|
||||
// z1<<_W + z0 = x*y
|
||||
func mulWW(x, y Word) (z1, z0 Word)
|
||||
func mulWW_g(x, y Word) (z1, z0 Word) {
|
||||
// Split x and y into 2 halfWords each, multiply
|
||||
// the halfWords separately while avoiding overflow,
|
||||
|
@ -242,6 +243,7 @@ func leadingZeros(x Word) uint {
|
|||
|
||||
|
||||
// q = (x1<<_W + x0 - r)/y
|
||||
func divWW(x1, x0, y Word) (q, r Word)
|
||||
func divWW_g(x1, x0, y Word) (q, r Word) {
|
||||
if x1 == 0 {
|
||||
q, r = x0/y, x0%y
|
||||
|
|
|
@ -5,6 +5,25 @@
|
|||
// This file provides fast assembly versions for the elementary
|
||||
// arithmetic operations on vectors implemented in arith.go.
|
||||
|
||||
// func mulWW(x, y Word) (z1, z0 Word)
|
||||
TEXT ·mulWW(SB),7,$0
|
||||
MOVL x+0(FP), AX
|
||||
MULL y+4(FP)
|
||||
MOVL DX, z1+8(FP)
|
||||
MOVL AX, z0+12(FP)
|
||||
RET
|
||||
|
||||
|
||||
// func divWW(x1, x0, y Word) (q, r Word)
|
||||
TEXT ·divWW(SB),7,$0
|
||||
MOVL x1+0(FP), DX
|
||||
MOVL x0+4(FP), AX
|
||||
DIVL y+8(FP)
|
||||
MOVL AX, q+12(FP)
|
||||
MOVL DX, r+16(FP)
|
||||
RET
|
||||
|
||||
|
||||
// func addVV(z, x, y []Word) (c Word)
|
||||
TEXT ·addVV(SB),7,$0
|
||||
MOVL z+0(FP), DI
|
||||
|
@ -212,11 +231,10 @@ TEXT ·addMulVVW(SB),7,$0
|
|||
|
||||
L6: MOVL (SI)(BX*4), AX
|
||||
MULL BP
|
||||
ADDL (DI)(BX*4), AX
|
||||
ADCL $0, DX
|
||||
ADDL CX, AX
|
||||
ADCL $0, DX
|
||||
MOVL AX, (DI)(BX*4)
|
||||
ADDL AX, (DI)(BX*4)
|
||||
ADCL $0, DX
|
||||
MOVL DX, CX
|
||||
ADDL $1, BX // i++
|
||||
|
||||
|
|
|
@ -7,6 +7,25 @@
|
|||
|
||||
// TODO(gri) - experiment with unrolled loops for faster execution
|
||||
|
||||
// func mulWW(x, y Word) (z1, z0 Word)
|
||||
TEXT ·mulWW(SB),7,$0
|
||||
MOVQ x+0(FP), AX
|
||||
MULQ y+8(FP)
|
||||
MOVQ DX, z1+16(FP)
|
||||
MOVQ AX, z0+24(FP)
|
||||
RET
|
||||
|
||||
|
||||
// func divWW(x1, x0, y Word) (q, r Word)
|
||||
TEXT ·divWW(SB),7,$0
|
||||
MOVQ x1+0(FP), DX
|
||||
MOVQ x0+8(FP), AX
|
||||
DIVQ y+16(FP)
|
||||
MOVQ AX, q+24(FP)
|
||||
MOVQ DX, r+32(FP)
|
||||
RET
|
||||
|
||||
|
||||
// func addVV(z, x, y []Word) (c Word)
|
||||
TEXT ·addVV(SB),7,$0
|
||||
MOVQ z+0(FP), R10
|
||||
|
@ -210,11 +229,10 @@ TEXT ·addMulVVW(SB),7,$0
|
|||
|
||||
L6: MOVQ (R8)(BX*8), AX
|
||||
MULQ R9
|
||||
ADDQ (R10)(BX*8), AX
|
||||
ADCQ $0, DX
|
||||
ADDQ CX, AX
|
||||
ADCQ $0, DX
|
||||
MOVQ AX, (R10)(BX*8)
|
||||
ADDQ AX, (R10)(BX*8)
|
||||
ADCQ $0, DX
|
||||
MOVQ DX, CX
|
||||
ADDL $1, BX // i++
|
||||
|
||||
|
|
|
@ -69,16 +69,20 @@ func (z nat) make(n int) nat {
|
|||
}
|
||||
|
||||
|
||||
func (z nat) setUint64(x uint64) nat {
|
||||
func (z nat) setWord(x Word) nat {
|
||||
if x == 0 {
|
||||
return z.make(0)
|
||||
}
|
||||
z = z.make(1)
|
||||
z[0] = x
|
||||
return z
|
||||
}
|
||||
|
||||
|
||||
func (z nat) setUint64(x uint64) nat {
|
||||
// single-digit values
|
||||
if x == uint64(Word(x)) {
|
||||
z = z.make(1)
|
||||
z[0] = Word(x)
|
||||
return z
|
||||
if w := Word(x); uint64(w) == x {
|
||||
return z.setWord(w)
|
||||
}
|
||||
|
||||
// compute number of words n required to represent x
|
||||
|
@ -194,7 +198,7 @@ func (x nat) cmp(y nat) (r int) {
|
|||
func (z nat) mulAddWW(x nat, y, r Word) nat {
|
||||
m := len(x)
|
||||
if m == 0 || y == 0 {
|
||||
return z.setUint64(uint64(r)) // result is r
|
||||
return z.setWord(r) // result is r
|
||||
}
|
||||
// m > 0
|
||||
|
||||
|
@ -529,6 +533,8 @@ func (z nat) divLarge(u, uIn, v nat) (q, r nat) {
|
|||
m := len(uIn) - n
|
||||
|
||||
// determine if z can be reused
|
||||
// TODO(gri) should find a better solution - this if statement
|
||||
// is very costly (see e.g. time pidigits -s -n 10000)
|
||||
if alias(z, uIn) || alias(z, v) {
|
||||
z = nil // z is an alias for uIn or v - cannot reuse
|
||||
}
|
||||
|
@ -549,15 +555,13 @@ func (z nat) divLarge(u, uIn, v nat) (q, r nat) {
|
|||
// D2.
|
||||
for j := m; j >= 0; j-- {
|
||||
// D3.
|
||||
var qhat Word
|
||||
if u[j+n] == v[n-1] {
|
||||
qhat = _B - 1
|
||||
} else {
|
||||
qhat := Word(_M)
|
||||
if u[j+n] != v[n-1] {
|
||||
var rhat Word
|
||||
qhat, rhat = divWW_g(u[j+n], u[j+n-1], v[n-1])
|
||||
qhat, rhat = divWW(u[j+n], u[j+n-1], v[n-1])
|
||||
|
||||
// x1 | x2 = q̂v_{n-2}
|
||||
x1, x2 := mulWW_g(qhat, v[n-2])
|
||||
x1, x2 := mulWW(qhat, v[n-2])
|
||||
// test if q̂v_{n-2} > br̂ + u_{j+n-2}
|
||||
for greaterThan(x1, x2, rhat, u[j+n-2]) {
|
||||
qhat--
|
||||
|
@ -567,7 +571,7 @@ func (z nat) divLarge(u, uIn, v nat) (q, r nat) {
|
|||
if rhat < prevRhat {
|
||||
break
|
||||
}
|
||||
x1, x2 = mulWW_g(qhat, v[n-2])
|
||||
x1, x2 = mulWW(qhat, v[n-2])
|
||||
}
|
||||
}
|
||||
|
||||
|
|
Loading…
Reference in a new issue