big: potential bug fix, cleanups

- implemented setWord, use it where setUint64 is wrong
- divLarge: use fast mulWW, divWW; implemented mulWW, divWW
- better assembly code for addMulVVW

R=rsc
CC=golang-dev
https://golang.org/cl/1258042
This commit is contained in:
Robert Griesemer 2010-05-19 09:36:50 -07:00
parent 5b8117b98a
commit a688eb6ee4
4 changed files with 61 additions and 19 deletions

View file

@ -56,6 +56,7 @@ func subWW_g(x, y, c Word) (z1, z0 Word) {
// z1<<_W + z0 = x*y
func mulWW(x, y Word) (z1, z0 Word)
func mulWW_g(x, y Word) (z1, z0 Word) {
// Split x and y into 2 halfWords each, multiply
// the halfWords separately while avoiding overflow,
@ -242,6 +243,7 @@ func leadingZeros(x Word) uint {
// q = (x1<<_W + x0 - r)/y
func divWW(x1, x0, y Word) (q, r Word)
func divWW_g(x1, x0, y Word) (q, r Word) {
if x1 == 0 {
q, r = x0/y, x0%y

View file

@ -5,6 +5,25 @@
// This file provides fast assembly versions for the elementary
// arithmetic operations on vectors implemented in arith.go.
// func mulWW(x, y Word) (z1, z0 Word)
TEXT ·mulWW(SB),7,$0
MOVL x+0(FP), AX
MULL y+4(FP)
MOVL DX, z1+8(FP)
MOVL AX, z0+12(FP)
RET
// func divWW(x1, x0, y Word) (q, r Word)
TEXT ·divWW(SB),7,$0
MOVL x1+0(FP), DX
MOVL x0+4(FP), AX
DIVL y+8(FP)
MOVL AX, q+12(FP)
MOVL DX, r+16(FP)
RET
// func addVV(z, x, y []Word) (c Word)
TEXT ·addVV(SB),7,$0
MOVL z+0(FP), DI
@ -212,11 +231,10 @@ TEXT ·addMulVVW(SB),7,$0
L6: MOVL (SI)(BX*4), AX
MULL BP
ADDL (DI)(BX*4), AX
ADCL $0, DX
ADDL CX, AX
ADCL $0, DX
MOVL AX, (DI)(BX*4)
ADDL AX, (DI)(BX*4)
ADCL $0, DX
MOVL DX, CX
ADDL $1, BX // i++

View file

@ -7,6 +7,25 @@
// TODO(gri) - experiment with unrolled loops for faster execution
// func mulWW(x, y Word) (z1, z0 Word)
TEXT ·mulWW(SB),7,$0
MOVQ x+0(FP), AX
MULQ y+8(FP)
MOVQ DX, z1+16(FP)
MOVQ AX, z0+24(FP)
RET
// func divWW(x1, x0, y Word) (q, r Word)
TEXT ·divWW(SB),7,$0
MOVQ x1+0(FP), DX
MOVQ x0+8(FP), AX
DIVQ y+16(FP)
MOVQ AX, q+24(FP)
MOVQ DX, r+32(FP)
RET
// func addVV(z, x, y []Word) (c Word)
TEXT ·addVV(SB),7,$0
MOVQ z+0(FP), R10
@ -210,11 +229,10 @@ TEXT ·addMulVVW(SB),7,$0
L6: MOVQ (R8)(BX*8), AX
MULQ R9
ADDQ (R10)(BX*8), AX
ADCQ $0, DX
ADDQ CX, AX
ADCQ $0, DX
MOVQ AX, (R10)(BX*8)
ADDQ AX, (R10)(BX*8)
ADCQ $0, DX
MOVQ DX, CX
ADDL $1, BX // i++

View file

@ -69,16 +69,20 @@ func (z nat) make(n int) nat {
}
func (z nat) setUint64(x uint64) nat {
func (z nat) setWord(x Word) nat {
if x == 0 {
return z.make(0)
}
z = z.make(1)
z[0] = x
return z
}
func (z nat) setUint64(x uint64) nat {
// single-digit values
if x == uint64(Word(x)) {
z = z.make(1)
z[0] = Word(x)
return z
if w := Word(x); uint64(w) == x {
return z.setWord(w)
}
// compute number of words n required to represent x
@ -194,7 +198,7 @@ func (x nat) cmp(y nat) (r int) {
func (z nat) mulAddWW(x nat, y, r Word) nat {
m := len(x)
if m == 0 || y == 0 {
return z.setUint64(uint64(r)) // result is r
return z.setWord(r) // result is r
}
// m > 0
@ -529,6 +533,8 @@ func (z nat) divLarge(u, uIn, v nat) (q, r nat) {
m := len(uIn) - n
// determine if z can be reused
// TODO(gri) should find a better solution - this if statement
// is very costly (see e.g. time pidigits -s -n 10000)
if alias(z, uIn) || alias(z, v) {
z = nil // z is an alias for uIn or v - cannot reuse
}
@ -549,15 +555,13 @@ func (z nat) divLarge(u, uIn, v nat) (q, r nat) {
// D2.
for j := m; j >= 0; j-- {
// D3.
var qhat Word
if u[j+n] == v[n-1] {
qhat = _B - 1
} else {
qhat := Word(_M)
if u[j+n] != v[n-1] {
var rhat Word
qhat, rhat = divWW_g(u[j+n], u[j+n-1], v[n-1])
qhat, rhat = divWW(u[j+n], u[j+n-1], v[n-1])
// x1 | x2 = q̂v_{n-2}
x1, x2 := mulWW_g(qhat, v[n-2])
x1, x2 := mulWW(qhat, v[n-2])
// test if q̂v_{n-2} > br̂ + u_{j+n-2}
for greaterThan(x1, x2, rhat, u[j+n-2]) {
qhat--
@ -567,7 +571,7 @@ func (z nat) divLarge(u, uIn, v nat) (q, r nat) {
if rhat < prevRhat {
break
}
x1, x2 = mulWW_g(qhat, v[n-2])
x1, x2 = mulWW(qhat, v[n-2])
}
}