d3dx9: Save multiplications for better performance.

This commit is contained in:
Nozomi Kodama 2013-03-04 00:56:53 -10:00 committed by Alexandre Julliard
parent 9085f1d27f
commit 479195ea41

View file

@@ -2378,60 +2378,70 @@ HRESULT WINAPI D3DXSHEvalConeLight(UINT order, const D3DXVECTOR3 *dir, FLOAT rad
/*
 * D3DXSHEvalDirection
 *
 * Fills 'out' with polynomial basis values evaluated from the components of
 * 'dir' (the expressions have the form of real spherical-harmonic basis
 * functions; 'dir' is presumably expected to be a unit vector -- it is not
 * normalized here, TODO confirm against callers).
 *
 * Parameters:
 *   out   - destination array, written in place.
 *   order - selects how many entries are written:
 *             order == 2 -> out[0..3]
 *             order == 3 -> out[0..8]
 *             order == 4 -> out[0..15]
 *             order == 5 -> out[0..24]
 *             otherwise (any remaining accepted order) -> out[0..35]
 *           Values outside [D3DXSH_MINORDER, D3DXSH_MAXORDER] leave 'out'
 *           untouched.
 *   dir   - direction vector; read unconditionally (its components are
 *           multiplied before 'order' is validated), so it must be a valid
 *           pointer even when 'order' is out of range.
 *
 * Returns 'out' in every case.
 */
FLOAT* WINAPI D3DXSHEvalDirection(FLOAT *out, UINT order, const D3DXVECTOR3 *dir)
{
    /* Component products shared by several entries below, computed once. */
    const FLOAT dirxx = dir->x * dir->x;
    const FLOAT dirxy = dir->x * dir->y;
    const FLOAT dirxz = dir->x * dir->z;
    const FLOAT diryy = dir->y * dir->y;
    const FLOAT diryz = dir->y * dir->z;
    const FLOAT dirzz = dir->z * dir->z;
    const FLOAT dirxxxx = dirxx * dirxx;
    const FLOAT diryyyy = diryy * diryy;
    const FLOAT dirzzzz = dirzz * dirzz;
    const FLOAT dirxyxy = dirxy * dirxy;

    TRACE("out %p, order %u, dir %p\n", out, order, dir);

    if ((order < D3DXSH_MINORDER) || (order > D3DXSH_MAXORDER))
        return out;

    /* Constant term and the three terms linear in dir. */
    out[0] = 0.5f / sqrt(D3DX_PI);
    out[1] = -0.5f / sqrt(D3DX_PI / 3.0f) * dir->y;
    out[2] = 0.5f / sqrt(D3DX_PI / 3.0f) * dir->z;
    out[3] = -0.5f / sqrt(D3DX_PI / 3.0f) * dir->x;
    if (order == 2)
        return out;

    /* Quadratic terms. */
    out[4] = 0.5f / sqrt(D3DX_PI / 15.0f) * dirxy;
    out[5] = -0.5f / sqrt(D3DX_PI / 15.0f) * diryz;
    out[6] = 0.25f / sqrt(D3DX_PI / 5.0f) * (3.0f * dirzz - 1.0f);
    out[7] = -0.5f / sqrt(D3DX_PI / 15.0f) * dirxz;
    out[8] = 0.25f / sqrt(D3DX_PI / 15.0f) * (dirxx - diryy);
    if (order == 3)
        return out;

    /* Cubic terms.  The double-typed literals (42.0) are kept exactly as
     * committed so the intermediate precision is unchanged. */
    out[9] = -sqrt(70.0f / D3DX_PI) / 8.0f * dir->y * (3.0f * dirxx - diryy);
    out[10] = sqrt(105.0f / D3DX_PI) / 2.0f * dirxy * dir->z;
    out[11] = -sqrt(42.0 / D3DX_PI) / 8.0f * dir->y * (-1.0f + 5.0f * dirzz);
    out[12] = sqrt(7.0f / D3DX_PI) / 4.0f * dir->z * (5.0f * dirzz - 3.0f);
    out[13] = sqrt(42.0 / D3DX_PI) / 8.0f * dir->x * (1.0f - 5.0f * dirzz);
    out[14] = sqrt(105.0f / D3DX_PI) / 4.0f * dir->z * (dirxx - diryy);
    out[15] = -sqrt(70.0f / D3DX_PI) / 8.0f * dir->x * (dirxx - 3.0f * diryy);
    if (order == 4)
        return out;

    /* Quartic terms.  out[17] and out[23] reuse the already stored cubic
     * entries out[9] and out[15], scaled by 3 * dir->z. */
    out[16] = 0.75f * sqrt(35.0f / D3DX_PI) * dirxy * (dirxx - diryy);
    out[17] = 3.0f * dir->z * out[9];
    out[18] = 0.75f * sqrt(5.0f / D3DX_PI) * dirxy * (7.0f * dirzz - 1.0f);
    out[19] = 0.375f * sqrt(10.0f / D3DX_PI) * diryz * (3.0f - 7.0f * dirzz);
    out[20] = 3.0f / (16.0f * sqrt(D3DX_PI)) * (35.0f * dirzzzz - 30.f * dirzz + 3.0f);
    out[21] = 0.375f * sqrt(10.0f / D3DX_PI) * dirxz * (3.0f - 7.0f * dirzz);
    out[22] = 0.375f * sqrt(5.0f / D3DX_PI) * (dirxx - diryy) * (7.0f * dirzz - 1.0f);
    out[23] = 3.0 * dir->z * out[15];
    out[24] = 3.0f / 16.0f * sqrt(35.0f / D3DX_PI) * (dirxxxx - 6.0f * dirxyxy + diryyyy);
    if (order == 5)
        return out;

    /* Quintic terms, written for every accepted order above 5. */
    out[25] = -3.0f/ 32.0f * sqrt(154.0f / D3DX_PI) * dir->y * (5.0f * dirxxxx - 10.0f * dirxyxy + diryyyy);
    out[26] = 0.75f * sqrt(385.0f / D3DX_PI) * dirxy * dir->z * (dirxx - diryy);
    out[27] = sqrt(770.0f / D3DX_PI) / 32.0f * dir->y * (3.0f * dirxx - diryy) * (1.0f - 9.0f * dirzz);
    out[28] = sqrt(1155.0f / D3DX_PI) / 4.0f * dirxy * dir->z * (3.0f * dirzz - 1.0f);
    out[29] = sqrt(165.0f / D3DX_PI) / 16.0f * dir->y * (14.0f * dirzz - 21.0f * dirzzzz - 1.0f);
    out[30] = sqrt(11.0f / D3DX_PI) / 16.0f * dir->z * (63.0f * dirzzzz - 70.0f * dirzz + 15.0f);
    out[31] = sqrt(165.0f / D3DX_PI) / 16.0f * dir->x * (14.0f * dirzz - 21.0f * dirzzzz - 1.0f);
    out[32] = sqrt(1155.0f / D3DX_PI) / 8.0f * dir->z * (dirxx - diryy) * (3.0f * dirzz - 1.0f);
    out[33] = sqrt(770.0f / D3DX_PI) / 32.0f * dir->x * (dirxx - 3.0f * diryy) * (1.0f - 9.0f * dirzz);
    out[34] = 3.0f / 16.0f * sqrt(385.0f / D3DX_PI) * dir->z * (dirxxxx - 6.0 * dirxyxy + diryyyy);
    out[35] = -3.0f/ 32.0f * sqrt(154.0f / D3DX_PI) * dir->x * (dirxxxx - 10.0f * dirxyxy + 5.0f * diryyyy);

    return out;
}