wined3d: sincos for vertex shaders.

SCS is unfortunately a fragment-program-only instruction. If we have the NV
extensions we can use SIN and COS. Otherwise we have to approximate sine and
cosine with a Taylor series. Luckily, the application provides the necessary
constants.
This commit is contained in:
Stefan Dösinger 2009-05-12 20:11:50 +02:00 committed by Alexandre Julliard
parent 2f3faf4526
commit 6492622350
2 changed files with 167 additions and 5 deletions

View file

@ -10111,6 +10111,82 @@ static void alphatest_test(IDirect3DDevice9 *device) {
ok(hr == D3D_OK, "IDirect3DDevice9_SetPixelShader failed with 0x%08x\n", hr);
}
/* Draws a sine and a cosine curve with vs_2_0 shaders that use the sincos
 * instruction. Each shader scales the input x coordinate by pi, feeds it to
 * sincos, and writes the selected result (scaled by 0.85) to oPos.y. The sine
 * shader outputs c2.zyzz as diffuse color, the cosine shader c2.yzzz.
 *
 * The helper constants D3DSINCOSCONST1 and D3DSINCOSCONST2 that sincos takes
 * as src1 and src2 are uploaded to c0 and c1.
 *
 * The rendered output is presented but not read back, so this only checks
 * that shader creation and drawing succeed. */
static void sincos_test(IDirect3DDevice9 *device) {
    const DWORD sin_shader_code[] = {
        0xfffe0200,                                                                 /* vs_2_0                       */
        0x0200001f, 0x80000000, 0x900f0000,                                         /* dcl_position v0              */
        0x05000051, 0xa00f0002, 0x40490fdb, 0x3f800000, 0x00000000, 0x3f59999a,     /* def c2, 3.14159, 1, 0, 0.85  */
        0x03000005, 0x80010001, 0x90000000, 0xa0000002,                             /* mul r1.x, v0.x, c2.x         */
        0x04000025, 0x80020000, 0x80000001, 0xa0e40000, 0xa0e40001,                 /* sincos r0.y, r1.x, c0, c1    */
        0x02000001, 0xc00d0000, 0x90e40000,                                         /* mov oPos.xzw, v0             */
        0x03000005, 0xc0020000, 0x80550000, 0xa0ff0002,                             /* mul oPos.y, r0.y, c2.w       */
        0x02000001, 0xd00f0000, 0xa0a60002,                                         /* mov oD0, c2.zyzz             */
        0x0000ffff                                                                  /* end                          */
    };
    const DWORD cos_shader_code[] = {
        0xfffe0200,                                                                 /* vs_2_0                       */
        0x0200001f, 0x80000000, 0x900f0000,                                         /* dcl_position v0              */
        0x05000051, 0xa00f0002, 0x40490fdb, 0x3f800000, 0x00000000, 0x3f59999a,     /* def c2, 3.14159, 1, 0, 0.85  */
        0x03000005, 0x80010001, 0x90000000, 0xa0000002,                             /* mul r1.x, v0.x, c2.x         */
        0x04000025, 0x80010000, 0x80000001, 0xa0e40000, 0xa0e40001,                 /* sincos r0.x, r1.x, c0, c1    */
        0x02000001, 0xc00d0000, 0x90e40000,                                         /* mov oPos.xzw, v0             */
        0x03000005, 0xc0020000, 0x80000000, 0xa0ff0002,                             /* mul oPos.y, r0.x, c2.w       */
        0x02000001, 0xd00f0000, 0xa0a90002,                                         /* mov oD0, c2.yzzz             */
        0x0000ffff                                                                  /* end                          */
    };
    /* NULL-initialized so the cleanup below never releases a garbage pointer
     * when shader creation fails. */
    IDirect3DVertexShader9 *sin_shader = NULL, *cos_shader = NULL;
    HRESULT hr;
    struct {
        float x, y, z;
    } data[1280];
    unsigned int i;
    float sincosc1[4] = {D3DSINCOSCONST1};
    float sincosc2[4] = {D3DSINCOSCONST2};

    hr = IDirect3DDevice9_Clear(device, 0, NULL, D3DCLEAR_TARGET | D3DCLEAR_ZBUFFER, 0x00000000, 1.0f, 0);
    ok(hr == D3D_OK, "IDirect3DDevice9_Clear failed with 0x%08x\n", hr);

    hr = IDirect3DDevice9_CreateVertexShader(device, sin_shader_code, &sin_shader);
    ok(hr == D3D_OK, "IDirect3DDevice9_CreateVertexShader failed with 0x%08x\n", hr);
    hr = IDirect3DDevice9_CreateVertexShader(device, cos_shader_code, &cos_shader);
    ok(hr == D3D_OK, "IDirect3DDevice9_CreateVertexShader failed with 0x%08x\n", hr);
    hr = IDirect3DDevice9_SetFVF(device, D3DFVF_XYZ);
    ok(hr == D3D_OK, "IDirect3DDevice9_SetFVF failed with 0x%08x\n", hr);

    /* c0 and c1 are the src1 and src2 operands of the sincos instructions above */
    hr = IDirect3DDevice9_SetVertexShaderConstantF(device, 0, sincosc1, 1);
    ok(hr == D3D_OK, "IDirect3DDevice9_SetVertexShaderConstantF failed with 0x%08x\n", hr);
    hr = IDirect3DDevice9_SetVertexShaderConstantF(device, 1, sincosc2, 1);
    ok(hr == D3D_OK, "IDirect3DDevice9_SetVertexShaderConstantF failed with 0x%08x\n", hr);

    /* Generate a point from -1 to 1 every 0.5 pixels */
    for(i = 0; i < 1280; i++) {
        data[i].x = (-640.0 + i) / 640.0;
        data[i].y = 0.0;
        data[i].z = 0.1;
    }

    hr = IDirect3DDevice9_BeginScene(device);
    ok(hr == D3D_OK, "IDirect3DDevice9_BeginScene failed with 0x%08x\n", hr);
    if(SUCCEEDED(hr)) {
        hr = IDirect3DDevice9_SetVertexShader(device, sin_shader);
        ok(hr == D3D_OK, "IDirect3DDevice9_SetVertexShader failed with 0x%08x\n", hr);
        hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_POINTLIST, 1280, data, sizeof(*data));
        ok(hr == D3D_OK, "IDirect3DDevice9_DrawPrimitiveUP failed with 0x%08x\n", hr);

        hr = IDirect3DDevice9_SetVertexShader(device, cos_shader);
        ok(hr == D3D_OK, "IDirect3DDevice9_SetVertexShader failed with 0x%08x\n", hr);
        hr = IDirect3DDevice9_DrawPrimitiveUP(device, D3DPT_POINTLIST, 1280, data, sizeof(*data));
        ok(hr == D3D_OK, "IDirect3DDevice9_DrawPrimitiveUP failed with 0x%08x\n", hr);

        hr = IDirect3DDevice9_EndScene(device);
        ok(hr == D3D_OK, "IDirect3DDevice9_EndScene failed with 0x%08x\n", hr);
    }
    hr = IDirect3DDevice9_Present(device, NULL, NULL, NULL, NULL);
    ok(hr == D3D_OK, "IDirect3DDevice9_Present failed with 0x%08x\n", hr);
    /* TODO: Find a way to properly validate the lines. Precision issues make this a kinda nasty task */

    /* Unbind before releasing so the device does not keep using a test shader */
    hr = IDirect3DDevice9_SetVertexShader(device, NULL);
    ok(hr == D3D_OK, "IDirect3DDevice9_SetVertexShader failed with 0x%08x\n", hr);
    if(sin_shader) IDirect3DVertexShader9_Release(sin_shader);
    if(cos_shader) IDirect3DVertexShader9_Release(cos_shader);
}
START_TEST(visual)
{
IDirect3DDevice9 *device_ptr;
@ -10220,6 +10296,7 @@ START_TEST(visual)
if (caps.VertexShaderVersion >= D3DVS_VERSION(2, 0))
{
test_mova(device_ptr);
sincos_test(device_ptr);
if (caps.VertexShaderVersion >= D3DVS_VERSION(3, 0)) {
test_vshader_input(device_ptr);
test_vshader_float16(device_ptr);

View file

@ -1725,13 +1725,98 @@ static void shader_hw_sincos(const struct wined3d_shader_instruction *ins)
* can't use map2gl
*/
SHADER_BUFFER *buffer = ins->ctx->buffer;
struct shader_arb_ctx_priv *priv = ins->ctx->backend_data;
const struct wined3d_shader_dst_param *dst = &ins->dst[0];
char dst_name[50];
char src_name[50];
char src_name0[50], src_name1[50], src_name2[50];
BOOL is_color;
shader_arb_get_dst_param(ins, &ins->dst[0], dst_name);
shader_arb_get_src_param(ins, &ins->src[0], 0, src_name);
shader_addline(buffer, "SCS%s %s, %s;\n", shader_arb_get_modifier(ins), dst_name,
src_name);
shader_arb_get_src_param(ins, &ins->src[0], 0, src_name0);
if(shader_is_pshader_version(ins->ctx->reg_maps->shader_version.type)) {
shader_arb_get_dst_param(ins, &ins->dst[0], dst_name);
shader_addline(buffer, "SCS%s %s, %s;\n", shader_arb_get_modifier(ins), dst_name,
src_name0);
} else if(priv->target_version >= NV2) {
shader_arb_get_register_name(ins, &dst->reg, dst_name, &is_color);
/* Sincos writemask must be .x, .y or .xy */
if(dst->write_mask & WINED3DSP_WRITEMASK_0)
shader_addline(buffer, "COS%s %s.x, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name0);
if(dst->write_mask & WINED3DSP_WRITEMASK_1)
shader_addline(buffer, "SIN%s %s.y, %s;\n", shader_arb_get_modifier(ins), dst_name, src_name0);
} else {
/* Approximate sine and cosine with a taylor series, as per math textbook. The application passes 8
* helper constants(D3DSINCOSCONST1 and D3DSINCOSCONST2) in src1 and src2.
*
* sin(x) = x - x^3/3! + x^5/5! - x^7/7! + ...
* cos(x) = 1 - x^2/2! + x^4/4! - x^6/6! + ...
*
* The constants we get are:
*
* +1 +1, -1 -1 +1 +1 -1 -1
* ---- , ---- , ---- , ----- , ----- , ----- , ------
* 1!*2 2!*4 3!*8 4!*16 5!*32 6!*64 7!*128
*
* If used with x^2, x^3, x^4 etc they calculate sin(x/2) and cos(x/2):
*
* (x/2)^2 = x^2 / 4
* (x/2)^3 = x^3 / 8
* (x/2)^4 = x^4 / 16
* (x/2)^5 = x^5 / 32
* etc
*
* To get the final result:
* sin(x) = 2 * sin(x/2) * cos(x/2)
* cos(x) = cos(x/2)^2 - sin(x/2)^2
* (from sin(x+y) and cos(x+y) rules)
*
* As per MSDN, dst.z is undefined after the operation, and so is
* dst.x and dst.y if they're masked out by the writemask. Ie
* sincos dst.y, src1, c0, c1
* returns the sine in dst.y. dst.x and dst.z are undefined, dst.w is not touched. The assembler
* vsa.exe also stops with an error if the dest register is the same register as the source
* register. This means we can use dest.xyz as temporary storage. The assembler vsa.exe output also
* indicates that sincos consumes 8 instruction slots in vs_2_0(and, strangely, in vs_3_0).
*/
shader_arb_get_src_param(ins, &ins->src[1], 1, src_name1);
shader_arb_get_src_param(ins, &ins->src[2], 2, src_name2);
shader_arb_get_register_name(ins, &dst->reg, dst_name, &is_color);
shader_addline(buffer, "MUL %s.x, %s, %s;\n", dst_name, src_name0, src_name0); /* x ^ 2 */
shader_addline(buffer, "MUL TA.y, %s.x, %s;\n", dst_name, src_name0); /* x ^ 3 */
shader_addline(buffer, "MUL %s.y, TA.y, %s;\n", dst_name, src_name0); /* x ^ 4 */
shader_addline(buffer, "MUL TA.z, %s.y, %s;\n", dst_name, src_name0); /* x ^ 5 */
shader_addline(buffer, "MUL %s.z, TA.z, %s;\n", dst_name, src_name0); /* x ^ 6 */
shader_addline(buffer, "MUL TA.w, %s.z, %s;\n", dst_name, src_name0); /* x ^ 7 */
/* sin(x/2)
*
* Unfortunately we don't get the constants in a DP4-capable form. Is there a way to
* properly merge that with MULs in the code above?
* The swizzles .yz and xw however fit into the .yzxw swizzle added to ps_2_0. Maybe
* we can merge the sine and cosine MAD rows to calculate them together.
*/
shader_addline(buffer, "MUL TA.x, %s, %s.w;\n", src_name0, src_name2); /* x^1, +1/(1!*2) */
shader_addline(buffer, "MAD TA.x, TA.y, %s.x, TA.x;\n", src_name2); /* -1/(3!*8) */
shader_addline(buffer, "MAD TA.x, TA.z, %s.w, TA.x;\n", src_name1); /* +1/(5!*32) */
shader_addline(buffer, "MAD TA.x, TA.w, %s.x, TA.x;\n", src_name1); /* -1/(7!*128) */
/* cos(x/2) */
shader_addline(buffer, "MAD TA.y, %s.x, %s.y, %s.z;\n", dst_name, src_name2, src_name2); /* -1/(2!*4), +1.0 */
shader_addline(buffer, "MAD TA.y, %s.y, %s.z, TA.y;\n", dst_name, src_name1); /* +1/(4!*16) */
shader_addline(buffer, "MAD TA.y, %s.z, %s.y, TA.y;\n", dst_name, src_name1); /* -1/(6!*64) */
if(dst->write_mask & WINED3DSP_WRITEMASK_0) {
/* cos x */
shader_addline(buffer, "MUL TA.z, TA.y, TA.y;\n");
shader_addline(buffer, "MAD %s.x, -TA.x, TA.x, TA.z;\n", dst_name);
}
if(dst->write_mask & WINED3DSP_WRITEMASK_1) {
/* sin x */
shader_addline(buffer, "MUL %s.y, TA.x, TA.y;\n", dst_name);
shader_addline(buffer, "ADD %s.y, %s.y, %s.y;\n", dst_name, dst_name, dst_name);
}
}
}
/* GL locking is done by the caller */