- add S3TC support to BltFast (fixes NOLF)

- proper surface unlocking in case of error in the Blt case
This commit is contained in:
Lionel Ulmer 2004-08-02 22:21:30 +00:00 committed by Alexandre Julliard
parent 1866dfbe16
commit fbf003dd39

View file

@ -359,26 +359,132 @@ static HRESULT _Blt_ColorFill(
return DD_OK;
}
void ComputeShifts(DWORD mask, DWORD* lshift, DWORD* rshift)
static void ComputeShifts(DWORD mask, DWORD* lshift, DWORD* rshift)
{
int pos = 0;
int bits = 0;
*lshift = 0;
*rshift = 0;
if (!mask)
return;
while(!(mask & (1 << pos)))
pos++;
while(mask & (1 << (pos+bits)))
bits++;
*lshift = pos;
*rshift = 8 - bits;
int pos = 0;
int bits = 0;
*lshift = 0;
*rshift = 0;
if (!mask)
return;
while(!(mask & (1 << pos)))
pos++;
while(mask & (1 << (pos+bits)))
bits++;
*lshift = pos;
*rshift = 8 - bits;
}
/* This is used to factorize the decompression between the Blt and BltFast code */
static void DoDXTCDecompression(const DDSURFACEDESC2 *sdesc, const DDSURFACEDESC2 *ddesc)
{
DWORD rs,rb,rm;
DWORD gs,gb,gm;
DWORD bs,bb,bm;
DWORD as,ab,am;
if (!s3tc_initialized) {
/* FIXME: We may fake this by rendering the texture into the framebuffer using OpenGL functions and reading back
* the framebuffer. This will be slow and somewhat ugly. */
FIXME("Manual S3TC decompression is not supported in native mode\n");
return;
}
rm = ddesc->u4.ddpfPixelFormat.u2.dwRBitMask;
ComputeShifts(rm, &rs, &rb);
gm = ddesc->u4.ddpfPixelFormat.u3.dwGBitMask;
ComputeShifts(gm, &gs, &gb);
bm = ddesc->u4.ddpfPixelFormat.u4.dwBBitMask;
ComputeShifts(bm, &bs, &bb);
am = ddesc->u4.ddpfPixelFormat.u5.dwRGBAlphaBitMask;
ComputeShifts(am, &as, &ab);
if (sdesc->u4.ddpfPixelFormat.dwFourCC == MAKE_FOURCC('D','X','T','1')) {
int is16 = ddesc->u4.ddpfPixelFormat.u1.dwRGBBitCount == 16;
int pitch = ddesc->u1.lPitch;
int width = ddesc->dwWidth;
int height = ddesc->dwHeight;
int x,y;
char* dst = (char*) ddesc->lpSurface;
char* src = (char*) sdesc->lpSurface;
for (x = 0; x < width; x++)
for (y =0; y < height; y++) {
DWORD pixel = 0;
BYTE data[4];
(*fetch_2d_texel_rgba_dxt1)(width, src, x, y, data);
pixel = 0;
pixel |= ((data[0] >> rb) << rs) & rm;
pixel |= ((data[1] >> gb) << gs) & gm;
pixel |= ((data[2] >> bb) << bs) & bm;
pixel |= ((data[3] >> ab) << as) & am;
if (is16)
*((WORD*)(dst+y*pitch+x*(is16?2:4))) = pixel;
else
*((DWORD*)(dst+y*pitch+x*(is16?2:4))) = pixel;
}
} else if (sdesc->u4.ddpfPixelFormat.dwFourCC == MAKE_FOURCC('D','X','T','3')) {
int is16 = ddesc->u4.ddpfPixelFormat.u1.dwRGBBitCount == 16;
int pitch = ddesc->u1.lPitch;
int width = ddesc->dwWidth;
int height = ddesc->dwHeight;
int x,y;
char* dst = (char*) ddesc->lpSurface;
char* src = (char*) sdesc->lpSurface;
for (x = 0; x < width; x++)
for (y =0; y < height; y++) {
DWORD pixel = 0;
BYTE data[4];
(*fetch_2d_texel_rgba_dxt3)(width, src, x, y, data);
pixel = 0;
pixel |= ((data[0] >> rb) << rs) & rm;
pixel |= ((data[1] >> gb) << gs) & gm;
pixel |= ((data[2] >> bb) << bs) & bm;
pixel |= ((data[3] >> ab) << as) & am;
if (is16)
*((WORD*)(dst+y*pitch+x*(is16?2:4))) = pixel;
else
*((DWORD*)(dst+y*pitch+x*(is16?2:4))) = pixel;
}
} else if (sdesc->u4.ddpfPixelFormat.dwFourCC == MAKE_FOURCC('D','X','T','5')) {
int is16 = ddesc->u4.ddpfPixelFormat.u1.dwRGBBitCount == 16;
int pitch = ddesc->u1.lPitch;
int width = ddesc->dwWidth;
int height = ddesc->dwHeight;
int x,y;
char* dst = (char*) ddesc->lpSurface;
char* src = (char*) sdesc->lpSurface;
for (x = 0; x < width; x++)
for (y =0; y < height; y++) {
DWORD pixel = 0;
BYTE data[4];
(*fetch_2d_texel_rgba_dxt5)(width, src, x, y, data);
pixel = 0;
pixel |= ((data[0] >> rb) << rs) & rm;
pixel |= ((data[1] >> gb) << gs) & gm;
pixel |= ((data[2] >> bb) << bs) & bm;
pixel |= ((data[3] >> ab) << as) & am;
if (is16)
*((WORD*)(dst+y*pitch+x*(is16?2:4))) = pixel;
else
*((DWORD*)(dst+y*pitch+x*(is16?2:4))) = pixel;
}
}
#if 0 /* Usefull for debugging */
{
static int idx;
char texname[255];
FILE* f;
sprintf(texname, "dxt_%d.pnm", idx++);
f = fopen(texname,"w");
DDRAW_dump_surface_to_disk(This, f, 1);
fclose(f);
}
#endif
}
HRESULT WINAPI
DIB_DirectDrawSurface_Blt(LPDIRECTDRAWSURFACE7 iface, LPRECT rdst,
LPDIRECTDRAWSURFACE7 src, LPRECT rsrc,
@ -391,7 +497,7 @@ DIB_DirectDrawSurface_Blt(LPDIRECTDRAWSURFACE7 iface, LPRECT rdst,
int bpp, srcheight, srcwidth, dstheight, dstwidth, width;
int x, y;
LPBYTE dbuf, sbuf;
TRACE("(%p)->(%p,%p,%p,%08lx,%p)\n", This,rdst,src,rsrc,dwFlags,lpbltfx);
if (TRACE_ON(ddraw)) {
@ -422,7 +528,8 @@ DIB_DirectDrawSurface_Blt(LPDIRECTDRAWSURFACE7 iface, LPRECT rdst,
(ddesc.u4.ddpfPixelFormat.dwFlags & DDPF_FOURCC)) {
if (sdesc.u4.ddpfPixelFormat.dwFourCC != sdesc.u4.ddpfPixelFormat.dwFourCC) {
FIXME("FOURCC->FOURCC copy only supported for the same type of surface\n");
return DDERR_INVALIDPIXELFORMAT;
ret = DDERR_INVALIDPIXELFORMAT;
goto release;
}
memcpy(ddesc.lpSurface, sdesc.lpSurface, ddesc.u1.dwLinearSize);
goto release;
@ -430,107 +537,7 @@ DIB_DirectDrawSurface_Blt(LPDIRECTDRAWSURFACE7 iface, LPRECT rdst,
if ((sdesc.u4.ddpfPixelFormat.dwFlags & DDPF_FOURCC) &&
(!(ddesc.u4.ddpfPixelFormat.dwFlags & DDPF_FOURCC))) {
DWORD rs,rb,rm;
DWORD gs,gb,gm;
DWORD bs,bb,bm;
DWORD as,ab,am;
if (!s3tc_initialized) {
/* FIXME: We may fake this by rendering the texture into the framebuffer using OpenGL functions and reading back
* the framebuffer. This will be slow and somewhat ugly. */
FIXME("Manual S3TC decompression is not supported in native mode\n");
goto release;
}
rm = ddesc.u4.ddpfPixelFormat.u2.dwRBitMask;
ComputeShifts(rm, &rs, &rb);
gm = ddesc.u4.ddpfPixelFormat.u3.dwGBitMask;
ComputeShifts(gm, &gs, &gb);
bm = ddesc.u4.ddpfPixelFormat.u4.dwBBitMask;
ComputeShifts(bm, &bs, &bb);
am = ddesc.u4.ddpfPixelFormat.u5.dwRGBAlphaBitMask;
ComputeShifts(am, &as, &ab);
if (sdesc.u4.ddpfPixelFormat.dwFourCC == MAKE_FOURCC('D','X','T','1')) {
int is16 = ddesc.u4.ddpfPixelFormat.u1.dwRGBBitCount == 16;
int pitch = ddesc.u1.lPitch;
int width = ddesc.dwWidth;
int height = ddesc.dwHeight;
int x,y;
char* dst = (char*) ddesc.lpSurface;
char* src = (char*) sdesc.lpSurface;
for (x = 0; x < width; x++)
for (y =0; y < height; y++) {
DWORD pixel = 0;
BYTE data[4];
(*fetch_2d_texel_rgba_dxt1)(width, src, x, y, data);
pixel = 0;
pixel |= ((data[0] >> rb) << rs) & rm;
pixel |= ((data[1] >> gb) << gs) & gm;
pixel |= ((data[2] >> bb) << bs) & bm;
pixel |= ((data[3] >> ab) << as) & am;
if (is16)
*((WORD*)(dst+y*pitch+x*(is16?2:4))) = pixel;
else
*((DWORD*)(dst+y*pitch+x*(is16?2:4))) = pixel;
}
} else if (sdesc.u4.ddpfPixelFormat.dwFourCC == MAKE_FOURCC('D','X','T','3')) {
int is16 = ddesc.u4.ddpfPixelFormat.u1.dwRGBBitCount == 16;
int pitch = ddesc.u1.lPitch;
int width = ddesc.dwWidth;
int height = ddesc.dwHeight;
int x,y;
char* dst = (char*) ddesc.lpSurface;
char* src = (char*) sdesc.lpSurface;
for (x = 0; x < width; x++)
for (y =0; y < height; y++) {
DWORD pixel = 0;
BYTE data[4];
(*fetch_2d_texel_rgba_dxt3)(width, src, x, y, data);
pixel = 0;
pixel |= ((data[0] >> rb) << rs) & rm;
pixel |= ((data[1] >> gb) << gs) & gm;
pixel |= ((data[2] >> bb) << bs) & bm;
pixel |= ((data[3] >> ab) << as) & am;
if (is16)
*((WORD*)(dst+y*pitch+x*(is16?2:4))) = pixel;
else
*((DWORD*)(dst+y*pitch+x*(is16?2:4))) = pixel;
}
} else if (sdesc.u4.ddpfPixelFormat.dwFourCC == MAKE_FOURCC('D','X','T','5')) {
int is16 = ddesc.u4.ddpfPixelFormat.u1.dwRGBBitCount == 16;
int pitch = ddesc.u1.lPitch;
int width = ddesc.dwWidth;
int height = ddesc.dwHeight;
int x,y;
char* dst = (char*) ddesc.lpSurface;
char* src = (char*) sdesc.lpSurface;
for (x = 0; x < width; x++)
for (y =0; y < height; y++) {
DWORD pixel = 0;
BYTE data[4];
(*fetch_2d_texel_rgba_dxt5)(width, src, x, y, data);
pixel = 0;
pixel |= ((data[0] >> rb) << rs) & rm;
pixel |= ((data[1] >> gb) << gs) & gm;
pixel |= ((data[2] >> bb) << bs) & bm;
pixel |= ((data[3] >> ab) << as) & am;
if (is16)
*((WORD*)(dst+y*pitch+x*(is16?2:4))) = pixel;
else
*((DWORD*)(dst+y*pitch+x*(is16?2:4))) = pixel;
}
}
#if 0 /* Usefull for debugging */
{
static int idx;
char texname[255];
FILE* f;
sprintf(texname, "dxt_%d.pnm", idx++);
f = fopen(texname,"w");
DDRAW_dump_surface_to_disk(This, f, 1);
fclose(f);
}
#endif
DoDXTCDecompression(&sdesc, &ddesc);
goto release;
}
@ -984,14 +991,6 @@ DIB_DirectDrawSurface_BltFast(LPDIRECTDRAWSURFACE7 iface, DWORD dstx,
ddesc = This->surface_desc;
sdesc = (ICOM_OBJECT(IDirectDrawSurfaceImpl, IDirectDrawSurface7, src))->surface_desc;
if ((sdesc.u4.ddpfPixelFormat.dwFlags & DDPF_FOURCC) && (ddesc.u4.ddpfPixelFormat.dwFlags & DDPF_FOURCC)) {
if (trans)
FIXME("trans arg not supported when a FOURCC surface is involved\n");
if (dstx || dsty)
FIXME("offset for destination surface is not supported\n");
DIB_DirectDrawSurface_Blt(iface, NULL, src, rsrc, 0, NULL);
}
if (!rsrc) {
WARN("rsrc is NULL!\n");
rsrc = &rsrc2;
@ -1027,6 +1026,26 @@ DIB_DirectDrawSurface_BltFast(LPDIRECTDRAWSURFACE7 iface, DWORD dstx,
ddesc.dwSize = sizeof(ddesc);
IDirectDrawSurface7_Lock(iface, &lock_dst, &ddesc, DDLOCK_WRITEONLY, 0);
/* Handle first the FOURCC surfaces... */
if ((sdesc.u4.ddpfPixelFormat.dwFlags & DDPF_FOURCC) && (ddesc.u4.ddpfPixelFormat.dwFlags & DDPF_FOURCC)) {
if (trans)
FIXME("trans arg not supported when a FOURCC surface is involved\n");
if (dstx || dsty)
FIXME("offset for destination surface is not supported\n");
if (sdesc.u4.ddpfPixelFormat.dwFourCC != sdesc.u4.ddpfPixelFormat.dwFourCC) {
FIXME("FOURCC->FOURCC copy only supported for the same type of surface\n");
ret = DDERR_INVALIDPIXELFORMAT;
goto error;
}
memcpy(ddesc.lpSurface, sdesc.lpSurface, ddesc.u1.dwLinearSize);
goto error;
}
if ((sdesc.u4.ddpfPixelFormat.dwFlags & DDPF_FOURCC) &&
(!(ddesc.u4.ddpfPixelFormat.dwFlags & DDPF_FOURCC))) {
DoDXTCDecompression(&sdesc, &ddesc);
goto error;
}
bpp = GET_BPP(This->surface_desc);
sbuf = (BYTE *) sdesc.lpSurface;
dbuf = (BYTE *) ddesc.lpSurface;