wined3d: Implement YUV emulation with GL_ARB_fragment_program.

2024-09-19 20:08:54 +00:00 · 2008-08-19 11:09:45 -05:00 · 2008-08-19 11:09:45 -05:00 · 1f4cf3513d
parent bfe7067973
commit 1f4cf3513d
3 changed files with 299 additions and 2 deletions
--- a/dlls/wined3d/arb_program_shader.c
+++ b/dlls/wined3d/arb_program_shader.c
@ -2982,3 +2982,296 @@ const struct fragment_pipeline arbfp_fragment_pipeline = {
    shader_arb_conv_supported,
    arbfp_fragmentstate_template
 };
+
+#define GLINFO_LOCATION device->adapter->gl_info
+
+struct arbfp_blit_priv {
+    GLenum yuy2_rect_shader, yuy2_2d_shader;
+    GLenum uyvy_rect_shader, uyvy_2d_shader;
+};
+
+static HRESULT arbfp_blit_alloc(IWineD3DDevice *iface) {
+    IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *) iface;
+    device->blit_priv = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, sizeof(struct arbfp_blit_priv));
+    if(!device->blit_priv) {
+        ERR("Out of memory\n");
+        return E_OUTOFMEMORY;
+    }
+    return WINED3D_OK;
+}
+static void arbfp_blit_free(IWineD3DDevice *iface) {
+    IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *) iface;
+    struct arbfp_blit_priv *priv = (struct arbfp_blit_priv *) device->blit_priv;
+
+    ENTER_GL();
+    GL_EXTCALL(glDeleteProgramsARB(1, &priv->yuy2_rect_shader));
+    GL_EXTCALL(glDeleteProgramsARB(1, &priv->yuy2_2d_shader));
+    GL_EXTCALL(glDeleteProgramsARB(1, &priv->uyvy_rect_shader));
+    GL_EXTCALL(glDeleteProgramsARB(1, &priv->uyvy_2d_shader));
+    checkGLcall("Delete yuv programs\n");
+    LEAVE_GL();
+}
+
+GLenum gen_yuv_shader(IWineD3DDeviceImpl *device, WINED3DFORMAT fmt, GLenum textype) {
+    GLenum shader;
+    SHADER_BUFFER buffer;
+    const char *tex, *texinstr;
+    char chroma, luminance;
+    struct arbfp_blit_priv *priv = (struct arbfp_blit_priv *) device->blit_priv;
+
+    /* Shader header */
+    buffer.bsize = 0;
+    buffer.lineNo = 0;
+    buffer.newline = TRUE;
+    buffer.buffer = HeapAlloc(GetProcessHeap(), HEAP_ZERO_MEMORY, SHADER_PGMSIZE);
+
+    switch(textype) {
+        case GL_TEXTURE_2D:             tex = "2D";     texinstr = "TXP"; break;
+        case GL_TEXTURE_RECTANGLE_ARB:  tex = "RECT";   texinstr = "TEX"; break;
+        default:
+            /* This is more tricky than just replacing the texture type - we have to navigate
+             * properly in the texture to find the correct chroma values
+             */
+            FIXME("Implement yuv correction for non-2d, non-rect textures\n");
+            return 0;
+    }
+
+    if(fmt == WINED3DFMT_UYVY) {
+        chroma = 'r';
+        luminance = 'a';
+    } else {
+        chroma = 'a';
+        luminance = 'r';
+    }
+
+    GL_EXTCALL(glGenProgramsARB(1, &shader));
+    checkGLcall("GL_EXTCALL(glGenProgramsARB(1, &shader))");
+    GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader));
+    checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader)");
+    if(!shader) return 0;
+
+    /* The YUY2 and UYVY formats contain two pixels packed into a 32 bit macropixel,
+     * giving effectively 16 bit per pixel. The color consists of a luminance(Y) and
+     * two chroma(U and V) values. Each macropixel has two luminance values, one for
+     * each single pixel it contains, and one U and one V value shared between both
+     * pixels.
+     *
+     * The data is loaded into an A8L8 texture. With YUY2, the luminance component
+     * contains the luminance and alpha the chroma. With UYVY it is vice versa. Thus
+     * take the format into account when generating the read swizzles
+     *
+     * Reading the Y value is streightforward - just sample the texture. The hardware
+     * takes care of filtering in the horizontal and vertical direction.
+     *
+     * Reading the U and V values is harder. We have to avoid filtering horizontally,
+     * because that would mix the U and V values of one pixel or two adjacent pixels.
+     * Thus floor the texture coordinate and add 0.5 to get an unfiltered read,
+     * regardless of the filtering setting. Vertical filtering works automatically
+     * though - the U and V values of two rows are mixed nicely.
+     *
+     * Appart of avoiding filtering issues, the code has to know which value it just
+     * read, and where it can find the other one. To determine this, it checks if
+     * it sampled an even or odd pixel, and shifts the 2nd read accordingly.
+     *
+     * Handling horizontal filtering of U and V values requires reading a 2nd pair
+     * of pixels, extracting U and V and mixing them. This is not implemented yet.
+     *
+     * An alternative implementation idea is to load the texture as A8R8G8B8 texture,
+     * with width / 2. This way one read gives all 3 values, finding U and V is easy
+     * in an unfiltered situation. Finding the luminance on the other hand requires
+     * finding out if it is an odd or even pixel. The real drawback of this approach
+     * is filtering. This would have to be emulated completely in the shader, reading
+     * up two 2 packed pixels in up to 2 rows and interpolating both horizontally and
+     * vertically. Beyond that it would require adjustments to the texture handling
+     * code to deal with the width scaling
+     */
+    shader_addline(&buffer, "!!ARBfp1.0\n");
+    shader_addline(&buffer, "TEMP luminance;\n");
+    shader_addline(&buffer, "TEMP temp;\n");
+    shader_addline(&buffer, "TEMP chroma;\n");
+    shader_addline(&buffer, "TEMP texcrd;\n");
+    shader_addline(&buffer, "TEMP texcrd2;\n");
+    shader_addline(&buffer, "PARAM coef = {1.0, 0.5, 2.0, 0.0};\n");
+    shader_addline(&buffer, "PARAM yuv_coef = {1.403, 0.344, 0.714, 1.770};\n");
+    shader_addline(&buffer, "PARAM size = program.local[0];\n");
+
+    /* First we have to read the chroma values. This means we need at least two pixels(no filtering),
+     * or 4 pixels(with filtering). To get the unmodified chromas, we have to rid ourselves of the
+     * filtering when we sample the texture.
+     *
+     * These are the rules for reading the chroma:
+     *
+     * Even pixel: Cr
+     * Even pixel: U
+     * Odd pixel: V
+     *
+     * So we have to get the sampling x position in non-normalized coordinates in integers
+     */
+    if(textype != GL_TEXTURE_RECTANGLE_ARB) {
+        shader_addline(&buffer, "MUL texcrd.rg, fragment.texcoord[0], size.x;\n");
+        shader_addline(&buffer, "MOV texcrd.a, size.x;\n");
+    } else {
+        shader_addline(&buffer, "MOV texcrd, fragment.texcoord[0];\n");
+    }
+    /* We must not allow filtering between pixel x and x+1, this would mix U and V
+     * Vertical filtering is ok. However, bear in mind that the pixel center is at
+     * 0.5, so add 0.5.
+     */
+    shader_addline(&buffer, "FLR texcrd.x, texcrd.x;\n");
+    shader_addline(&buffer, "ADD texcrd.x, texcrd.x, coef.y;\n");
+
+    /* Divide the x coordinate by 0.5 and get the fraction. This gives 0.25 and 0.75 for the
+     * even and odd pixels respectively
+     */
+    shader_addline(&buffer, "MUL texcrd2, texcrd, coef.y;\n");
+    shader_addline(&buffer, "FRC texcrd2, texcrd2;\n");
+
+    /* Sample Pixel 1 */
+    shader_addline(&buffer, "%s luminance, texcrd, texture[0], %s;\n", texinstr, tex);
+
+    /* Put the value into either of the chroma values */
+    shader_addline(&buffer, "SGE temp.x, texcrd2.x, coef.y;\n");
+    shader_addline(&buffer, "MUL chroma.r, luminance.%c, temp.x;\n", chroma);
+    shader_addline(&buffer, "SLT temp.x, texcrd2.x, coef.y;\n");
+    shader_addline(&buffer, "MUL chroma.g, luminance.%c, temp.x;\n", chroma);
+
+    /* Sample pixel 2. If we read an even pixel(SLT above returned 1), sample
+     * the pixel right to the current one. Otherwise, sample the left pixel.
+     * Bias and scale the SLT result to -1;1 and add it to the texcrd.x.
+     */
+    shader_addline(&buffer, "MAD temp.x, temp.x, coef.z, -coef.x;\n");
+    shader_addline(&buffer, "ADD texcrd.x, texcrd, temp.x;\n");
+    shader_addline(&buffer, "%s luminance, texcrd, texture[0], %s;\n", texinstr, tex);
+
+    /* Put the value into the other chroma */
+    shader_addline(&buffer, "SGE temp.x, texcrd2.x, coef.y;\n");
+    shader_addline(&buffer, "MAD chroma.g, luminance.%c, temp.x, chroma.g;\n", chroma);
+    shader_addline(&buffer, "SLT temp.x, texcrd2.x, coef.y;\n");
+    shader_addline(&buffer, "MAD chroma.r, luminance.%c, temp.x, chroma.r;\n", chroma);
+
+    /* TODO: If filtering is enabled, sample a 2nd pair of pixels left or right of
+     * the current one and lerp the two U and V values
+     */
+
+    /* This gives the correctly filtered luminance value */
+    shader_addline(&buffer, "TEX luminance, fragment.texcoord[0], texture[0], %s;\n", tex);
+
+    /* Calculate the final result. Formula is taken from
+     * http://www.fourcc.org/fccyvrgb.php. Note that the chroma
+     * ranges from -0.5 to 0.5
+     */
+    shader_addline(&buffer, "SUB chroma.rg, chroma, coef.y;\n");
+
+    shader_addline(&buffer, "MAD result.color.r, chroma.r, yuv_coef.x, luminance.%c;\n", luminance);
+    shader_addline(&buffer, "MAD temp.r, -chroma.g, yuv_coef.y, luminance.%c;\n", luminance);
+    shader_addline(&buffer, "MAD result.color.g, -chroma.r, yuv_coef.z, temp.r;\n");
+    shader_addline(&buffer, "MAD result.color.b, chroma.g, yuv_coef.w, luminance.%c;\n", luminance);
+    shader_addline(&buffer, "END\n");
+
+    GL_EXTCALL(glProgramStringARB(GL_FRAGMENT_PROGRAM_ARB, GL_PROGRAM_FORMAT_ASCII_ARB, strlen(buffer.buffer), buffer.buffer));
+
+    if (glGetError() == GL_INVALID_OPERATION) {
+        GLint pos;
+        glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &pos);
+        FIXME("Fragment program error at position %d: %s\n", pos,
+              debugstr_a((const char *)glGetString(GL_PROGRAM_ERROR_STRING_ARB)));
+    }
+
+    if(fmt == WINED3DFMT_YUY2) {
+        if(textype == GL_TEXTURE_RECTANGLE_ARB) {
+            priv->yuy2_rect_shader = shader;
+        } else {
+            priv->yuy2_2d_shader = shader;
+        }
+    } else {
+        if(textype == GL_TEXTURE_RECTANGLE_ARB) {
+            priv->uyvy_rect_shader = shader;
+        } else {
+            priv->uyvy_2d_shader = shader;
+        }
+    }
+    return shader;
+}
+
+static HRESULT arbfp_blit_set(IWineD3DDevice *iface, WINED3DFORMAT fmt, GLenum textype, UINT width, UINT height) {
+    GLenum shader;
+    IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *) iface;
+    float size[4] = {width, height, 1, 1};
+    struct arbfp_blit_priv *priv = (struct arbfp_blit_priv *) device->blit_priv;
+    const GlPixelFormatDesc *glDesc;
+
+    getFormatDescEntry(fmt, &GLINFO_LOCATION, &glDesc);
+
+    if(glDesc->conversion_group != WINED3DFMT_YUY2 && glDesc->conversion_group != WINED3DFMT_UYVY) {
+        /* Don't bother setting up a shader for unconverted formats */
+        glEnable(textype);
+        checkGLcall("glEnable(textype)");
+        return WINED3D_OK;
+    }
+
+    if(glDesc->conversion_group == WINED3DFMT_YUY2) {
+        if(textype == GL_TEXTURE_RECTANGLE_ARB) {
+            shader = priv->yuy2_rect_shader;
+        } else {
+            shader = priv->yuy2_2d_shader;
+        }
+    } else {
+        if(textype == GL_TEXTURE_RECTANGLE_ARB) {
+            shader = priv->uyvy_rect_shader;
+        } else {
+            shader = priv->uyvy_2d_shader;
+        }
+    }
+
+    if(!shader) {
+        shader = gen_yuv_shader(device, glDesc->conversion_group, textype);
+    }
+
+    glEnable(GL_FRAGMENT_PROGRAM_ARB);
+    checkGLcall("glEnable(GL_FRAGMENT_PROGRAM_ARB)");
+    GL_EXTCALL(glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader));
+    checkGLcall("glBindProgramARB(GL_FRAGMENT_PROGRAM_ARB, shader)");
+    GL_EXTCALL(glProgramLocalParameter4fvARB(GL_FRAGMENT_PROGRAM_ARB, 0, size));
+    checkGLcall("glProgramLocalParameter4fvARB");
+
+    return WINED3D_OK;
+}
+
+static void arbfp_blit_unset(IWineD3DDevice *iface) {
+    IWineD3DDeviceImpl *device = (IWineD3DDeviceImpl *) iface;
+    glDisable(GL_FRAGMENT_PROGRAM_ARB);
+    checkGLcall("glDisable(GL_FRAGMENT_PROGRAM_ARB)");
+    glDisable(GL_TEXTURE_2D);
+    checkGLcall("glDisable(GL_TEXTURE_2D)");
+    if(GL_SUPPORT(ARB_TEXTURE_CUBE_MAP)) {
+        glDisable(GL_TEXTURE_CUBE_MAP_ARB);
+        checkGLcall("glDisable(GL_TEXTURE_CUBE_MAP_ARB)");
+    }
+    if(GL_SUPPORT(ARB_TEXTURE_RECTANGLE)) {
+        glDisable(GL_TEXTURE_RECTANGLE_ARB);
+        checkGLcall("glDisable(GL_TEXTURE_RECTANGLE_ARB)");
+    }
+}
+
+static BOOL arbfp_blit_conv_supported(WINED3DFORMAT fmt) {
+    TRACE("Checking blit format support for format %s:", debug_d3dformat(fmt));
+    switch(fmt) {
+        case WINED3DFMT_YUY2:
+        case WINED3DFMT_UYVY:
+            TRACE("[OK]\n");
+            return TRUE;
+        default:
+            TRACE("[FAILED]\n");
+            return FALSE;
+    }
+}
+
+const struct blit_shader arbfp_blit = {
+    arbfp_blit_alloc,
+    arbfp_blit_free,
+    arbfp_blit_set,
+    arbfp_blit_unset,
+    arbfp_blit_conv_supported
+};
+
+#undef GLINFO_LOCATION
--- a/dlls/wined3d/directx.c
+++ b/dlls/wined3d/directx.c
@ -2953,8 +2953,11 @@ static const struct blit_shader *select_blit_implementation(UINT Adapter, WINED3
    int ps_selected_mode;

    select_shader_mode(&GLINFO_LOCATION, DeviceType, &ps_selected_mode, &vs_selected_mode);
-    return &ffp_blit;
-
+    if((ps_selected_mode == SHADER_ARB || ps_selected_mode == SHADER_GLSL) && GL_SUPPORT(ARB_FRAGMENT_PROGRAM)) {
+        return &arbfp_blit;
+    } else {
+        return &ffp_blit;
+    }
 }

 /* Note: d3d8 passes in a pointer to a D3DCAPS8 structure, which is a true
--- a/dlls/wined3d/wined3d_private.h
+++ b/dlls/wined3d/wined3d_private.h
@ -587,6 +587,7 @@ struct blit_shader {
 };

 extern const struct blit_shader ffp_blit;
+extern const struct blit_shader arbfp_blit;

 /* The new context manager that should deal with onscreen and offscreen rendering */
 struct WineD3DContext {