LibSoftGPU: Make blending simpler and more efficient

Previously, we would precalculate "alpha blend factors" on every
configuration update and then calculate the source and destination
blending factors in one go using all these factors. The idea here was
probably that we would get better performance by avoiding branching.

However, by measuring blending performance in Quake III, it seems that
this simpler version that only calculates the required factors reduces
the CPU time spent in `rasterize_triangle` by 3%.

As a bonus, `GL_SRC_ALPHA_SATURATE` is now also implemented.
This commit is contained in:
Jelle Raaijmakers 2023-02-01 21:18:53 +01:00 committed by Andreas Kling
parent f0f9d8f1e0
commit 69b94e4235
4 changed files with 63 additions and 130 deletions

View file

@ -1,27 +0,0 @@
/*
* Copyright (c) 2021, Stephan Unverwerth <s.unverwerth@serenityos.org>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <LibGfx/Vector4.h>
namespace SoftGPU {
struct AlphaBlendFactors final {
FloatVector4 src_constant {};
float src_factor_src_alpha = 0;
float src_factor_dst_alpha = 0;
float src_factor_src_color = 0;
float src_factor_dst_color = 0;
FloatVector4 dst_constant {};
float dst_factor_src_alpha = 0;
float dst_factor_dst_alpha = 0;
float dst_factor_src_color = 0;
float dst_factor_dst_color = 0;
};
}

View file

@ -1,7 +1,7 @@
/*
* Copyright (c) 2021, Stephan Unverwerth <s.unverwerth@serenityos.org>
* Copyright (c) 2021, Jesse Buhagiar <jooster669@gmail.com>
* Copyright (c) 2022, Jelle Raaijmakers <jelle@gmta.nl>
* Copyright (c) 2022-2023, Jelle Raaijmakers <jelle@gmta.nl>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
@ -103,89 +103,6 @@ static Vector4<f32x4> to_vec4(u32x4 bgra)
};
}
void Device::setup_blend_factors()
{
m_alpha_blend_factors = {};
switch (m_options.blend_source_factor) {
case GPU::BlendFactor::Zero:
break;
case GPU::BlendFactor::One:
m_alpha_blend_factors.src_constant = { 1.0f, 1.0f, 1.0f, 1.0f };
break;
case GPU::BlendFactor::SrcColor:
m_alpha_blend_factors.src_factor_src_color = 1;
break;
case GPU::BlendFactor::OneMinusSrcColor:
m_alpha_blend_factors.src_constant = { 1.0f, 1.0f, 1.0f, 1.0f };
m_alpha_blend_factors.src_factor_src_color = -1;
break;
case GPU::BlendFactor::SrcAlpha:
m_alpha_blend_factors.src_factor_src_alpha = 1;
break;
case GPU::BlendFactor::OneMinusSrcAlpha:
m_alpha_blend_factors.src_constant = { 1.0f, 1.0f, 1.0f, 1.0f };
m_alpha_blend_factors.src_factor_src_alpha = -1;
break;
case GPU::BlendFactor::DstAlpha:
m_alpha_blend_factors.src_factor_dst_alpha = 1;
break;
case GPU::BlendFactor::OneMinusDstAlpha:
m_alpha_blend_factors.src_constant = { 1.0f, 1.0f, 1.0f, 1.0f };
m_alpha_blend_factors.src_factor_dst_alpha = -1;
break;
case GPU::BlendFactor::DstColor:
m_alpha_blend_factors.src_factor_dst_color = 1;
break;
case GPU::BlendFactor::OneMinusDstColor:
m_alpha_blend_factors.src_constant = { 1.0f, 1.0f, 1.0f, 1.0f };
m_alpha_blend_factors.src_factor_dst_color = -1;
break;
case GPU::BlendFactor::SrcAlphaSaturate:
default:
VERIFY_NOT_REACHED();
}
switch (m_options.blend_destination_factor) {
case GPU::BlendFactor::Zero:
break;
case GPU::BlendFactor::One:
m_alpha_blend_factors.dst_constant = { 1.0f, 1.0f, 1.0f, 1.0f };
break;
case GPU::BlendFactor::SrcColor:
m_alpha_blend_factors.dst_factor_src_color = 1;
break;
case GPU::BlendFactor::OneMinusSrcColor:
m_alpha_blend_factors.dst_constant = { 1.0f, 1.0f, 1.0f, 1.0f };
m_alpha_blend_factors.dst_factor_src_color = -1;
break;
case GPU::BlendFactor::SrcAlpha:
m_alpha_blend_factors.dst_factor_src_alpha = 1;
break;
case GPU::BlendFactor::OneMinusSrcAlpha:
m_alpha_blend_factors.dst_constant = { 1.0f, 1.0f, 1.0f, 1.0f };
m_alpha_blend_factors.dst_factor_src_alpha = -1;
break;
case GPU::BlendFactor::DstAlpha:
m_alpha_blend_factors.dst_factor_dst_alpha = 1;
break;
case GPU::BlendFactor::OneMinusDstAlpha:
m_alpha_blend_factors.dst_constant = { 1.0f, 1.0f, 1.0f, 1.0f };
m_alpha_blend_factors.dst_factor_dst_alpha = -1;
break;
case GPU::BlendFactor::DstColor:
m_alpha_blend_factors.dst_factor_dst_color = 1;
break;
case GPU::BlendFactor::OneMinusDstColor:
m_alpha_blend_factors.dst_constant = { 1.0f, 1.0f, 1.0f, 1.0f };
m_alpha_blend_factors.dst_factor_dst_color = -1;
break;
case GPU::BlendFactor::SrcAlphaSaturate:
default:
VERIFY_NOT_REACHED();
}
}
ALWAYS_INLINE static void test_alpha(PixelQuad& quad, GPU::AlphaTestFunction alpha_test_function, f32x4 const& reference_value)
{
auto const alpha = quad.get_output_float(SHADER_OUTPUT_FIRST_COLOR + 3);
@ -218,6 +135,44 @@ ALWAYS_INLINE static void test_alpha(PixelQuad& quad, GPU::AlphaTestFunction alp
}
}
ALWAYS_INLINE static bool is_blend_factor_constant(GPU::BlendFactor blend_factor)
{
return (blend_factor == GPU::BlendFactor::One || blend_factor == GPU::BlendFactor::Zero);
}
// OpenGL 1.5 § 4.1.8, table 4.1
ALWAYS_INLINE static Vector4<f32x4> get_blend_factor(GPU::BlendFactor blend_factor, Vector4<f32x4> const& source_color, Vector4<f32x4> const& destination_color)
{
switch (blend_factor) {
case GPU::BlendFactor::DstAlpha:
return to_vec4(destination_color.w());
case GPU::BlendFactor::DstColor:
return destination_color;
case GPU::BlendFactor::One:
return to_vec4(expand4(1.f));
case GPU::BlendFactor::OneMinusDstAlpha:
return to_vec4(1.f - destination_color.w());
case GPU::BlendFactor::OneMinusDstColor:
return to_vec4(expand4(1.f)) - destination_color;
case GPU::BlendFactor::OneMinusSrcAlpha:
return to_vec4(1.f - source_color.w());
case GPU::BlendFactor::OneMinusSrcColor:
return to_vec4(expand4(1.f)) - source_color;
case GPU::BlendFactor::SrcAlpha:
return to_vec4(source_color.w());
case GPU::BlendFactor::SrcAlphaSaturate: {
auto saturated = min(source_color.w(), 1.f - destination_color.w());
return { saturated, saturated, saturated, expand4(1.f) };
}
case GPU::BlendFactor::SrcColor:
return source_color;
case GPU::BlendFactor::Zero:
return to_vec4(expand4(0.f));
default:
VERIFY_NOT_REACHED();
}
}
template<typename CB1, typename CB2, typename CB3>
ALWAYS_INLINE void Device::rasterize(Gfx::IntRect& render_bounds, CB1 set_coverage_mask, CB2 set_quad_depth, CB3 set_quad_attributes)
{
@ -284,6 +239,18 @@ ALWAYS_INLINE void Device::rasterize(Gfx::IntRect& render_bounds, CB1 set_covera
auto const qy0 = render_bounds_top & ~1;
auto const qy1 = render_bounds_bottom & ~1;
// Blend factors
Vector4<f32x4> src_factor;
Vector4<f32x4> dst_factor;
auto const src_factor_is_constant = is_blend_factor_constant(m_options.blend_source_factor);
auto const dst_factor_is_constant = is_blend_factor_constant(m_options.blend_destination_factor);
if (m_options.enable_blending) {
if (src_factor_is_constant)
src_factor = get_blend_factor(m_options.blend_source_factor, {}, {});
if (dst_factor_is_constant)
dst_factor = get_blend_factor(m_options.blend_destination_factor, {}, {});
}
// Rasterize all quads
// FIXME: this could be embarrassingly parallel
for (int qy = qy0; qy <= qy1; qy += 2) {
@ -474,19 +441,12 @@ ALWAYS_INLINE void Device::rasterize(Gfx::IntRect& render_bounds, CB1 set_covera
// Blend color values from pixel_staging into color_buffer
auto const& src = out_color;
auto dst = to_vec4(dst_u32);
auto const dst = to_vec4(dst_u32);
auto src_factor = expand4(m_alpha_blend_factors.src_constant)
+ src * m_alpha_blend_factors.src_factor_src_color
+ Vector4<f32x4> { src.w(), src.w(), src.w(), src.w() } * m_alpha_blend_factors.src_factor_src_alpha
+ dst * m_alpha_blend_factors.src_factor_dst_color
+ Vector4<f32x4> { dst.w(), dst.w(), dst.w(), dst.w() } * m_alpha_blend_factors.src_factor_dst_alpha;
auto dst_factor = expand4(m_alpha_blend_factors.dst_constant)
+ src * m_alpha_blend_factors.dst_factor_src_color
+ Vector4<f32x4> { src.w(), src.w(), src.w(), src.w() } * m_alpha_blend_factors.dst_factor_src_alpha
+ dst * m_alpha_blend_factors.dst_factor_dst_color
+ Vector4<f32x4> { dst.w(), dst.w(), dst.w(), dst.w() } * m_alpha_blend_factors.dst_factor_dst_alpha;
if (!src_factor_is_constant)
src_factor = get_blend_factor(m_options.blend_source_factor, src, dst);
if (!dst_factor_is_constant)
dst_factor = get_blend_factor(m_options.blend_destination_factor, src, dst);
out_color = src * src_factor + dst * dst_factor;
}
@ -1595,9 +1555,6 @@ void Device::draw_statistics_overlay(Gfx::Bitmap& target)
void Device::set_options(GPU::RasterizerOptions const& options)
{
m_options = options;
if (m_options.enable_blending)
setup_blend_factors();
}
void Device::set_light_model_params(GPU::LightModelParameters const& lighting_model)

View file

@ -1,6 +1,6 @@
/*
* Copyright (c) 2021, Stephan Unverwerth <s.unverwerth@serenityos.org>
* Copyright (c) 2022, Jelle Raaijmakers <jelle@gmta.nl>
* Copyright (c) 2022-2023, Jelle Raaijmakers <jelle@gmta.nl>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
@ -29,7 +29,6 @@
#include <LibGfx/Matrix4x4.h>
#include <LibGfx/Rect.h>
#include <LibGfx/Vector4.h>
#include <LibSoftGPU/AlphaBlendFactors.h>
#include <LibSoftGPU/Buffer/FrameBuffer.h>
#include <LibSoftGPU/Buffer/Typed2DBuffer.h>
#include <LibSoftGPU/Clipper.h>
@ -102,7 +101,6 @@ private:
void rasterize_point(GPU::Vertex&);
void rasterize_triangle(Triangle&);
void setup_blend_factors();
void shade_fragments(PixelQuad&);
RefPtr<FrameBuffer<GPU::ColorType, GPU::DepthType, GPU::StencilType>> m_frame_buffer {};
@ -113,7 +111,6 @@ private:
Vector<Triangle> m_processed_triangles;
Vector<GPU::Vertex> m_clipped_vertices;
Array<Sampler, GPU::NUM_TEXTURE_UNITS> m_samplers;
AlphaBlendFactors m_alpha_blend_factors;
Array<GPU::Light, NUM_LIGHTS> m_lights;
Array<GPU::Material, 2u> m_materials;
GPU::RasterPosition m_raster_position;

View file

@ -1,5 +1,6 @@
/*
* Copyright (c) 2021, Stephan Unverwerth <s.unverwerth@serenityos.org>
* Copyright (c) 2023, Jelle Raaijmakers <jelle@gmta.nl>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
@ -138,4 +139,9 @@ ALWAYS_INLINE static Vector2<AK::SIMD::f32x4> to_vec2_f32x4(Vector2<AK::SIMD::i3
};
}
ALWAYS_INLINE static constexpr Vector4<AK::SIMD::f32x4> to_vec4(AK::SIMD::f32x4 v)
{
return { v, v, v, v };
}
}