LibGL+LibSoftGPU: Implement the stencil buffer

This implements an 8-bit front stencil buffer. Stencil operations are
SIMD optimized. LibGL changes include:

* New `glStencilMask` and `glStencilMaskSeparate` functions
* New context parameter `GL_STENCIL_CLEAR_VALUE`
This commit is contained in:
Jelle Raaijmakers 2022-01-16 22:48:46 +01:00 committed by Andreas Kling
parent 6386671944
commit 11c807ebd1
13 changed files with 430 additions and 77 deletions

View file

@ -109,6 +109,17 @@ ALWAYS_INLINE static f32x4 load4_masked(float const* a, float const* b, float co
};
}
// Gathers four u8 values into the lanes of an i32x4, widening each one to int.
// Lane i is read only when bit i of maskbits(mask) is set; a disabled lane yields 0
// and its pointer is never dereferenced.
ALWAYS_INLINE static i32x4 load4_masked(u8 const* a, u8 const* b, u8 const* c, u8 const* d, i32x4 mask)
{
    int const lane_bits = maskbits(mask);
    auto const load_lane = [lane_bits](int lane_bit, u8 const* source) -> int {
        return (lane_bits & lane_bit) != 0 ? *source : 0;
    };
    return i32x4 {
        load_lane(1, a),
        load_lane(2, b),
        load_lane(4, c),
        load_lane(8, d),
    };
}
ALWAYS_INLINE static u32x4 load4_masked(u32 const* a, u32 const* b, u32 const* c, u32 const* d, i32x4 mask)
{
int bits = maskbits(mask);

View file

@ -98,6 +98,7 @@ extern "C" {
#define GL_COLOR_MATERIAL 0x0B57
#define GL_FOG_START 0x0B63
#define GL_FOG_END 0x0B64
#define GL_STENCIL_CLEAR_VALUE 0x0B91
#define GL_MATRIX_MODE 0x0BA0
#define GL_NORMALIZE 0x0BA1
#define GL_VIEWPORT 0x0BA2
@ -605,9 +606,10 @@ GLAPI void glLightModelfv(GLenum pname, GLfloat const* params);
GLAPI void glLightModeli(GLenum pname, GLint param);
GLAPI void glStencilFunc(GLenum func, GLint ref, GLuint mask);
GLAPI void glStencilFuncSeparate(GLenum face, GLenum func, GLint ref, GLuint mask);
GLAPI void glStencilMask(GLuint mask);
GLAPI void glStencilMaskSeparate(GLenum face, GLuint mask);
GLAPI void glStencilOp(GLenum sfail, GLenum dpfail, GLenum dppass);
GLAPI void glStencilOpSeparate(GLenum face, GLenum sfail, GLenum dpfail, GLenum dppass);
GLAPI void glStencilMask(GLuint mask);
GLAPI void glNormal3f(GLfloat nx, GLfloat ny, GLfloat nz);
GLAPI void glNormal3fv(GLfloat const* v);
GLAPI void glNormalPointer(GLenum type, GLsizei stride, void const* pointer);

View file

@ -97,6 +97,7 @@ public:
virtual void gl_pixel_storei(GLenum pname, GLint param) = 0;
virtual void gl_scissor(GLint x, GLint y, GLsizei width, GLsizei height) = 0;
virtual void gl_stencil_func_separate(GLenum face, GLenum func, GLint ref, GLuint mask) = 0;
virtual void gl_stencil_mask_separate(GLenum face, GLuint mask) = 0;
virtual void gl_stencil_op_separate(GLenum face, GLenum sfail, GLenum dpfail, GLenum dppass) = 0;
virtual void gl_normal(GLfloat nx, GLfloat ny, GLfloat nz) = 0;
virtual void gl_normal_pointer(GLenum type, GLsizei stride, void const* pointer) = 0;

View file

@ -36,5 +36,10 @@ void glStencilOpSeparate(GLenum face, GLenum sfail, GLenum dpfail, GLenum dppass
// Sets the stencil write mask for both front- and back-facing primitives by
// forwarding to the active context with GL_FRONT_AND_BACK.
// The former "(STUBBED)" debug log is gone: the call is implemented now, so the
// message was misleading.
void glStencilMask(GLuint mask)
{
    g_gl_context->gl_stencil_mask_separate(GL_FRONT_AND_BACK, mask);
}
// Sets the stencil write mask for the given face by forwarding both arguments
// unchanged to the active GL context.
void glStencilMaskSeparate(GLenum face, GLuint mask)
{
g_gl_context->gl_stencil_mask_separate(face, mask);
}

View file

@ -165,7 +165,9 @@ Optional<ContextParameter> SoftwareGLContext::get_context_parameter(GLenum name)
return ContextParameter { .type = GL_BOOL, .is_capability = true, .value = { .boolean_value = scissor_enabled } };
}
case GL_STENCIL_BITS:
return ContextParameter { .type = GL_INT, .value = { .integer_value = sizeof(float) * 8 } };
return ContextParameter { .type = GL_INT, .value = { .integer_value = m_device_info.stencil_bits } };
case GL_STENCIL_CLEAR_VALUE:
return ContextParameter { .type = GL_INT, .value = { .integer_value = m_clear_stencil } };
case GL_STENCIL_TEST:
return ContextParameter { .type = GL_BOOL, .is_capability = true, .value = { .boolean_value = m_stencil_test_enabled } };
case GL_TEXTURE_1D:
@ -239,9 +241,8 @@ void SoftwareGLContext::gl_clear(GLbitfield mask)
if (mask & GL_DEPTH_BUFFER_BIT)
m_rasterizer.clear_depth(static_cast<float>(m_clear_depth));
// FIXME: implement GL_STENCIL_BUFFER_BIT
if (mask & GL_STENCIL_BUFFER_BIT)
dbgln_if(GL_DEBUG, "gl_clear(): GL_STENCIL_BUFFER_BIT is unimplemented");
m_rasterizer.clear_stencil(m_clear_stencil);
}
void SoftwareGLContext::gl_clear_color(GLclampf red, GLclampf green, GLclampf blue, GLclampf alpha)
@ -268,9 +269,7 @@ void SoftwareGLContext::gl_clear_stencil(GLint s)
RETURN_WITH_ERROR_IF(m_in_draw_state, GL_INVALID_OPERATION);
// FIXME: "s is masked with 2^m - 1 , where m is the number of bits in the stencil buffer"
m_clear_stencil = s;
m_clear_stencil = static_cast<u8>(s & ((1 << m_device_info.stencil_bits) - 1));
}
void SoftwareGLContext::gl_color(GLdouble r, GLdouble g, GLdouble b, GLdouble a)
@ -697,6 +696,8 @@ void SoftwareGLContext::gl_enable(GLenum capability)
break;
case GL_STENCIL_TEST:
m_stencil_test_enabled = true;
rasterizer_options.enable_stencil_test = true;
update_rasterizer_options = true;
break;
case GL_TEXTURE_1D:
m_active_texture_unit->set_texture_1d_enabled(true);
@ -808,6 +809,8 @@ void SoftwareGLContext::gl_disable(GLenum capability)
break;
case GL_STENCIL_TEST:
m_stencil_test_enabled = false;
rasterizer_options.enable_stencil_test = false;
update_rasterizer_options = true;
break;
case GL_TEXTURE_1D:
m_active_texture_unit->set_texture_1d_enabled(false);
@ -2631,13 +2634,28 @@ void SoftwareGLContext::gl_stencil_func_separate(GLenum face, GLenum func, GLint
|| func == GL_ALWAYS),
GL_INVALID_ENUM);
// "ref is clamped to the range [0, 2^n - 1], where n is the number of bitplanes in the stencil buffer"
ref = clamp(ref, 0, (1 << m_device_info.stencil_bits) - 1);
StencilFunctionOptions new_options = { func, ref, mask };
if (face == GL_FRONT || face == GL_FRONT_AND_BACK)
m_stencil_function[Face::Front] = new_options;
if (face == GL_BACK || face == GL_FRONT_AND_BACK)
m_stencil_function[Face::Back] = new_options;
m_stencil_configuration_dirty = true;
}
// Sets the stencil write mask for the front face, the back face, or both.
//
// face: GL_FRONT, GL_BACK or GL_FRONT_AND_BACK; anything else raises GL_INVALID_ENUM.
// mask: bit mask stored as the write mask of the selected face(s).
//
// Raises GL_INVALID_OPERATION when called while in the draw state. On success the
// stencil configuration is marked dirty so it is pushed to the device on the next sync.
void SoftwareGLContext::gl_stencil_mask_separate(GLenum face, GLuint mask)
{
    APPEND_TO_CALL_LIST_AND_RETURN_IF_NEEDED(gl_stencil_mask_separate, face, mask);
    RETURN_WITH_ERROR_IF(m_in_draw_state, GL_INVALID_OPERATION);
    // Reject unknown faces instead of silently ignoring them, like gl_stencil_op_separate() does.
    RETURN_WITH_ERROR_IF(!(face == GL_FRONT || face == GL_BACK || face == GL_FRONT_AND_BACK), GL_INVALID_ENUM);

    if (face == GL_FRONT || face == GL_FRONT_AND_BACK)
        m_stencil_operation[Face::Front].write_mask = mask;
    if (face == GL_BACK || face == GL_FRONT_AND_BACK)
        m_stencil_operation[Face::Back].write_mask = mask;

    m_stencil_configuration_dirty = true;
}
void SoftwareGLContext::gl_stencil_op_separate(GLenum face, GLenum sfail, GLenum dpfail, GLenum dppass)
@ -2647,39 +2665,26 @@ void SoftwareGLContext::gl_stencil_op_separate(GLenum face, GLenum sfail, GLenum
RETURN_WITH_ERROR_IF(!(face == GL_FRONT || face == GL_BACK || face == GL_FRONT_AND_BACK), GL_INVALID_ENUM);
RETURN_WITH_ERROR_IF(!(sfail == GL_KEEP
|| sfail == GL_ZERO
|| sfail == GL_REPLACE
|| sfail == GL_INCR
|| sfail == GL_INCR_WRAP
|| sfail == GL_DECR
|| sfail == GL_DECR_WRAP
|| sfail == GL_INVERT),
GL_INVALID_ENUM);
RETURN_WITH_ERROR_IF(!(dpfail == GL_KEEP
|| dpfail == GL_ZERO
|| dpfail == GL_REPLACE
|| dpfail == GL_INCR
|| dpfail == GL_INCR_WRAP
|| dpfail == GL_DECR
|| dpfail == GL_DECR_WRAP
|| dpfail == GL_INVERT),
GL_INVALID_ENUM);
RETURN_WITH_ERROR_IF(!(dppass == GL_KEEP
|| dppass == GL_ZERO
|| dppass == GL_REPLACE
|| dppass == GL_INCR
|| dppass == GL_INCR_WRAP
|| dppass == GL_DECR
|| dppass == GL_DECR_WRAP
|| dppass == GL_INVERT),
GL_INVALID_ENUM);
auto is_valid_op = [](GLenum op) -> bool {
return op == GL_KEEP || op == GL_ZERO || op == GL_REPLACE || op == GL_INCR || op == GL_INCR_WRAP
|| op == GL_DECR || op == GL_DECR_WRAP || op == GL_INVERT;
};
RETURN_WITH_ERROR_IF(!is_valid_op(sfail), GL_INVALID_ENUM);
RETURN_WITH_ERROR_IF(!is_valid_op(dpfail), GL_INVALID_ENUM);
RETURN_WITH_ERROR_IF(!is_valid_op(dppass), GL_INVALID_ENUM);
StencilOperationOptions new_options = { sfail, dpfail, dppass };
auto update_stencil_operation = [&](Face face, GLenum sfail, GLenum dpfail, GLenum dppass) {
auto& stencil_operation = m_stencil_operation[face];
stencil_operation.op_fail = sfail;
stencil_operation.op_depth_fail = dpfail;
stencil_operation.op_pass = dppass;
};
if (face == GL_FRONT || face == GL_FRONT_AND_BACK)
m_stencil_operation[Face::Front] = new_options;
update_stencil_operation(Face::Front, sfail, dpfail, dppass);
if (face == GL_BACK || face == GL_FRONT_AND_BACK)
m_stencil_operation[Face::Back] = new_options;
update_stencil_operation(Face::Back, sfail, dpfail, dppass);
m_stencil_configuration_dirty = true;
}
void SoftwareGLContext::gl_normal(GLfloat nx, GLfloat ny, GLfloat nz)
@ -2908,6 +2913,7 @@ void SoftwareGLContext::sync_device_config()
sync_device_sampler_config();
sync_device_texcoord_config();
sync_light_state();
sync_stencil_configuration();
}
void SoftwareGLContext::sync_device_sampler_config()
@ -3170,6 +3176,74 @@ void SoftwareGLContext::sync_device_texcoord_config()
m_rasterizer.set_options(options);
}
void SoftwareGLContext::sync_stencil_configuration()
{
if (!m_stencil_configuration_dirty)
return;
m_stencil_configuration_dirty = false;
auto set_device_stencil = [&](SoftGPU::Face face, StencilFunctionOptions func, StencilOperationOptions op) {
SoftGPU::StencilConfiguration device_configuration;
// Stencil test function
auto map_func = [](GLenum func) -> SoftGPU::StencilTestFunction {
switch (func) {
case GL_ALWAYS:
return SoftGPU::StencilTestFunction::Always;
case GL_EQUAL:
return SoftGPU::StencilTestFunction::Equal;
case GL_GEQUAL:
return SoftGPU::StencilTestFunction::GreaterOrEqual;
case GL_GREATER:
return SoftGPU::StencilTestFunction::Greater;
case GL_LESS:
return SoftGPU::StencilTestFunction::Less;
case GL_LEQUAL:
return SoftGPU::StencilTestFunction::LessOrEqual;
case GL_NEVER:
return SoftGPU::StencilTestFunction::Never;
case GL_NOTEQUAL:
return SoftGPU::StencilTestFunction::NotEqual;
}
VERIFY_NOT_REACHED();
};
device_configuration.test_function = map_func(func.func);
device_configuration.reference_value = func.reference_value;
device_configuration.test_mask = func.mask;
// Stencil operation
auto map_operation = [](GLenum operation) -> SoftGPU::StencilOperation {
switch (operation) {
case GL_DECR:
return SoftGPU::StencilOperation::Decrement;
case GL_DECR_WRAP:
return SoftGPU::StencilOperation::DecrementWrap;
case GL_INCR:
return SoftGPU::StencilOperation::Increment;
case GL_INCR_WRAP:
return SoftGPU::StencilOperation::IncrementWrap;
case GL_INVERT:
return SoftGPU::StencilOperation::Invert;
case GL_KEEP:
return SoftGPU::StencilOperation::Keep;
case GL_REPLACE:
return SoftGPU::StencilOperation::Replace;
case GL_ZERO:
return SoftGPU::StencilOperation::Zero;
}
VERIFY_NOT_REACHED();
};
device_configuration.on_stencil_test_fail = map_operation(op.op_fail);
device_configuration.on_depth_test_fail = map_operation(op.op_depth_fail);
device_configuration.on_pass = map_operation(op.op_pass);
device_configuration.write_mask = op.write_mask;
m_rasterizer.set_stencil_configuration(face, device_configuration);
};
set_device_stencil(SoftGPU::Face::Front, m_stencil_function[Face::Front], m_stencil_operation[Face::Front]);
set_device_stencil(SoftGPU::Face::Back, m_stencil_function[Face::Back], m_stencil_operation[Face::Back]);
}
void SoftwareGLContext::gl_lightf(GLenum light, GLenum pname, GLfloat param)
{
APPEND_TO_CALL_LIST_AND_RETURN_IF_NEEDED(gl_lightf, light, pname, param);

View file

@ -128,6 +128,7 @@ public:
virtual void gl_pixel_storei(GLenum pname, GLint param) override;
virtual void gl_scissor(GLint x, GLint y, GLsizei width, GLsizei height) override;
virtual void gl_stencil_func_separate(GLenum face, GLenum func, GLint ref, GLuint mask) override;
virtual void gl_stencil_mask_separate(GLenum face, GLuint mask) override;
virtual void gl_stencil_op_separate(GLenum face, GLenum sfail, GLenum dpfail, GLenum dppass) override;
virtual void gl_normal(GLfloat nx, GLfloat ny, GLfloat nz) override;
virtual void gl_normal_pointer(GLenum type, GLsizei stride, void const* pointer) override;
@ -154,6 +155,7 @@ private:
void sync_device_sampler_config();
void sync_device_texcoord_config();
void sync_light_state();
void sync_stencil_configuration();
template<typename T>
T* store_in_listing(T value)
@ -195,7 +197,7 @@ private:
FloatVector4 m_clear_color { 0.0f, 0.0f, 0.0f, 0.0f };
double m_clear_depth { 1.0 };
GLint m_clear_stencil { 0 };
u8 m_clear_stencil { 0 };
FloatVector4 m_current_vertex_color = { 1.0f, 1.0f, 1.0f, 1.0f };
FloatVector4 m_current_vertex_tex_coord = { 0.0f, 0.0f, 0.0f, 1.0f };
@ -225,6 +227,7 @@ private:
// Stencil configuration
bool m_stencil_test_enabled { false };
bool m_stencil_configuration_dirty { true };
struct StencilFunctionOptions {
GLenum func { GL_ALWAYS };
@ -237,6 +240,7 @@ private:
GLenum op_fail { GL_KEEP };
GLenum op_depth_fail { GL_KEEP };
GLenum op_pass { GL_KEEP };
GLuint write_mask { NumericLimits<GLuint>::max() };
};
Array<StencilOperationOptions, 2u> m_stencil_operation;
@ -360,6 +364,7 @@ private:
decltype(&SoftwareGLContext::gl_polygon_offset),
decltype(&SoftwareGLContext::gl_scissor),
decltype(&SoftwareGLContext::gl_stencil_func_separate),
decltype(&SoftwareGLContext::gl_stencil_mask_separate),
decltype(&SoftwareGLContext::gl_stencil_op_separate),
decltype(&SoftwareGLContext::gl_normal),
decltype(&SoftwareGLContext::gl_raster_pos),

View file

@ -4,6 +4,7 @@ set(SOURCES
Device.cpp
Image.cpp
Sampler.cpp
StencilBuffer.cpp
)
add_compile_options(-Wno-psabi)

View file

@ -1,12 +1,14 @@
/*
* Copyright (c) 2021, Stephan Unverwerth <s.unverwerth@serenityos.org>
* Copyright (c) 2021, Jesse Buhagiar <jooster669@gmail.com>
* Copyright (c) 2022, Jelle Raaijmakers <jelle@gmta.nl>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <AK/Function.h>
#include <AK/Math.h>
#include <AK/NumericLimits.h>
#include <AK/SIMDExtras.h>
#include <AK/SIMDMath.h>
#include <LibCore/ElapsedTimer.h>
@ -82,11 +84,11 @@ static Vector4<f32x4> to_vec4(u32x4 rgba)
};
}
static Gfx::IntRect window_coordinates_to_target_coordinates(Gfx::IntRect const window_rect, Gfx::IntRect const target_rect)
Gfx::IntRect Device::window_coordinates_to_target_coordinates(Gfx::IntRect const& window_rect)
{
return {
window_rect.x(),
target_rect.height() - window_rect.height() - window_rect.y(),
m_render_target->rect().height() - window_rect.height() - window_rect.y(),
window_rect.width(),
window_rect.height(),
};
@ -213,7 +215,7 @@ void Device::rasterize_triangle(const Triangle& triangle)
auto render_bounds = m_render_target->rect();
if (m_options.scissor_enabled)
render_bounds.intersect(window_coordinates_to_target_coordinates(m_options.scissor_box, m_render_target->rect()));
render_bounds.intersect(window_coordinates_to_target_coordinates(m_options.scissor_box));
// Obey top-left rule:
// This sets up "zero" for later pixel coverage tests.
@ -229,7 +231,7 @@ void Device::rasterize_triangle(const Triangle& triangle)
zero.set_y(0);
// This function calculates the 3 edge values for the pixel relative to the triangle.
auto calculate_edge_values4 = [v0, v1, v2](const Vector2<i32x4>& p) -> Vector3<i32x4> {
auto calculate_edge_values4 = [v0, v1, v2](Vector2<i32x4> const& p) -> Vector3<i32x4> {
return {
edge_function4(v1, v2, p),
edge_function4(v2, v0, p),
@ -238,7 +240,7 @@ void Device::rasterize_triangle(const Triangle& triangle)
};
// This function tests whether a point as identified by its 3 edge values lies within the triangle
auto test_point4 = [zero](const Vector3<i32x4>& edges) -> i32x4 {
auto test_point4 = [zero](Vector3<i32x4> const& edges) -> i32x4 {
return edges.x() >= zero.x()
&& edges.y() >= zero.y()
&& edges.z() >= zero.z();
@ -257,8 +259,6 @@ void Device::rasterize_triangle(const Triangle& triangle)
float const vertex1_eye_absz = fabs(vertex1.eye_coordinates.z());
float const vertex2_eye_absz = fabs(vertex2.eye_coordinates.z());
// FIXME: implement stencil testing
int const render_bounds_left = render_bounds.x();
int const render_bounds_right = render_bounds.x() + render_bounds.width();
int const render_bounds_top = render_bounds.y();
@ -269,10 +269,46 @@ void Device::rasterize_triangle(const Triangle& triangle)
expand4(subpixel_factor / 2),
};
// Stencil configuration and writing
auto const stencil_configuration = m_stencil_configuration[Face::Front];
auto const stencil_reference_value = stencil_configuration.reference_value & stencil_configuration.test_mask;
// Applies one stencil operation to a quad of stencil values and stores the result.
//   stencil_ptrs:    destination pointers for the four pixels, forwarded to store4_masked()
//   stencil_value:   the values the operation starts from, one per lane
//   op:              operation to apply
//   reference_value: value written by StencilOperation::Replace
//   write_mask:      only these bits are changed; all other bits of stencil_value are kept
//   pixel_mask:      forwarded to store4_masked() (presumably selects which lanes are written;
//                    store4_masked() is not visible here)
auto write_to_stencil = [](u8* stencil_ptrs[4], i32x4 stencil_value, StencilOperation op, u8 reference_value, u8 write_mask, i32x4 pixel_mask) {
// Nothing can change when no bit is writable or the operation keeps the value
if (write_mask == 0 || op == StencilOperation::Keep)
return;
switch (op) {
case StencilOperation::Decrement:
// Subtract one, clamping at 0
stencil_value = (stencil_value & ~write_mask) | (max(stencil_value - 1, expand4(0)) & write_mask);
break;
case StencilOperation::DecrementWrap:
// Subtract one, wrapping within 8 bits (0 becomes 0xFF)
stencil_value = (stencil_value & ~write_mask) | (((stencil_value - 1) & 0xFF) & write_mask);
break;
case StencilOperation::Increment:
// Add one, clamping at 0xFF
stencil_value = (stencil_value & ~write_mask) | (min(stencil_value + 1, expand4(0xFF)) & write_mask);
break;
case StencilOperation::IncrementWrap:
// Add one, wrapping within 8 bits (0xFF becomes 0)
stencil_value = (stencil_value & ~write_mask) | (((stencil_value + 1) & 0xFF) & write_mask);
break;
case StencilOperation::Invert:
// XOR flips exactly the writable bits
stencil_value ^= write_mask;
break;
case StencilOperation::Replace:
stencil_value = (stencil_value & ~write_mask) | (reference_value & write_mask);
break;
case StencilOperation::Zero:
// Clear exactly the writable bits
stencil_value &= ~write_mask;
break;
default:
VERIFY_NOT_REACHED();
}
store4_masked(stencil_value, stencil_ptrs[0], stencil_ptrs[1], stencil_ptrs[2], stencil_ptrs[3], pixel_mask);
};
// Iterate over all blocks within the bounds of the triangle
for (int by = by0; by < by1; by += 2) {
for (int bx = bx0; bx < bx1; bx += 2) {
PixelQuad quad;
quad.screen_coordinates = {
@ -306,14 +342,70 @@ void Device::rasterize_triangle(const Triangle& triangle)
int coverage_bits = maskbits(quad.mask);
// Stencil testing
u8* stencil_ptrs[4];
i32x4 stencil_value;
if (m_options.enable_stencil_test) {
stencil_ptrs[0] = coverage_bits & 1 ? &m_stencil_buffer->scanline(by)[bx] : nullptr;
stencil_ptrs[1] = coverage_bits & 2 ? &m_stencil_buffer->scanline(by)[bx + 1] : nullptr;
stencil_ptrs[2] = coverage_bits & 4 ? &m_stencil_buffer->scanline(by + 1)[bx] : nullptr;
stencil_ptrs[3] = coverage_bits & 8 ? &m_stencil_buffer->scanline(by + 1)[bx + 1] : nullptr;
stencil_value = load4_masked(stencil_ptrs[0], stencil_ptrs[1], stencil_ptrs[2], stencil_ptrs[3], quad.mask);
stencil_value &= stencil_configuration.test_mask;
i32x4 stencil_test_passed;
switch (stencil_configuration.test_function) {
case StencilTestFunction::Always:
stencil_test_passed = expand4(~0);
break;
case StencilTestFunction::Equal:
stencil_test_passed = stencil_value == stencil_reference_value;
break;
case StencilTestFunction::Greater:
stencil_test_passed = stencil_value > stencil_reference_value;
break;
case StencilTestFunction::GreaterOrEqual:
stencil_test_passed = stencil_value >= stencil_reference_value;
break;
case StencilTestFunction::Less:
stencil_test_passed = stencil_value < stencil_reference_value;
break;
case StencilTestFunction::LessOrEqual:
stencil_test_passed = stencil_value <= stencil_reference_value;
break;
case StencilTestFunction::Never:
stencil_test_passed = expand4(0);
break;
case StencilTestFunction::NotEqual:
stencil_test_passed = stencil_value != stencil_reference_value;
break;
default:
VERIFY_NOT_REACHED();
}
// Update stencil buffer for pixels that failed the stencil test
write_to_stencil(
stencil_ptrs,
stencil_value,
stencil_configuration.on_stencil_test_fail,
stencil_reference_value,
stencil_configuration.write_mask,
quad.mask & ~stencil_test_passed);
// Update coverage mask + early quad rejection
quad.mask &= stencil_test_passed;
if (none(quad.mask))
continue;
}
// Depth testing
float* depth_ptrs[4] = {
coverage_bits & 1 ? &m_depth_buffer->scanline(by)[bx] : nullptr,
coverage_bits & 2 ? &m_depth_buffer->scanline(by)[bx + 1] : nullptr,
coverage_bits & 4 ? &m_depth_buffer->scanline(by + 1)[bx] : nullptr,
coverage_bits & 8 ? &m_depth_buffer->scanline(by + 1)[bx + 1] : nullptr,
};
// AND the depth mask onto the coverage mask
if (m_options.enable_depth_test) {
auto depth = load4_masked(depth_ptrs[0], depth_ptrs[1], depth_ptrs[2], depth_ptrs[3], quad.mask);
@ -321,31 +413,35 @@ void Device::rasterize_triangle(const Triangle& triangle)
// FIXME: Also apply depth_offset_factor which depends on the depth gradient
quad.depth += m_options.depth_offset_constant * NumericLimits<float>::epsilon();
i32x4 depth_test_passed;
switch (m_options.depth_func) {
case DepthTestFunction::Always:
depth_test_passed = expand4(~0);
break;
case DepthTestFunction::Never:
quad.mask ^= quad.mask;
depth_test_passed = expand4(0);
break;
case DepthTestFunction::Greater:
quad.mask &= quad.depth > depth;
depth_test_passed = quad.depth > depth;
break;
case DepthTestFunction::GreaterOrEqual:
quad.mask &= quad.depth >= depth;
depth_test_passed = quad.depth >= depth;
break;
case DepthTestFunction::NotEqual:
#ifdef __SSE__
quad.mask &= quad.depth != depth;
depth_test_passed = quad.depth != depth;
#else
quad.mask[0] = bit_cast<u32>(quad.depth[0]) != bit_cast<u32>(depth[0]) ? -1 : 0;
quad.mask[1] = bit_cast<u32>(quad.depth[1]) != bit_cast<u32>(depth[1]) ? -1 : 0;
quad.mask[2] = bit_cast<u32>(quad.depth[2]) != bit_cast<u32>(depth[2]) ? -1 : 0;
quad.mask[3] = bit_cast<u32>(quad.depth[3]) != bit_cast<u32>(depth[3]) ? -1 : 0;
depth_test_passed = i32x4 {
bit_cast<u32>(quad.depth[0]) != bit_cast<u32>(depth[0]) ? -1 : 0,
bit_cast<u32>(quad.depth[1]) != bit_cast<u32>(depth[1]) ? -1 : 0,
bit_cast<u32>(quad.depth[2]) != bit_cast<u32>(depth[2]) ? -1 : 0,
bit_cast<u32>(quad.depth[3]) != bit_cast<u32>(depth[3]) ? -1 : 0,
};
#endif
break;
case DepthTestFunction::Equal:
#ifdef __SSE__
quad.mask &= quad.depth == depth;
depth_test_passed = quad.depth == depth;
#else
//
// This is an interesting quirk that occurs due to us using the x87 FPU when Serenity is
@ -358,25 +454,52 @@ void Device::rasterize_triangle(const Triangle& triangle)
// the first 32-bits of this depth value is "good enough" that if we get a hit on it being
// equal, we can pretty much guarantee that it's actually equal.
//
quad.mask[0] = bit_cast<u32>(quad.depth[0]) == bit_cast<u32>(depth[0]) ? -1 : 0;
quad.mask[1] = bit_cast<u32>(quad.depth[1]) == bit_cast<u32>(depth[1]) ? -1 : 0;
quad.mask[2] = bit_cast<u32>(quad.depth[2]) == bit_cast<u32>(depth[2]) ? -1 : 0;
quad.mask[3] = bit_cast<u32>(quad.depth[3]) == bit_cast<u32>(depth[3]) ? -1 : 0;
depth_test_passed = i32x4 {
bit_cast<u32>(quad.depth[0]) == bit_cast<u32>(depth[0]) ? -1 : 0,
bit_cast<u32>(quad.depth[1]) == bit_cast<u32>(depth[1]) ? -1 : 0,
bit_cast<u32>(quad.depth[2]) == bit_cast<u32>(depth[2]) ? -1 : 0,
bit_cast<u32>(quad.depth[3]) == bit_cast<u32>(depth[3]) ? -1 : 0,
};
#endif
break;
case DepthTestFunction::LessOrEqual:
quad.mask &= quad.depth <= depth;
depth_test_passed = quad.depth <= depth;
break;
case DepthTestFunction::Less:
quad.mask &= quad.depth < depth;
depth_test_passed = quad.depth < depth;
break;
default:
VERIFY_NOT_REACHED();
}
// Nice, no pixels passed the depth test -> block rejected by early z
// Update stencil buffer for pixels that failed the depth test
if (m_options.enable_stencil_test) {
    // Replace must store the reference value itself; stencil_reference_value is
    // pre-masked with the test mask and is only meant for the stencil comparison.
    write_to_stencil(
        stencil_ptrs,
        stencil_value,
        stencil_configuration.on_depth_test_fail,
        stencil_configuration.reference_value,
        stencil_configuration.write_mask,
        quad.mask & ~depth_test_passed);
}
// Update coverage mask + early quad rejection
quad.mask &= depth_test_passed;
if (none(quad.mask))
continue;
}
// Update stencil buffer for passed pixels
if (m_options.enable_stencil_test) {
    // Replace must store the reference value itself; stencil_reference_value is
    // pre-masked with the test mask and is only meant for the stencil comparison.
    write_to_stencil(
        stencil_ptrs,
        stencil_value,
        stencil_configuration.on_pass,
        stencil_configuration.reference_value,
        stencil_configuration.write_mask,
        quad.mask);
}
INCREASE_STATISTICS_COUNTER(g_num_pixels_shaded, maskcount(quad.mask));
// Draw the pixels according to the previously generated mask
@ -415,9 +538,8 @@ void Device::rasterize_triangle(const Triangle& triangle)
}
// Write to depth buffer
if (m_options.enable_depth_test && m_options.enable_depth_write) {
if (m_options.enable_depth_test && m_options.enable_depth_write)
store4_masked(quad.depth, depth_ptrs[0], depth_ptrs[1], depth_ptrs[2], depth_ptrs[3], quad.mask);
}
// We will not update the color buffer at all
if (!m_options.color_mask || !m_options.enable_color_write)
@ -465,8 +587,9 @@ void Device::rasterize_triangle(const Triangle& triangle)
}
Device::Device(const Gfx::IntSize& size)
: m_render_target { Gfx::Bitmap::try_create(Gfx::BitmapFormat::BGRA8888, size).release_value_but_fixme_should_propagate_errors() }
, m_depth_buffer { adopt_own(*new DepthBuffer(size)) }
: m_render_target(Gfx::Bitmap::try_create(Gfx::BitmapFormat::BGRA8888, size).release_value_but_fixme_should_propagate_errors())
, m_depth_buffer(make<DepthBuffer>(size))
, m_stencil_buffer(MUST(StencilBuffer::try_create(size)))
{
m_options.scissor_box = m_render_target->rect();
m_options.viewport = m_render_target->rect();
@ -478,7 +601,8 @@ DeviceInfo Device::info() const
.vendor_name = "SerenityOS",
.device_name = "SoftGPU",
.num_texture_units = NUM_SAMPLERS,
.num_lights = NUM_LIGHTS
.num_lights = NUM_LIGHTS,
.stencil_bits = sizeof(u8) * 8,
};
}
@ -626,7 +750,7 @@ void Device::draw_primitives(PrimitiveType primitive_type, FloatMatrix4x4 const&
}
// Now let's transform each triangle and send that to the GPU
auto const viewport = window_coordinates_to_target_coordinates(m_options.viewport, m_render_target->rect());
auto const viewport = window_coordinates_to_target_coordinates(m_options.viewport);
auto const viewport_half_width = viewport.width() / 2.0f;
auto const viewport_half_height = viewport.height() / 2.0f;
auto const viewport_center_x = viewport.x() + viewport_half_width;
@ -956,7 +1080,7 @@ void Device::clear_color(const FloatVector4& color)
if (m_options.scissor_enabled) {
auto fill_rect = m_render_target->rect();
fill_rect.intersect(window_coordinates_to_target_coordinates(m_options.scissor_box, fill_rect));
fill_rect.intersect(window_coordinates_to_target_coordinates(m_options.scissor_box));
Gfx::Painter painter { *m_render_target };
painter.fill_rect(fill_rect, fill_color);
return;
@ -970,13 +1094,23 @@ void Device::clear_depth(float depth)
wait_for_all_threads();
if (m_options.scissor_enabled) {
m_depth_buffer->clear(window_coordinates_to_target_coordinates(m_options.scissor_box, m_render_target->rect()), depth);
m_depth_buffer->clear(window_coordinates_to_target_coordinates(m_options.scissor_box), depth);
return;
}
m_depth_buffer->clear(depth);
}
// Fills the stencil buffer with value.
// With scissoring enabled, only the part of the buffer covered by the scissor box
// (converted to target coordinates) is cleared.
void Device::clear_stencil(u8 value)
{
    // Same synchronization point as clear_depth(): do not touch a buffer while it may still be in use
    wait_for_all_threads();

    Gfx::IntRect clear_rect = m_stencil_buffer->rect();
    if (m_options.scissor_enabled)
        clear_rect.intersect(window_coordinates_to_target_coordinates(m_options.scissor_box));

    m_stencil_buffer->clear(clear_rect, value);
}
void Device::blit_to_color_buffer_at_raster_position(Gfx::Bitmap const& source)
{
if (!m_raster_position.valid)
@ -1148,6 +1282,11 @@ void Device::set_material_state(Face face, Material const& material)
m_materials[face] = material;
}
// Stores a copy of stencil_configuration as the stencil state for the given face.
void Device::set_stencil_configuration(Face face, StencilConfiguration const& stencil_configuration)
{
m_stencil_configuration[face] = stencil_configuration;
}
void Device::set_raster_position(RasterPosition const& raster_position)
{
m_raster_position = raster_position;
@ -1192,7 +1331,7 @@ Gfx::IntRect Device::raster_rect_in_target_coordinates(Gfx::IntSize size)
size.width(),
size.height(),
};
return window_coordinates_to_target_coordinates(raster_rect, m_render_target->rect());
return window_coordinates_to_target_coordinates(raster_rect);
}
}

View file

@ -10,6 +10,7 @@
#include <AK/Array.h>
#include <AK/NonnullRefPtr.h>
#include <AK/OwnPtr.h>
#include <AK/Vector.h>
#include <LibGfx/Bitmap.h>
#include <LibGfx/Matrix3x3.h>
#include <LibGfx/Matrix4x4.h>
@ -26,6 +27,7 @@
#include <LibSoftGPU/Light/Light.h>
#include <LibSoftGPU/Light/Material.h>
#include <LibSoftGPU/Sampler.h>
#include <LibSoftGPU/StencilBuffer.h>
#include <LibSoftGPU/Triangle.h>
#include <LibSoftGPU/Vertex.h>
@ -38,6 +40,7 @@ struct TexCoordGenerationConfig {
struct RasterizerOptions {
bool shade_smooth { true };
bool enable_stencil_test { false };
bool enable_depth_test { false };
bool enable_depth_write { true };
bool enable_alpha_test { false };
@ -94,6 +97,17 @@ struct RasterPosition {
FloatVector4 texture_coordinates { 0.0f, 0.0f, 0.0f, 1.0f };
};
// Per-face stencil state consumed by the rasterizer.
//
// Every member has a default so that a default-constructed configuration is well
// defined: the test always passes, every operation keeps the stored value and both
// masks have all 8 bits set. These values mirror the defaults of the GL-side
// StencilFunctionOptions / StencilOperationOptions.
struct StencilConfiguration {
    // Comparison used by the stencil test, its reference value, and the mask applied
    // to both operands of the comparison
    StencilTestFunction test_function { StencilTestFunction::Always };
    u8 reference_value { 0 };
    u8 test_mask { 0xff };

    // Operations applied when the stencil test fails, when the depth test fails, and
    // when both pass; write_mask selects the bits an operation may modify
    StencilOperation on_stencil_test_fail { StencilOperation::Keep };
    StencilOperation on_depth_test_fail { StencilOperation::Keep };
    StencilOperation on_pass { StencilOperation::Keep };
    u8 write_mask { 0xff };
};
class Device final {
public:
Device(const Gfx::IntSize& min_size);
@ -104,6 +118,7 @@ public:
void resize(const Gfx::IntSize& min_size);
void clear_color(const FloatVector4&);
void clear_depth(float);
void clear_stencil(u8);
void blit_to(Gfx::Bitmap&);
void blit_to_color_buffer_at_raster_position(Gfx::Bitmap const&);
void blit_to_depth_buffer_at_raster_position(Vector<float> const&, size_t, size_t);
@ -120,6 +135,7 @@ public:
void set_sampler_config(unsigned, SamplerConfig const&);
void set_light_state(unsigned, Light const&);
void set_material_state(Face, Material const&);
void set_stencil_configuration(Face, StencilConfiguration const&);
RasterPosition raster_position() const { return m_raster_position; }
void set_raster_position(RasterPosition const& raster_position);
@ -128,15 +144,16 @@ public:
private:
void draw_statistics_overlay(Gfx::Bitmap&);
Gfx::IntRect raster_rect_in_target_coordinates(Gfx::IntSize size);
Gfx::IntRect window_coordinates_to_target_coordinates(Gfx::IntRect const&);
void rasterize_triangle(const Triangle& triangle);
void setup_blend_factors();
void shade_fragments(PixelQuad&);
bool test_alpha(PixelQuad&);
private:
RefPtr<Gfx::Bitmap> m_render_target;
OwnPtr<DepthBuffer> m_depth_buffer;
NonnullOwnPtr<DepthBuffer> m_depth_buffer;
NonnullOwnPtr<StencilBuffer> m_stencil_buffer;
RasterizerOptions m_options;
LightModelParameters m_lighting_model;
Clipper m_clipper;
@ -149,6 +166,7 @@ private:
Array<Light, NUM_LIGHTS> m_lights;
Array<Material, 2u> m_materials;
RasterPosition m_raster_position;
Array<StencilConfiguration, 2u> m_stencil_configuration;
};
}

View file

@ -15,6 +15,7 @@ struct DeviceInfo final {
String device_name;
unsigned num_texture_units;
unsigned num_lights;
u8 stencil_bits;
};
}

View file

@ -87,6 +87,28 @@ enum class PrimitiveType {
Quads,
};
// Operation applied to a stored stencil value after the stencil / depth tests.
// Scoped so that generic names such as Keep and Zero do not leak into the enclosing
// namespace; enumerators must be written as StencilOperation::X.
enum class StencilOperation {
    Decrement,
    DecrementWrap,
    Increment,
    IncrementWrap,
    Invert,
    Keep,
    Replace,
    Zero,
};
// Comparison used by the stencil test.
// Scoped so that generic names such as Always, Less and Equal do not leak into the
// enclosing namespace; enumerators must be written as StencilTestFunction::X.
enum class StencilTestFunction {
    Always,
    Equal,
    Greater,
    GreaterOrEqual,
    Less,
    LessOrEqual,
    Never,
    NotEqual,
};
enum TexCoordGenerationCoordinate {
None = 0x0,
S = 0x1,

View file

@ -0,0 +1,41 @@
/*
* Copyright (c) 2022, Jelle Raaijmakers <jelle@gmta.nl>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#include <LibSoftGPU/StencilBuffer.h>
namespace SoftGPU {
// Creates a stencil buffer covering (0, 0) to size, backed by one u8 per pixel.
// Returns the error from FixedArray<u8>::try_create() if the backing storage
// cannot be allocated.
ErrorOr<NonnullOwnPtr<StencilBuffer>> StencilBuffer::try_create(Gfx::IntSize const& size)
{
    auto storage = TRY(FixedArray<u8>::try_create(size.area()));
    Gfx::IntRect const bounds { 0, 0, size.width(), size.height() };
    return adopt_own(*new StencilBuffer(bounds, move(storage)));
}
// Takes ownership of the pixel storage and records the rectangle the buffer covers.
StencilBuffer::StencilBuffer(Gfx::IntRect const& rect, FixedArray<u8> data)
: m_data(move(data))
, m_rect(rect)
{
}
// Writes value to every pixel of rect that lies inside the buffer; the part of
// rect outside the buffer is ignored.
void StencilBuffer::clear(Gfx::IntRect rect, u8 value)
{
    rect.intersect(m_rect);

    int const first_column = rect.left();
    int const last_column = rect.right();
    int const last_row = rect.bottom();

    for (int row = rect.top(); row <= last_row; ++row) {
        u8* const pixels = scanline(row);
        for (int column = first_column; column <= last_column; ++column)
            pixels[column] = value;
    }
}
// Returns a pointer to the first pixel of row y.
// Rows are stored back to back, each m_rect.width() bytes long; y must lie within
// the buffer's vertical range.
u8* StencilBuffer::scanline(int y)
{
    VERIFY(m_rect.contains_vertically(y));
    auto const row_offset = y * m_rect.width();
    return &m_data[row_offset];
}
}

View file

@ -0,0 +1,33 @@
/*
* Copyright (c) 2021, Jelle Raaijmakers <jelle@gmta.nl>
*
* SPDX-License-Identifier: BSD-2-Clause
*/
#pragma once
#include <AK/Error.h>
#include <AK/FixedArray.h>
#include <AK/NonnullOwnPtr.h>
#include <AK/Try.h>
#include <LibGfx/Rect.h>
#include <LibGfx/Size.h>
namespace SoftGPU {
// A buffer of 8-bit stencil values, one per pixel, stored row by row.
class StencilBuffer final {
public:
// Creates a buffer for the given size; fails if the storage cannot be allocated.
static ErrorOr<NonnullOwnPtr<StencilBuffer>> try_create(Gfx::IntSize const& size);
// Sets every pixel inside rect (clipped to the buffer) to value.
void clear(Gfx::IntRect rect, u8 value);
// The rectangle covered by this buffer.
Gfx::IntRect const& rect() const { return m_rect; }
// Pointer to the first pixel of row y.
u8* scanline(int y);
private:
// Private: instances are created through try_create().
StencilBuffer(Gfx::IntRect const& rect, FixedArray<u8> data);
// Pixel storage; declared before m_rect to match the constructor's initializer order.
FixedArray<u8> m_data;
Gfx::IntRect m_rect;
};
}