godot/modules/lightmapper_rd/lm_compute.glsl

#[versions]
primary = "#define MODE_DIRECT_LIGHT";
secondary = "#define MODE_BOUNCE_LIGHT";
dilate = "#define MODE_DILATE";
unocclude = "#define MODE_UNOCCLUDE";
light_probes = "#define MODE_LIGHT_PROBES";
denoise = "#define MODE_DENOISE";
#[compute]
#version 450
#VERSION_DEFINES
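// Each lightmapper pass compiles this file as one of the versions listed above, with the
// corresponding MODE_* define enabled.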
// A 2D local group focused on one layer at a time (all invocations in parallel, with no barriers)
// makes more sense than a 3D local group, as it takes better advantage of the cache within each group.
#ifdef MODE_LIGHT_PROBES
layout(local_size_x = 64, local_size_y = 1, local_size_z = 1) in;
#else
layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
#endif
#include "lm_common_inc.glsl"
#ifdef MODE_LIGHT_PROBES
layout(set = 1, binding = 0, std430) restrict buffer LightProbeData {
vec4 data[];
}
light_probes;
layout(set = 1, binding = 1) uniform texture2DArray source_light;
layout(set = 1, binding = 2) uniform texture2D environment;
#endif
#ifdef MODE_UNOCCLUDE
layout(rgba32f, set = 1, binding = 0) uniform restrict image2DArray position;
layout(rgba32f, set = 1, binding = 1) uniform restrict readonly image2DArray unocclude;
#endif
#if defined(MODE_DIRECT_LIGHT) || defined(MODE_BOUNCE_LIGHT)
layout(rgba16f, set = 1, binding = 0) uniform restrict writeonly image2DArray dest_light;
layout(set = 1, binding = 1) uniform texture2DArray source_light;
layout(set = 1, binding = 2) uniform texture2DArray source_position;
layout(set = 1, binding = 3) uniform texture2DArray source_normal;
layout(rgba16f, set = 1, binding = 4) uniform restrict image2DArray accum_light;
#endif
#ifdef MODE_BOUNCE_LIGHT
layout(set = 1, binding = 5) uniform texture2D environment;
#endif
#if defined(MODE_DILATE) || defined(MODE_DENOISE)
layout(rgba16f, set = 1, binding = 0) uniform restrict writeonly image2DArray dest_light;
layout(set = 1, binding = 1) uniform texture2DArray source_light;
#endif
#ifdef MODE_DENOISE
layout(set = 1, binding = 2) uniform texture2DArray source_normal;
layout(set = 1, binding = 3) uniform DenoiseParams {
float spatial_bandwidth;
float light_bandwidth;
float albedo_bandwidth;
float normal_bandwidth;
float filter_strength;
}
denoise_params;
#endif
layout(push_constant, std430) uniform Params {
uint atlas_slice;
uint ray_count;
uint ray_from;
uint ray_to;
ivec2 region_ofs;
uint probe_count;
}
params;
// Check whether the ray hits the triangle; also return the distance and barycentric coordinates (for UV lookup).
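// In effect, the ray parameter at which the ray meets the triangle's plane is computed as
// dot(N, p0 - from) / dot(N, dir), the barycentric coordinates are derived from scalar triple
// products, and the hit is accepted only if it lies inside the triangle, beyond the bias
// (to avoid self-intersection) and closer than max_dist.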
bool ray_hits_triangle(vec3 from, vec3 dir, float max_dist, vec3 p0, vec3 p1, vec3 p2, out float r_distance, out vec3 r_barycentric) {
const float EPSILON = 0.00001;
const vec3 e0 = p1 - p0;
const vec3 e1 = p0 - p2;
vec3 triangle_normal = cross(e1, e0);
float n_dot_dir = dot(triangle_normal, dir);
if (abs(n_dot_dir) < EPSILON) {
return false;
}
const vec3 e2 = (p0 - from) / n_dot_dir;
const vec3 i = cross(dir, e2);
r_barycentric.y = dot(i, e1);
r_barycentric.z = dot(i, e0);
r_barycentric.x = 1.0 - (r_barycentric.z + r_barycentric.y);
r_distance = dot(triangle_normal, e2);
return (r_distance > bake_params.bias) && (r_distance < max_dist) && all(greaterThanEqual(r_barycentric, vec3(0.0)));
}
const uint RAY_MISS = 0;
const uint RAY_FRONT = 1;
const uint RAY_BACK = 2;
const uint RAY_ANY = 3;
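// Standard slab test: the ray hits the box if the per-axis entry/exit intervals overlap. Boxes
// that lie entirely behind the ray origin are not rejected, which appears acceptable here since
// every box hit is still confirmed by exact triangle tests.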
bool ray_box_test(vec3 p_from, vec3 p_inv_dir, vec3 p_box_min, vec3 p_box_max) {
vec3 t0 = (p_box_min - p_from) * p_inv_dir;
vec3 t1 = (p_box_max - p_from) * p_inv_dir;
vec3 tmin = min(t0, t1), tmax = max(t0, t1);
return max(tmin.x, max(tmin.y, tmin.z)) <= min(tmax.x, min(tmax.y, tmax.z));
}
#if CLUSTER_SIZE > 32
#define CLUSTER_TRIANGLE_ITERATION
#endif
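// Clusters larger than 32 triangles don't fit in a single 32-bit hit mask, so trace_ray() needs
// an additional loop over each cluster's triangle list.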
uint trace_ray(vec3 p_from, vec3 p_to, bool p_any_hit, out float r_distance, out vec3 r_normal, out uint r_triangle, out vec3 r_barycentric) {
// World coordinates.
vec3 rel = p_to - p_from;
float rel_len = length(rel);
vec3 dir = normalize(rel);
vec3 inv_dir = 1.0 / dir;
// Cell coordinates.
vec3 from_cell = (p_from - bake_params.to_cell_offset) * bake_params.to_cell_size;
vec3 to_cell = (p_to - bake_params.to_cell_offset) * bake_params.to_cell_size;
// Prepare DDA.
vec3 rel_cell = to_cell - from_cell;
ivec3 icell = ivec3(from_cell);
ivec3 iendcell = ivec3(to_cell);
vec3 dir_cell = normalize(rel_cell);
vec3 delta = min(abs(1.0 / dir_cell), bake_params.grid_size); // Use bake_params.grid_size as max to prevent infinity values.
ivec3 step = ivec3(sign(rel_cell));
vec3 side = (sign(rel_cell) * (vec3(icell) - from_cell) + (sign(rel_cell) * 0.5) + 0.5) * delta;
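// delta is the distance along the ray (in cell units) between two consecutive cell-boundary
// crossings on each axis, and side is the distance to the first crossing on each axis.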
uint iters = 0;
while (all(greaterThanEqual(icell, ivec3(0))) && all(lessThan(icell, ivec3(bake_params.grid_size))) && (iters < 1000)) {
uvec2 cell_data = texelFetch(usampler3D(grid, linear_sampler), icell, 0).xy;
uint triangle_count = cell_data.x;
if (triangle_count > 0) {
uint hit = RAY_MISS;
float best_distance = 1e20;
uint cluster_start = cluster_indices.data[cell_data.y * 2];
uint cell_triangle_start = cluster_indices.data[cell_data.y * 2 + 1];
uint cluster_count = (triangle_count + CLUSTER_SIZE - 1) / CLUSTER_SIZE;
uint cluster_base_index = 0;
while (cluster_base_index < cluster_count) {
// To minimize divergence, all Ray-AABB tests on the clusters contained in the cell are performed
// before checking against the triangles. We do this 32 clusters at a time and store the intersected
// clusters on each bit of the 32-bit integer.
uint cluster_test_count = min(32, cluster_count - cluster_base_index);
uint cluster_hits = 0;
for (uint i = 0; i < cluster_test_count; i++) {
uint cluster_index = cluster_start + cluster_base_index + i;
ClusterAABB cluster_aabb = cluster_aabbs.data[cluster_index];
if (ray_box_test(p_from, inv_dir, cluster_aabb.min_bounds, cluster_aabb.max_bounds)) {
cluster_hits |= (1 << i);
}
}
// Check the triangles in any of the clusters that were intersected by toggling off the bits in the
// 32-bit integer counter until no bits are left.
while (cluster_hits > 0) {
uint cluster_index = findLSB(cluster_hits);
cluster_hits &= ~(1 << cluster_index);
cluster_index += cluster_base_index;
// Do the same divergence execution trick with triangles as well.
uint triangle_base_index = 0;
#ifdef CLUSTER_TRIANGLE_ITERATION
while (triangle_base_index < triangle_count)
#endif
{
uint triangle_start_index = cell_triangle_start + cluster_index * CLUSTER_SIZE + triangle_base_index;
uint triangle_test_count = min(CLUSTER_SIZE, triangle_count - triangle_base_index);
uint triangle_hits = 0;
for (uint i = 0; i < triangle_test_count; i++) {
uint triangle_index = triangle_indices.data[triangle_start_index + i];
if (ray_box_test(p_from, inv_dir, triangles.data[triangle_index].min_bounds, triangles.data[triangle_index].max_bounds)) {
triangle_hits |= (1 << i);
}
}
while (triangle_hits > 0) {
uint cluster_triangle_index = findLSB(triangle_hits);
triangle_hits &= ~(1 << cluster_triangle_index);
cluster_triangle_index += triangle_start_index;
uint triangle_index = triangle_indices.data[cluster_triangle_index];
Triangle triangle = triangles.data[triangle_index];
// Gather the triangle vertex positions.
vec3 vtx0 = vertices.data[triangle.indices.x].position;
vec3 vtx1 = vertices.data[triangle.indices.y].position;
vec3 vtx2 = vertices.data[triangle.indices.z].position;
vec3 normal = -normalize(cross((vtx0 - vtx1), (vtx0 - vtx2)));
bool backface = dot(normal, dir) >= 0.0;
float distance;
vec3 barycentric;
if (ray_hits_triangle(p_from, dir, rel_len, vtx0, vtx1, vtx2, distance, barycentric)) {
if (p_any_hit) {
// Return early if any hit was requested.
return RAY_ANY;
}
vec3 position = p_from + dir * distance;
vec3 hit_cell = (position - bake_params.to_cell_offset) * bake_params.to_cell_size;
if (icell != ivec3(hit_cell)) {
// It's possible for the ray to hit a triangle in a position outside the bounds of the cell
// if it's large enough to cover multiple ones. The hit must be ignored if this is the case.
continue;
}
if (!backface) {
// The case of meshes having both a front and back face in the same plane is more common than
// expected, so if this is a front-face, bias it closer to the ray origin, so it always wins
// over the back-face.
distance = max(bake_params.bias, distance - bake_params.bias);
}
if (distance < best_distance) {
hit = backface ? RAY_BACK : RAY_FRONT;
best_distance = distance;
r_distance = distance;
r_normal = normal;
r_triangle = triangle_index;
r_barycentric = barycentric;
}
}
}
#ifdef CLUSTER_TRIANGLE_ITERATION
triangle_base_index += CLUSTER_SIZE;
#endif
}
}
cluster_base_index += 32;
}
if (hit != RAY_MISS) {
return hit;
}
}
if (icell == iendcell) {
break;
}
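// Advance the DDA along the axis (or axes) whose next cell boundary is closest.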
bvec3 mask = lessThanEqual(side.xyz, min(side.yzx, side.zxy));
side += vec3(mask) * delta;
icell += ivec3(vec3(mask)) * step;
iters++;
}
return RAY_MISS;
}
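// Convenience wrappers around trace_ray() for the query patterns used below.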
uint trace_ray_closest_hit_triangle(vec3 p_from, vec3 p_to, out uint r_triangle, out vec3 r_barycentric) {
float distance;
vec3 normal;
return trace_ray(p_from, p_to, false, distance, normal, r_triangle, r_barycentric);
}
uint trace_ray_closest_hit_distance(vec3 p_from, vec3 p_to, out float r_distance, out vec3 r_normal) {
uint triangle;
vec3 barycentric;
return trace_ray(p_from, p_to, false, r_distance, r_normal, triangle, barycentric);
}
uint trace_ray_any_hit(vec3 p_from, vec3 p_to) {
float distance;
vec3 normal;
uint triangle;
vec3 barycentric;
return trace_ray(p_from, p_to, true, distance, normal, triangle, barycentric);
}
// https://www.reedbeta.com/blog/hash-functions-for-gpu-rendering/
uint hash(uint value) {
uint state = value * 747796405u + 2891336453u;
uint word = ((state >> ((state >> 28u) + 4u)) ^ state) * 277803737u;
return (word >> 22u) ^ word;
}
uint random_seed(ivec3 seed) {
return hash(seed.x ^ hash(seed.y ^ hash(seed.z)));
}
// Generates a random value in the range [0.0, 1.0).
float randomize(inout uint value) {
value = hash(value);
return float(value / 4294967296.0);
}
const float PI = 3.14159265f;
// http://www.realtimerendering.com/raytracinggems/unofficial_RayTracingGems_v1.4.pdf (chapter 15)
vec3 generate_hemisphere_cosine_weighted_direction(inout uint noise) {
float noise1 = randomize(noise);
float noise2 = randomize(noise) * 2.0 * PI;
return vec3(sqrt(noise1) * cos(noise2), sqrt(noise1) * sin(noise2), sqrt(1.0 - noise1));
}
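// The sample density is proportional to the cosine of the angle to the +Z axis, so when these
// directions are used for diffuse bounces the cosine term of the integrand cancels with the PDF
// and only the albedo needs to be applied to the throughput.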
// Distribution generation adapted from "Generating uniformly distributed numbers on a sphere"
// <http://corysimon.github.io/articles/uniformdistn-on-sphere/>
vec3 generate_sphere_uniform_direction(inout uint noise) {
float theta = 2.0 * PI * randomize(noise);
float phi = acos(1.0 - 2.0 * randomize(noise));
return vec3(sin(phi) * cos(theta), sin(phi) * sin(theta), cos(phi));
}
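// Build an orthonormal tangent frame around the given normal and use it to rotate a
// cosine-weighted hemisphere sample into world space.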
vec3 generate_ray_dir_from_normal(vec3 normal, inout uint noise) {
vec3 v0 = abs(normal.z) < 0.999 ? vec3(0.0, 0.0, 1.0) : vec3(0.0, 1.0, 0.0);
vec3 tangent = normalize(cross(v0, normal));
vec3 bitangent = normalize(cross(tangent, normal));
mat3 normal_mat = mat3(tangent, bitangent, normal);
return normal_mat * generate_hemisphere_cosine_weighted_direction(noise);
}
#if defined(MODE_DIRECT_LIGHT) || defined(MODE_BOUNCE_LIGHT) || defined(MODE_LIGHT_PROBES)
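// Omni/spot attenuation: a smooth window (1 - (d / range)^4)^2 that reaches zero at the light's
// range, multiplied by an inverse-distance style falloff controlled by the decay exponent.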
float get_omni_attenuation(float distance, float inv_range, float decay) {
float nd = distance * inv_range;
nd *= nd;
nd *= nd; // nd^4
nd = max(1.0 - nd, 0.0);
nd *= nd; // nd^2
return nd * pow(max(distance, 0.0001), -decay);
}
void trace_direct_light(vec3 p_position, vec3 p_normal, uint p_light_index, bool p_soft_shadowing, out vec3 r_light, out vec3 r_light_dir, inout uint r_noise) {
r_light = vec3(0.0f);
vec3 light_pos;
float dist;
float attenuation;
float soft_shadowing_disk_size;
Light light_data = lights.data[p_light_index];
if (light_data.type == LIGHT_TYPE_DIRECTIONAL) {
vec3 light_vec = light_data.direction;
light_pos = p_position - light_vec * length(bake_params.world_size);
r_light_dir = normalize(light_pos - p_position);
dist = length(bake_params.world_size);
attenuation = 1.0;
soft_shadowing_disk_size = light_data.size;
} else {
light_pos = light_data.position;
r_light_dir = normalize(light_pos - p_position);
dist = distance(p_position, light_pos);
if (dist > light_data.range) {
return;
}
soft_shadowing_disk_size = light_data.size / dist;
attenuation = get_omni_attenuation(dist, 1.0 / light_data.range, light_data.attenuation);
if (light_data.type == LIGHT_TYPE_SPOT) {
vec3 rel = normalize(p_position - light_pos);
float cos_spot_angle = light_data.cos_spot_angle;
float cos_angle = dot(rel, light_data.direction);
if (cos_angle < cos_spot_angle) {
return;
}
float scos = max(cos_angle, cos_spot_angle);
float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - cos_spot_angle));
attenuation *= 1.0 - pow(spot_rim, light_data.inv_spot_attenuation);
}
}
attenuation *= max(0.0, dot(p_normal, r_light_dir));
if (attenuation <= 0.0001) {
return;
}
float penumbra = 0.0;
if ((light_data.size > 0.0) && p_soft_shadowing) {
vec3 light_to_point = -r_light_dir;
vec3 aux = light_to_point.y < 0.777 ? vec3(0.0, 1.0, 0.0) : vec3(1.0, 0.0, 0.0);
vec3 light_to_point_tan = normalize(cross(light_to_point, aux));
vec3 light_to_point_bitan = normalize(cross(light_to_point, light_to_point_tan));
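// The light is treated as a disk whose apparent size is the light size divided by the distance
// (or just the size for directional lights). Shadow rays are jittered across that disk and the
// fraction of unoccluded rays becomes the penumbra factor applied to the light below.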
const uint shadowing_rays_check_penumbra_denom = 2;
uint shadowing_ray_count = p_soft_shadowing ? params.ray_count : 1;
uint hits = 0;
vec3 light_disk_to_point = light_to_point;
for (uint j = 0; j < shadowing_ray_count; j++) {
// Optimization:
// Once an important proportion of the rays have already been traced, if all of them were hits or
// all of them were misses, assume we're not in the penumbra and that the rest would have the same result.
if (p_soft_shadowing) {
if (j == shadowing_ray_count / shadowing_rays_check_penumbra_denom) {
if (hits == j) {
// Assume totally lit
hits = shadowing_ray_count;
break;
} else if (hits == 0) {
// Assume totally dark
hits = 0;
break;
}
}
}
float r = randomize(r_noise);
float a = randomize(r_noise) * 2.0 * PI;
vec2 disk_sample = (r * vec2(cos(a), sin(a))) * soft_shadowing_disk_size * light_data.shadow_blur;
light_disk_to_point = normalize(light_to_point + disk_sample.x * light_to_point_tan + disk_sample.y * light_to_point_bitan);
if (trace_ray_any_hit(p_position - light_disk_to_point * bake_params.bias, p_position - light_disk_to_point * dist) == RAY_MISS) {
hits++;
}
}
penumbra = float(hits) / float(shadowing_ray_count);
} else {
if (trace_ray_any_hit(p_position + r_light_dir * bake_params.bias, light_pos) == RAY_MISS) {
penumbra = 1.0;
}
}
r_light = light_data.color * light_data.energy * attenuation * penumbra;
}
#endif
#if defined(MODE_BOUNCE_LIGHT) || defined(MODE_LIGHT_PROBES)
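// Sample the environment panorama: transform the ray into sky space and convert the direction
// into equirectangular (longitude/latitude) coordinates for the texture lookup.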
vec3 trace_environment_color(vec3 ray_dir) {
vec3 sky_dir = normalize(mat3(bake_params.env_transform) * ray_dir);
vec2 st = vec2(atan(sky_dir.x, sky_dir.z), acos(sky_dir.y));
if (st.x < 0.0) {
st.x += PI * 2.0;
}
return textureLod(sampler2D(environment, linear_sampler), st / vec2(PI * 2.0, PI), 0.0).rgb;
}
vec3 trace_indirect_light(vec3 p_position, vec3 p_ray_dir, inout uint r_noise) {
// The lower limit considers the case where the lightmapper might have bounces disabled but light probes are requested.
vec3 position = p_position;
vec3 ray_dir = p_ray_dir;
uint max_depth = max(bake_params.bounces, 1);
vec3 throughput = vec3(1.0);
vec3 light = vec3(0.0);
for (uint depth = 0; depth < max_depth; depth++) {
uint tidx;
vec3 barycentric;
uint trace_result = trace_ray_closest_hit_triangle(position + ray_dir * bake_params.bias, position + ray_dir * length(bake_params.world_size), tidx, barycentric);
if (trace_result == RAY_FRONT) {
Vertex vert0 = vertices.data[triangles.data[tidx].indices.x];
Vertex vert1 = vertices.data[triangles.data[tidx].indices.y];
Vertex vert2 = vertices.data[triangles.data[tidx].indices.z];
vec3 uvw = vec3(barycentric.x * vert0.uv + barycentric.y * vert1.uv + barycentric.z * vert2.uv, float(triangles.data[tidx].slice));
position = barycentric.x * vert0.position + barycentric.y * vert1.position + barycentric.z * vert2.position;
vec3 norm0 = vec3(vert0.normal_xy, vert0.normal_z);
vec3 norm1 = vec3(vert1.normal_xy, vert1.normal_z);
vec3 norm2 = vec3(vert2.normal_xy, vert2.normal_z);
vec3 normal = barycentric.x * norm0 + barycentric.y * norm1 + barycentric.z * norm2;
vec3 direct_light = vec3(0.0f);
#ifdef USE_LIGHT_TEXTURE_FOR_BOUNCES
direct_light += textureLod(sampler2DArray(source_light, linear_sampler), uvw, 0.0).rgb;
#else
// Trace the lights directly. Significantly more expensive but more accurate in scenarios
// where the lightmap texture isn't reliable.
for (uint i = 0; i < bake_params.light_count; i++) {
vec3 light;
vec3 light_dir;
trace_direct_light(position, normal, i, false, light, light_dir, r_noise);
direct_light += light * lights.data[i].indirect_energy;
}
direct_light *= bake_params.exposure_normalization;
#endif
vec3 albedo = textureLod(sampler2DArray(albedo_tex, linear_sampler), uvw, 0).rgb;
vec3 emissive = textureLod(sampler2DArray(emission_tex, linear_sampler), uvw, 0).rgb;
emissive *= bake_params.exposure_normalization;
light += throughput * emissive;
throughput *= albedo;
light += throughput * direct_light * bake_params.bounce_indirect_energy;
// Use Russian Roulette to determine a probability to terminate the bounce earlier as an optimization.
// <https://computergraphics.stackexchange.com/questions/2316/is-russian-roulette-really-the-answer>
float p = max(max(throughput.x, throughput.y), throughput.z);
if (randomize(r_noise) > p) {
break;
}
// Boost the throughput from the probability of the ray being terminated early.
throughput *= 1.0 / p;
// Generate a new ray direction for the next bounce from this surface's normal.
ray_dir = generate_ray_dir_from_normal(normal, r_noise);
} else if (trace_result == RAY_MISS) {
// Look for the environment color and stop bouncing.
light += throughput * trace_environment_color(ray_dir);
break;
} else {
// Ignore any other trace results.
break;
}
}
return light;
}
#endif
void main() {
// Check if invocation is out of bounds.
#ifdef MODE_LIGHT_PROBES
int probe_index = int(gl_GlobalInvocationID.x);
if (probe_index >= params.probe_count) {
return;
}
#else
ivec2 atlas_pos = ivec2(gl_GlobalInvocationID.xy) + params.region_ofs;
if (any(greaterThanEqual(atlas_pos, bake_params.atlas_size))) {
return;
}
#endif
#ifdef MODE_DIRECT_LIGHT
vec3 normal = texelFetch(sampler2DArray(source_normal, linear_sampler), ivec3(atlas_pos, params.atlas_slice), 0).xyz;
if (length(normal) < 0.5) {
return; // Empty texel, nothing to process.
}
vec3 position = texelFetch(sampler2DArray(source_position, linear_sampler), ivec3(atlas_pos, params.atlas_slice), 0).xyz;
vec3 light_for_texture = vec3(0.0);
vec3 light_for_bounces = vec3(0.0);
#ifdef USE_SH_LIGHTMAPS
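// Accumulate L1 spherical harmonics (4 coefficients per texel), stored as four consecutive
// slices of the accumulation texture.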
vec4 sh_accum[4] = vec4[](
vec4(0.0, 0.0, 0.0, 1.0),
vec4(0.0, 0.0, 0.0, 1.0),
vec4(0.0, 0.0, 0.0, 1.0),
vec4(0.0, 0.0, 0.0, 1.0));
#endif
// Use atlas position and a prime number as the seed.
uint noise = random_seed(ivec3(atlas_pos, 43573547));
for (uint i = 0; i < bake_params.light_count; i++) {
vec3 light;
vec3 light_dir;
trace_direct_light(position, normal, i, true, light, light_dir, noise);
if (lights.data[i].static_bake) {
light_for_texture += light;
#ifdef USE_SH_LIGHTMAPS
float c[4] = float[](
0.282095, //l0
0.488603 * light_dir.y, //l1n1
0.488603 * light_dir.z, //l1n0
0.488603 * light_dir.x //l1p1
);
for (uint j = 0; j < 4; j++) {
sh_accum[j].rgb += light * c[j] * 8.0;
}
#endif
}
light_for_bounces += light * lights.data[i].indirect_energy;
}
light_for_bounces *= bake_params.exposure_normalization;
imageStore(dest_light, ivec3(atlas_pos, params.atlas_slice), vec4(light_for_bounces, 1.0));
#ifdef USE_SH_LIGHTMAPS
// Keep for adding at the end.
imageStore(accum_light, ivec3(atlas_pos, params.atlas_slice * 4 + 0), sh_accum[0]);
imageStore(accum_light, ivec3(atlas_pos, params.atlas_slice * 4 + 1), sh_accum[1]);
imageStore(accum_light, ivec3(atlas_pos, params.atlas_slice * 4 + 2), sh_accum[2]);
imageStore(accum_light, ivec3(atlas_pos, params.atlas_slice * 4 + 3), sh_accum[3]);
#else
light_for_texture *= bake_params.exposure_normalization;
imageStore(accum_light, ivec3(atlas_pos, params.atlas_slice), vec4(light_for_texture, 1.0));
#endif
#endif
#ifdef MODE_BOUNCE_LIGHT
#ifdef USE_SH_LIGHTMAPS
vec4 sh_accum[4] = vec4[](
vec4(0.0, 0.0, 0.0, 1.0),
vec4(0.0, 0.0, 0.0, 1.0),
vec4(0.0, 0.0, 0.0, 1.0),
vec4(0.0, 0.0, 0.0, 1.0));
#else
vec3 light_accum = vec3(0.0);
#endif
// Retrieve starting normal and position.
vec3 normal = texelFetch(sampler2DArray(source_normal, linear_sampler), ivec3(atlas_pos, params.atlas_slice), 0).xyz;
if (length(normal) < 0.5) {
// The pixel is empty, skip processing it.
return;
}
vec3 position = texelFetch(sampler2DArray(source_position, linear_sampler), ivec3(atlas_pos, params.atlas_slice), 0).xyz;
uint noise = random_seed(ivec3(params.ray_from, atlas_pos));
for (uint i = params.ray_from; i < params.ray_to; i++) {
vec3 ray_dir = generate_ray_dir_from_normal(normal, noise);
vec3 light = trace_indirect_light(position, ray_dir, noise);
#ifdef USE_SH_LIGHTMAPS
float c[4] = float[](
0.282095, //l0
0.488603 * ray_dir.y, //l1n1
0.488603 * ray_dir.z, //l1n0
0.488603 * ray_dir.x //l1p1
);
for (uint j = 0; j < 4; j++) {
sh_accum[j].rgb += light * c[j] * 8.0;
}
#else
light_accum += light;
#endif
}
// Add the averaged result to the accumulated light texture.
#ifdef USE_SH_LIGHTMAPS
for (int i = 0; i < 4; i++) {
vec4 accum = imageLoad(accum_light, ivec3(atlas_pos, params.atlas_slice * 4 + i));
accum.rgb += sh_accum[i].rgb / float(params.ray_count);
imageStore(accum_light, ivec3(atlas_pos, params.atlas_slice * 4 + i), accum);
}
#else
vec4 accum = imageLoad(accum_light, ivec3(atlas_pos, params.atlas_slice));
accum.rgb += light_accum / float(params.ray_count);
imageStore(accum_light, ivec3(atlas_pos, params.atlas_slice), accum);
#endif
#endif
#ifdef MODE_UNOCCLUDE
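// Push occluded texels out of nearby geometry: trace short rays along the surface tangents and,
// if a back face is found within the texel's footprint, move the stored position past it so that
// later passes don't start their rays from inside that geometry.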
//texel_size = 0.5;
//compute tangents
vec4 position_alpha = imageLoad(position, ivec3(atlas_pos, params.atlas_slice));
if (position_alpha.a < 0.5) {
return;
}
vec3 vertex_pos = position_alpha.xyz;
vec4 normal_tsize = imageLoad(unocclude, ivec3(atlas_pos, params.atlas_slice));
vec3 face_normal = normal_tsize.xyz;
float texel_size = normal_tsize.w;
vec3 v0 = abs(face_normal.z) < 0.999 ? vec3(0.0, 0.0, 1.0) : vec3(0.0, 1.0, 0.0);
vec3 tangent = normalize(cross(v0, face_normal));
vec3 bitangent = normalize(cross(tangent, face_normal));
vec3 base_pos = vertex_pos + face_normal * bake_params.bias; // Raise a bit.
vec3 rays[4] = vec3[](tangent, bitangent, -tangent, -bitangent);
float min_d = 1e20;
for (int i = 0; i < 4; i++) {
vec3 ray_to = base_pos + rays[i] * texel_size;
float d;
vec3 norm;
if (trace_ray_closest_hit_distance(base_pos, ray_to, d, norm) == RAY_BACK) {
if (d < min_d) {
// This bias needs to be greater than the regular bias, because otherwise later, rays will go the other side when pointing back.
vertex_pos = base_pos + rays[i] * d + norm * bake_params.bias * 10.0;
min_d = d;
}
}
}
position_alpha.xyz = vertex_pos;
imageStore(position, ivec3(atlas_pos, params.atlas_slice), position_alpha);
#endif
#ifdef MODE_LIGHT_PROBES
vec3 position = probe_positions.data[probe_index].xyz;
vec4 probe_sh_accum[9] = vec4[](
vec4(0.0),
vec4(0.0),
vec4(0.0),
vec4(0.0),
vec4(0.0),
vec4(0.0),
vec4(0.0),
vec4(0.0),
vec4(0.0));
uint noise = random_seed(ivec3(params.ray_from, probe_index, 49502741 /* some prime */));
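// Project the incoming radiance onto the first nine (L2) spherical harmonics coefficients.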
for (uint i = params.ray_from; i < params.ray_to; i++) {
vec3 ray_dir = generate_sphere_uniform_direction(noise);
vec3 light = trace_indirect_light(position, ray_dir, noise);
float c[9] = float[](
0.282095, //l0
0.488603 * ray_dir.y, //l1n1
0.488603 * ray_dir.z, //l1n0
0.488603 * ray_dir.x, //l1p1
1.092548 * ray_dir.x * ray_dir.y, //l2n2
1.092548 * ray_dir.y * ray_dir.z, //l2n1
//0.315392 * (ray_dir.x * ray_dir.x + ray_dir.y * ray_dir.y + 2.0 * ray_dir.z * ray_dir.z), //l20
0.315392 * (3.0 * ray_dir.z * ray_dir.z - 1.0), //l20
1.092548 * ray_dir.x * ray_dir.z, //l2p1
0.546274 * (ray_dir.x * ray_dir.x - ray_dir.y * ray_dir.y) //l2p2
);
for (uint j = 0; j < 9; j++) {
probe_sh_accum[j].rgb += light * c[j];
}
}
if (params.ray_from > 0) {
for (uint j = 0; j < 9; j++) { //accum from existing
probe_sh_accum[j] += light_probes.data[probe_index * 9 + j];
}
}
if (params.ray_to == params.ray_count) {
for (uint j = 0; j < 9; j++) { // Normalize once the last batch of rays has been traced.
probe_sh_accum[j] *= 4.0 / float(params.ray_count);
}
}
for (uint j = 0; j < 9; j++) { // Store the updated coefficients.
light_probes.data[probe_index * 9 + j] = probe_sh_accum[j];
}
#endif
#ifdef MODE_DILATE
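// Dilation: if this texel is empty (alpha <= 0.5), copy the first valid neighbor, checked in
// order of increasing distance, so that filtering near UV seams doesn't blend in unlit
// background texels.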
vec4 c = texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos, params.atlas_slice), 0);
//sides first, as they are closer
c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(-1, 0), params.atlas_slice), 0);
c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(0, 1), params.atlas_slice), 0);
c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(1, 0), params.atlas_slice), 0);
c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(0, -1), params.atlas_slice), 0);
//endpoints second
c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(-1, -1), params.atlas_slice), 0);
c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(-1, 1), params.atlas_slice), 0);
c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(1, -1), params.atlas_slice), 0);
c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(1, 1), params.atlas_slice), 0);
//far sides third
c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(-2, 0), params.atlas_slice), 0);
c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(0, 2), params.atlas_slice), 0);
c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(2, 0), params.atlas_slice), 0);
c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(0, -2), params.atlas_slice), 0);
//far-mid endpoints
c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(-2, -1), params.atlas_slice), 0);
c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(-2, 1), params.atlas_slice), 0);
c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(2, -1), params.atlas_slice), 0);
c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(2, 1), params.atlas_slice), 0);
c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(-1, -2), params.atlas_slice), 0);
c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(-1, 2), params.atlas_slice), 0);
c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(1, -2), params.atlas_slice), 0);
c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(1, 2), params.atlas_slice), 0);
//far endpoints
c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(-2, -2), params.atlas_slice), 0);
c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(-2, 2), params.atlas_slice), 0);
c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(2, -2), params.atlas_slice), 0);
c = c.a > 0.5 ? c : texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos + ivec2(2, 2), params.atlas_slice), 0);
imageStore(dest_light, ivec3(atlas_pos, params.atlas_slice), c);
#endif
#ifdef MODE_DENOISE
// Joint Non-local means (JNLM) denoiser.
//
// Based on YoctoImageDenoiser's JNLM implementation with corrections from "Nonlinearly Weighted First-order Regression for Denoising Monte Carlo Renderings".
//
// <https://github.com/ManuelPrandini/YoctoImageDenoiser/blob/06e19489dd64e47792acffde536393802ba48607/libs/yocto_extension/yocto_extension.cpp#L207>
// <https://benedikt-bitterli.me/nfor/nfor.pdf>
//
// MIT License
//
// Copyright (c) 2020 ManuelPrandini
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in all
// copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
// SOFTWARE.
//
// Most of the constants below have been hand-picked to fit the common scenarios lightmaps
// are generated with, but they can be altered freely to experiment and achieve better results.
// Half the size of the patch window around each pixel that is weighted to compute the denoised pixel.
// A value of 1 represents a 3x3 window, a value of 2 a 5x5 window, etc.
const int HALF_PATCH_WINDOW = 4;
// Half the size of the search window around each pixel that is denoised and weighted to compute the denoised pixel.
const int HALF_SEARCH_WINDOW = 10;
// For all of the following sigma values, smaller values will give less weight to pixels that have a bigger distance
// in the feature being evaluated. Therefore, smaller values are likely to cause more noise to appear, but will also
// cause less features to be erased in the process.
// Controls how much the spatial distance of the pixels influences the denoising weight.
const float SIGMA_SPATIAL = denoise_params.spatial_bandwidth;
// Controls how much the light color distance of the pixels influences the denoising weight.
const float SIGMA_LIGHT = denoise_params.light_bandwidth;
// Controls how much the albedo color distance of the pixels influences the denoising weight.
const float SIGMA_ALBEDO = denoise_params.albedo_bandwidth;
// Controls how much the normal vector distance of the pixels influences the denoising weight.
const float SIGMA_NORMAL = denoise_params.normal_bandwidth;
// Strength of the filter. The original paper recommends values around 10 to 15 times the Sigma parameter.
const float FILTER_VALUE = denoise_params.filter_strength * SIGMA_LIGHT;
// Formula constants.
const int PATCH_WINDOW_DIMENSION = (HALF_PATCH_WINDOW * 2 + 1);
const int PATCH_WINDOW_DIMENSION_SQUARE = (PATCH_WINDOW_DIMENSION * PATCH_WINDOW_DIMENSION);
const float TWO_SIGMA_SPATIAL_SQUARE = 2.0f * SIGMA_SPATIAL * SIGMA_SPATIAL;
const float TWO_SIGMA_LIGHT_SQUARE = 2.0f * SIGMA_LIGHT * SIGMA_LIGHT;
const float TWO_SIGMA_ALBEDO_SQUARE = 2.0f * SIGMA_ALBEDO * SIGMA_ALBEDO;
const float TWO_SIGMA_NORMAL_SQUARE = 2.0f * SIGMA_NORMAL * SIGMA_NORMAL;
const float FILTER_SQUARE_TWO_SIGMA_LIGHT_SQUARE = FILTER_VALUE * FILTER_VALUE * TWO_SIGMA_LIGHT_SQUARE;
const float EPSILON = 1e-6f;
#ifdef USE_SH_LIGHTMAPS
const uint slice_count = 4;
const uint slice_base = params.atlas_slice * slice_count;
#else
const uint slice_count = 1;
const uint slice_base = params.atlas_slice;
#endif
for (uint i = 0; i < slice_count; i++) {
uint lightmap_slice = slice_base + i;
vec3 denoised_rgb = vec3(0.0f);
vec4 input_light = texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(atlas_pos, lightmap_slice), 0);
vec3 input_albedo = texelFetch(sampler2DArray(albedo_tex, linear_sampler), ivec3(atlas_pos, params.atlas_slice), 0).rgb;
vec3 input_normal = texelFetch(sampler2DArray(source_normal, linear_sampler), ivec3(atlas_pos, params.atlas_slice), 0).xyz;
if (length(input_normal) > EPSILON) {
// Compute the denoised pixel if the normal is valid.
float sum_weights = 0.0f;
vec3 input_rgb = input_light.rgb;
for (int search_y = -HALF_SEARCH_WINDOW; search_y <= HALF_SEARCH_WINDOW; search_y++) {
for (int search_x = -HALF_SEARCH_WINDOW; search_x <= HALF_SEARCH_WINDOW; search_x++) {
ivec2 search_pos = atlas_pos + ivec2(search_x, search_y);
vec3 search_rgb = texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(search_pos, lightmap_slice), 0).rgb;
vec3 search_albedo = texelFetch(sampler2DArray(albedo_tex, linear_sampler), ivec3(search_pos, params.atlas_slice), 0).rgb;
vec3 search_normal = texelFetch(sampler2DArray(source_normal, linear_sampler), ivec3(search_pos, params.atlas_slice), 0).xyz;
float patch_square_dist = 0.0f;
for (int offset_y = -HALF_PATCH_WINDOW; offset_y <= HALF_PATCH_WINDOW; offset_y++) {
for (int offset_x = -HALF_PATCH_WINDOW; offset_x <= HALF_PATCH_WINDOW; offset_x++) {
ivec2 offset_input_pos = atlas_pos + ivec2(offset_x, offset_y);
ivec2 offset_search_pos = search_pos + ivec2(offset_x, offset_y);
vec3 offset_input_rgb = texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(offset_input_pos, lightmap_slice), 0).rgb;
vec3 offset_search_rgb = texelFetch(sampler2DArray(source_light, linear_sampler), ivec3(offset_search_pos, lightmap_slice), 0).rgb;
vec3 offset_delta_rgb = offset_input_rgb - offset_search_rgb;
patch_square_dist += dot(offset_delta_rgb, offset_delta_rgb) - TWO_SIGMA_LIGHT_SQUARE;
}
}
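// Normalize by the channel count and the patch area. The per-texel subtraction of
// TWO_SIGMA_LIGHT_SQUARE above acts as a noise-variance cancellation term, and any negative
// result is clamped to zero.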
patch_square_dist = max(0.0f, patch_square_dist / (3.0f * PATCH_WINDOW_DIMENSION_SQUARE));
float weight = 1.0f;
// Ignore weight if search position is out of bounds.
weight *= step(0, search_pos.x) * step(search_pos.x, bake_params.atlas_size.x - 1);
weight *= step(0, search_pos.y) * step(search_pos.y, bake_params.atlas_size.y - 1);
// Ignore weight if normal is zero length.
weight *= step(EPSILON, length(search_normal));
// Weight with pixel distance.
vec2 pixel_delta = vec2(search_x, search_y);
float pixel_square_dist = dot(pixel_delta, pixel_delta);
weight *= exp(-pixel_square_dist / TWO_SIGMA_SPATIAL_SQUARE);
// Weight with patch.
weight *= exp(-patch_square_dist / FILTER_SQUARE_TWO_SIGMA_LIGHT_SQUARE);
// Weight with albedo.
vec3 albedo_delta = input_albedo - search_albedo;
float albedo_square_dist = dot(albedo_delta, albedo_delta);
weight *= exp(-albedo_square_dist / TWO_SIGMA_ALBEDO_SQUARE);
// Weight with normal.
vec3 normal_delta = input_normal - search_normal;
float normal_square_dist = dot(normal_delta, normal_delta);
weight *= exp(-normal_square_dist / TWO_SIGMA_NORMAL_SQUARE);
denoised_rgb += weight * search_rgb;
sum_weights += weight;
}
}
denoised_rgb /= sum_weights;
} else {
// Ignore pixels where the normal is empty, just copy the light color.
denoised_rgb = input_light.rgb;
}
imageStore(dest_light, ivec3(atlas_pos, lightmap_slice), vec4(denoised_rgb, input_light.a));
}
#endif
}