Shader optimizations to reduce VGPR usage and increase occupancy

This commit is contained in:
reduz 2021-01-08 13:03:09 -03:00 committed by Juan Linietsky
parent 53e234fe5f
commit 7008e3c6ea
5 changed files with 882 additions and 636 deletions

View file

@ -6062,22 +6062,17 @@ void RendererSceneRenderRD::_setup_reflections(const PagedArray<RID> &p_reflecti
reflection_ubo.box_offset[2] = origin_offset.z; reflection_ubo.box_offset[2] = origin_offset.z;
reflection_ubo.mask = storage->reflection_probe_get_cull_mask(base_probe); reflection_ubo.mask = storage->reflection_probe_get_cull_mask(base_probe);
float intensity = storage->reflection_probe_get_intensity(base_probe); reflection_ubo.intensity = storage->reflection_probe_get_intensity(base_probe);
bool interior = storage->reflection_probe_is_interior(base_probe); reflection_ubo.ambient_mode = storage->reflection_probe_get_ambient_mode(base_probe);
bool box_projection = storage->reflection_probe_is_box_projection(base_probe);
reflection_ubo.params[0] = intensity; reflection_ubo.exterior = !storage->reflection_probe_is_interior(base_probe);
reflection_ubo.params[1] = 0; reflection_ubo.box_project = storage->reflection_probe_is_box_projection(base_probe);
reflection_ubo.params[2] = interior ? 1.0 : 0.0;
reflection_ubo.params[3] = box_projection ? 1.0 : 0.0;
Color ambient_linear = storage->reflection_probe_get_ambient_color(base_probe).to_linear(); Color ambient_linear = storage->reflection_probe_get_ambient_color(base_probe).to_linear();
float interior_ambient_energy = storage->reflection_probe_get_ambient_color_energy(base_probe); float interior_ambient_energy = storage->reflection_probe_get_ambient_color_energy(base_probe);
uint32_t ambient_mode = storage->reflection_probe_get_ambient_mode(base_probe);
reflection_ubo.ambient[0] = ambient_linear.r * interior_ambient_energy; reflection_ubo.ambient[0] = ambient_linear.r * interior_ambient_energy;
reflection_ubo.ambient[1] = ambient_linear.g * interior_ambient_energy; reflection_ubo.ambient[1] = ambient_linear.g * interior_ambient_energy;
reflection_ubo.ambient[2] = ambient_linear.b * interior_ambient_energy; reflection_ubo.ambient[2] = ambient_linear.b * interior_ambient_energy;
reflection_ubo.ambient_mode = ambient_mode;
Transform transform = reflection_probe_instance_get_transform(rpi); Transform transform = reflection_probe_instance_get_transform(rpi);
Transform proj = (p_camera_inverse_transform * transform).inverse(); Transform proj = (p_camera_inverse_transform * transform).inverse();
@ -6300,13 +6295,14 @@ void RendererSceneRenderRD::_setup_lights(const PagedArray<RID> &p_lights, const
float sign = storage->light_is_negative(base) ? -1 : 1; float sign = storage->light_is_negative(base) ? -1 : 1;
Color linear_col = storage->light_get_color(base).to_linear(); Color linear_col = storage->light_get_color(base).to_linear();
light_data.attenuation_energy[0] = Math::make_half_float(storage->light_get_param(base, RS::LIGHT_PARAM_ATTENUATION)); light_data.attenuation = storage->light_get_param(base, RS::LIGHT_PARAM_ATTENUATION);
light_data.attenuation_energy[1] = Math::make_half_float(sign * storage->light_get_param(base, RS::LIGHT_PARAM_ENERGY) * Math_PI);
light_data.color_specular[0] = MIN(uint32_t(linear_col.r * 255), 255); float energy = sign * storage->light_get_param(base, RS::LIGHT_PARAM_ENERGY) * Math_PI;
light_data.color_specular[1] = MIN(uint32_t(linear_col.g * 255), 255);
light_data.color_specular[2] = MIN(uint32_t(linear_col.b * 255), 255); light_data.color[0] = linear_col.r * energy;
light_data.color_specular[3] = MIN(uint32_t(storage->light_get_param(base, RS::LIGHT_PARAM_SPECULAR) * 255), 255); light_data.color[1] = linear_col.g * energy;
light_data.color[2] = linear_col.b * energy;
light_data.specular_amount = storage->light_get_param(base, RS::LIGHT_PARAM_SPECULAR);
float radius = MAX(0.001, storage->light_get_param(base, RS::LIGHT_PARAM_RANGE)); float radius = MAX(0.001, storage->light_get_param(base, RS::LIGHT_PARAM_RANGE));
light_data.inv_radius = 1.0 / radius; light_data.inv_radius = 1.0 / radius;
@ -6327,9 +6323,9 @@ void RendererSceneRenderRD::_setup_lights(const PagedArray<RID> &p_lights, const
light_data.size = size; light_data.size = size;
light_data.cone_attenuation_angle[0] = Math::make_half_float(storage->light_get_param(base, RS::LIGHT_PARAM_SPOT_ATTENUATION)); light_data.cone_attenuation = storage->light_get_param(base, RS::LIGHT_PARAM_SPOT_ATTENUATION);
float spot_angle = storage->light_get_param(base, RS::LIGHT_PARAM_SPOT_ANGLE); float spot_angle = storage->light_get_param(base, RS::LIGHT_PARAM_SPOT_ANGLE);
light_data.cone_attenuation_angle[1] = Math::make_half_float(Math::cos(Math::deg2rad(spot_angle))); light_data.cone_angle = Math::cos(Math::deg2rad(spot_angle));
light_data.mask = storage->light_get_cull_mask(base); light_data.mask = storage->light_get_cull_mask(base);
@ -6364,12 +6360,7 @@ void RendererSceneRenderRD::_setup_lights(const PagedArray<RID> &p_lights, const
if (p_using_shadows && p_shadow_atlas.is_valid() && shadow_atlas_owns_light_instance(p_shadow_atlas, li)) { if (p_using_shadows && p_shadow_atlas.is_valid() && shadow_atlas_owns_light_instance(p_shadow_atlas, li)) {
// fill in the shadow information // fill in the shadow information
Color shadow_color = storage->light_get_shadow_color(base); light_data.shadow_enabled = true;
light_data.shadow_color_enabled[0] = MIN(uint32_t(shadow_color.r * 255), 255);
light_data.shadow_color_enabled[1] = MIN(uint32_t(shadow_color.g * 255), 255);
light_data.shadow_color_enabled[2] = MIN(uint32_t(shadow_color.b * 255), 255);
light_data.shadow_color_enabled[3] = 255;
if (type == RS::LIGHT_SPOT) { if (type == RS::LIGHT_SPOT) {
light_data.shadow_bias = (storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_BIAS) * radius / 10.0); light_data.shadow_bias = (storage->light_get_param(base, RS::LIGHT_PARAM_SHADOW_BIAS) * radius / 10.0);
@ -6427,7 +6418,7 @@ void RendererSceneRenderRD::_setup_lights(const PagedArray<RID> &p_lights, const
} }
} }
} else { } else {
light_data.shadow_color_enabled[3] = 0; light_data.shadow_enabled = false;
} }
light_instance_set_index(li, light_count); light_instance_set_index(li, light_count);
@ -6763,7 +6754,7 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e
cluster.lights_shadow_rect_cache_count = 0; cluster.lights_shadow_rect_cache_count = 0;
for (int i = 0; i < p_positional_light_count; i++) { for (int i = 0; i < p_positional_light_count; i++) {
if (cluster.lights[i].shadow_color_enabled[3] > 127) { if (cluster.lights[i].shadow_enabled != 0) {
RID li = cluster.lights_instances[i]; RID li = cluster.lights_instances[i];
ERR_CONTINUE(!shadow_atlas->shadow_owners.has(li)); ERR_CONTINUE(!shadow_atlas->shadow_owners.has(li));
@ -8499,7 +8490,6 @@ RendererSceneRenderRD::RendererSceneRenderRD(RendererStorageRD *p_storage) {
{ //reflections { //reflections
uint32_t reflection_buffer_size; uint32_t reflection_buffer_size;
if (uniform_max_size < 65536) { if (uniform_max_size < 65536) {
//Yes, you guessed right, ARM again
reflection_buffer_size = uniform_max_size; reflection_buffer_size = uniform_max_size;
} else { } else {
reflection_buffer_size = 65536; reflection_buffer_size = 65536;

View file

@ -1297,14 +1297,23 @@ private:
struct Cluster { struct Cluster {
/* Scene State UBO */ /* Scene State UBO */
struct ReflectionData { //should always be 128 bytes enum {
REFLECTION_AMBIENT_DISABLED = 0,
REFLECTION_AMBIENT_ENVIRONMENT = 1,
REFLECTION_AMBIENT_COLOR = 2,
};
struct ReflectionData {
float box_extents[3]; float box_extents[3];
float index; float index;
float box_offset[3]; float box_offset[3];
uint32_t mask; uint32_t mask;
float params[4]; // intensity, 0, interior , boxproject
float ambient[3]; // ambient color, float ambient[3]; // ambient color,
float intensity;
bool exterior;
bool box_project;
uint32_t ambient_mode; uint32_t ambient_mode;
uint32_t pad;
float local_matrix[16]; // up to here for spot and omni, rest is for directional float local_matrix[16]; // up to here for spot and omni, rest is for directional
}; };
@ -1313,10 +1322,15 @@ private:
float inv_radius; float inv_radius;
float direction[3]; float direction[3];
float size; float size;
uint16_t attenuation_energy[2]; //16 bits attenuation, then energy
uint8_t color_specular[4]; //rgb color, a specular (8 bit unorm) float color[3];
uint16_t cone_attenuation_angle[2]; // attenuation and angle, (16bit float) float attenuation;
uint8_t shadow_color_enabled[4]; //shadow rgb color, a>0.5 enabled (8bit unorm)
float cone_attenuation;
float cone_angle;
float specular_amount;
uint32_t shadow_enabled;
float atlas_rect[4]; // in omni, used for atlas uv, in spot, used for projector uv float atlas_rect[4]; // in omni, used for atlas uv, in spot, used for projector uv
float shadow_matrix[16]; float shadow_matrix[16];
float shadow_bias; float shadow_bias;

View file

@ -6,12 +6,18 @@
struct LightData { //this structure needs to be as packed as possible struct LightData { //this structure needs to be as packed as possible
vec3 position; vec3 position;
float inv_radius; float inv_radius;
vec3 direction; vec3 direction;
float size; float size;
uint attenuation_energy; //attenuation
uint color_specular; //rgb color, a specular (8 bit unorm) vec3 color;
uint cone_attenuation_angle; // attenuation and angle, (16bit float) float attenuation;
uint shadow_color_enabled; //shadow rgb color, a>0.5 enabled (8bit unorm)
float cone_attenuation;
float cone_angle;
float specular_amount;
bool shadow_enabled;
vec4 atlas_rect; // rect in the shadow atlas vec4 atlas_rect; // rect in the shadow atlas
mat4 shadow_matrix; mat4 shadow_matrix;
float shadow_bias; float shadow_bias;
@ -34,9 +40,13 @@ struct ReflectionData {
float index; float index;
vec3 box_offset; vec3 box_offset;
uint mask; uint mask;
vec4 params; // intensity, 0, interior , boxproject
vec3 ambient; // ambient color vec3 ambient; // ambient color
float intensity;
bool exterior;
bool box_project;
uint ambient_mode; uint ambient_mode;
uint pad;
//0-8 is intensity,8-9 is ambient, mode
mat4 local_matrix; // up to here for spot and omni, rest is for directional mat4 local_matrix; // up to here for spot and omni, rest is for directional
// notes: for ambientblend, use distance to edge to blend between already existing global environment // notes: for ambientblend, use distance to edge to blend between already existing global environment
}; };

File diff suppressed because it is too large Load diff

View file

@ -280,19 +280,14 @@ void main() {
vec3 light_pos = lights.data[i].position; vec3 light_pos = lights.data[i].position;
float d = distance(lights.data[i].position, view_pos); float d = distance(lights.data[i].position, view_pos);
vec3 shadow_attenuation = vec3(1.0); float shadow_attenuation = 1.0;
if (d * lights.data[i].inv_radius < 1.0) { if (d * lights.data[i].inv_radius < 1.0) {
vec2 attenuation_energy = unpackHalf2x16(lights.data[i].attenuation_energy); float attenuation = get_omni_attenuation(d, lights.data[i].inv_radius, lights.data[i].attenuation);
vec4 color_specular = unpackUnorm4x8(lights.data[i].color_specular);
float attenuation = get_omni_attenuation(d, lights.data[i].inv_radius, attenuation_energy.x); vec3 light = lights.data[i].color / M_PI;
vec3 light = attenuation_energy.y * color_specular.rgb / M_PI; if (lights.data[i].shadow_enabled) {
vec4 shadow_color_enabled = unpackUnorm4x8(lights.data[i].shadow_color_enabled);
if (shadow_color_enabled.a > 0.5) {
//has shadow //has shadow
vec4 v = vec4(view_pos, 1.0); vec4 v = vec4(view_pos, 1.0);
@ -319,9 +314,8 @@ void main() {
splane.w = 1.0; //needed? i think it should be 1 already splane.w = 1.0; //needed? i think it should be 1 already
float depth = texture(sampler2D(shadow_atlas, linear_sampler), splane.xy).r; float depth = texture(sampler2D(shadow_atlas, linear_sampler), splane.xy).r;
float shadow = exp(min(0.0, (depth - splane.z)) / lights.data[i].inv_radius * lights.data[i].shadow_volumetric_fog_fade);
shadow_attenuation = mix(shadow_color_enabled.rgb, vec3(1.0), shadow); shadow_attenuation = exp(min(0.0, (depth - splane.z)) / lights.data[i].inv_radius * lights.data[i].shadow_volumetric_fog_fade);
} }
total_light += light * attenuation * shadow_attenuation; total_light += light * attenuation * shadow_attenuation;
} }
@ -336,25 +330,19 @@ void main() {
vec3 light_pos = lights.data[i].position; vec3 light_pos = lights.data[i].position;
vec3 light_rel_vec = lights.data[i].position - view_pos; vec3 light_rel_vec = lights.data[i].position - view_pos;
float d = length(light_rel_vec); float d = length(light_rel_vec);
vec3 shadow_attenuation = vec3(1.0); float shadow_attenuation = 1.0;
if (d * lights.data[i].inv_radius < 1.0) { if (d * lights.data[i].inv_radius < 1.0) {
vec2 attenuation_energy = unpackHalf2x16(lights.data[i].attenuation_energy); float attenuation = get_omni_attenuation(d, lights.data[i].inv_radius, lights.data[i].attenuation);
vec4 color_specular = unpackUnorm4x8(lights.data[i].color_specular);
float attenuation = get_omni_attenuation(d, lights.data[i].inv_radius, attenuation_energy.x);
vec3 spot_dir = lights.data[i].direction; vec3 spot_dir = lights.data[i].direction;
vec2 spot_att_angle = unpackHalf2x16(lights.data[i].cone_attenuation_angle); float scos = max(dot(-normalize(light_rel_vec), spot_dir), lights.data[i].cone_angle);
float scos = max(dot(-normalize(light_rel_vec), spot_dir), spot_att_angle.y); float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - lights.data[i].cone_angle));
float spot_rim = max(0.0001, (1.0 - scos) / (1.0 - spot_att_angle.y)); attenuation *= 1.0 - pow(spot_rim, lights.data[i].cone_attenuation);
attenuation *= 1.0 - pow(spot_rim, spot_att_angle.x);
vec3 light = attenuation_energy.y * color_specular.rgb / M_PI; vec3 light = lights.data[i].color / M_PI;
vec4 shadow_color_enabled = unpackUnorm4x8(lights.data[i].shadow_color_enabled); if (lights.data[i].shadow_enabled) {
if (shadow_color_enabled.a > 0.5) {
//has shadow //has shadow
vec4 v = vec4(view_pos, 1.0); vec4 v = vec4(view_pos, 1.0);
@ -362,9 +350,8 @@ void main() {
splane /= splane.w; splane /= splane.w;
float depth = texture(sampler2D(shadow_atlas, linear_sampler), splane.xy).r; float depth = texture(sampler2D(shadow_atlas, linear_sampler), splane.xy).r;
float shadow = exp(min(0.0, (depth - splane.z)) / lights.data[i].inv_radius * lights.data[i].shadow_volumetric_fog_fade);
shadow_attenuation = mix(shadow_color_enabled.rgb, vec3(1.0), shadow); shadow_attenuation = exp(min(0.0, (depth - splane.z)) / lights.data[i].inv_radius * lights.data[i].shadow_volumetric_fog_fade);
} }
total_light += light * attenuation * shadow_attenuation; total_light += light * attenuation * shadow_attenuation;