From 4df39dc14036176d00ffc1a191140e05d9a1982a Mon Sep 17 00:00:00 2001 From: joined72 Date: Sat, 24 Feb 2024 18:42:17 +0100 Subject: [PATCH] Fix CPU/GPUParticles2D bugs on Compatibility Rendering (GLES3) on Adreno 3XX devices. --- drivers/gles3/shaders/canvas.glsl | 16 +++-- drivers/gles3/shaders/particles.glsl | 3 +- drivers/gles3/shaders/particles_copy.glsl | 69 ++++++++++----------- drivers/gles3/shaders/scene.glsl | 14 +++-- drivers/gles3/shaders/stdlib_inc.glsl | 33 ++++++---- drivers/gles3/storage/particles_storage.cpp | 8 +++ 6 files changed, 84 insertions(+), 59 deletions(-) diff --git a/drivers/gles3/shaders/canvas.glsl b/drivers/gles3/shaders/canvas.glsl index 8da7d7dc8078..efddbe9ad211 100644 --- a/drivers/gles3/shaders/canvas.glsl +++ b/drivers/gles3/shaders/canvas.glsl @@ -160,15 +160,18 @@ void main() { if (gl_VertexID % 3 == 0) { vertex = read_draw_data_point_a; uv = read_draw_data_uv_a; - color = vec4(unpackHalf2x16(read_draw_data_color_a_rg), unpackHalf2x16(read_draw_data_color_a_ba)); + color.xy = unpackHalf2x16(read_draw_data_color_a_rg); + color.zw = unpackHalf2x16(read_draw_data_color_a_ba); } else if (gl_VertexID % 3 == 1) { vertex = read_draw_data_point_b; uv = read_draw_data_uv_b; - color = vec4(unpackHalf2x16(read_draw_data_color_b_rg), unpackHalf2x16(read_draw_data_color_b_ba)); + color.xy = unpackHalf2x16(read_draw_data_color_b_rg); + color.zw = unpackHalf2x16(read_draw_data_color_b_ba); } else { vertex = read_draw_data_point_c; uv = read_draw_data_uv_c; - color = vec4(unpackHalf2x16(read_draw_data_color_c_rg), unpackHalf2x16(read_draw_data_color_c_ba)); + color.xy = unpackHalf2x16(read_draw_data_color_c_rg); + color.zw = unpackHalf2x16(read_draw_data_color_c_ba); } #elif defined(USE_ATTRIBUTES) @@ -178,11 +181,14 @@ void main() { #ifdef USE_INSTANCING if (bool(read_draw_data_flags & FLAGS_INSTANCING_HAS_COLORS)) { - vec4 instance_color = vec4(unpackHalf2x16(instance_color_custom_data.x), unpackHalf2x16(instance_color_custom_data.y)); + vec4 instance_color; + instance_color.xy = unpackHalf2x16(uint(instance_color_custom_data.x)); + instance_color.zw = unpackHalf2x16(uint(instance_color_custom_data.y)); color *= instance_color; } if (bool(read_draw_data_flags & FLAGS_INSTANCING_HAS_CUSTOM_DATA)) { - instance_custom = vec4(unpackHalf2x16(instance_color_custom_data.z), unpackHalf2x16(instance_color_custom_data.w)); + instance_custom.xy = unpackHalf2x16(instance_color_custom_data.z); + instance_custom.zw = unpackHalf2x16(instance_color_custom_data.w); } #endif // !USE_INSTANCING diff --git a/drivers/gles3/shaders/particles.glsl b/drivers/gles3/shaders/particles.glsl index 64ef26b0756c..1472761fe6f7 100644 --- a/drivers/gles3/shaders/particles.glsl +++ b/drivers/gles3/shaders/particles.glsl @@ -321,7 +321,8 @@ void main() { amount = max(0.0, 1.0 - d); } else if (attractors[i].type == ATTRACTOR_TYPE_VECTOR_FIELD) { } - amount = pow(amount, attractors[i].attenuation); + mediump float attractor_attenuation = attractors[i].attenuation; + amount = pow(amount, attractor_attenuation); dir = safe_normalize(mix(dir, attractors[i].transform[2].xyz, attractors[i].directionality)); attractor_force -= amount * dir * attractors[i].strength; } diff --git a/drivers/gles3/shaders/particles_copy.glsl b/drivers/gles3/shaders/particles_copy.glsl index 0bb8efc52de7..55b5e6d7ce9f 100644 --- a/drivers/gles3/shaders/particles_copy.glsl +++ b/drivers/gles3/shaders/particles_copy.glsl @@ -57,45 +57,39 @@ void main() { txform = transpose(mat4(xform_1, xform_2, vec4(0.0, 0.0, 1.0, 0.0), vec4(0.0, 0.0, 0.0, 1.0))); #endif - switch (align_mode) { - case TRANSFORM_ALIGN_DISABLED: { - } break; //nothing - case TRANSFORM_ALIGN_Z_BILLBOARD: { - mat3 local = mat3(normalize(cross(align_up, sort_direction)), align_up, sort_direction); - local = local * mat3(txform); - txform[0].xyz = local[0]; - txform[1].xyz = local[1]; - txform[2].xyz = local[2]; + if (align_mode == TRANSFORM_ALIGN_DISABLED) { + // nothing + } else if (align_mode == TRANSFORM_ALIGN_Z_BILLBOARD) { + mat3 local = mat3(normalize(cross(align_up, sort_direction)), align_up, sort_direction); + local = local * mat3(txform); + txform[0].xyz = local[0]; + txform[1].xyz = local[1]; + txform[2].xyz = local[2]; + } else if (align_mode == TRANSFORM_ALIGN_Y_TO_VELOCITY) { + vec3 v = velocity_flags.xyz; + float s = (length(txform[0]) + length(txform[1]) + length(txform[2])) / 3.0; + if (length(v) > 0.0) { + txform[1].xyz = normalize(v); + } else { + txform[1].xyz = normalize(txform[1].xyz); + } - } break; - case TRANSFORM_ALIGN_Y_TO_VELOCITY: { - vec3 v = velocity_flags.xyz; - float s = (length(txform[0]) + length(txform[1]) + length(txform[2])) / 3.0; - if (length(v) > 0.0) { - txform[1].xyz = normalize(v); - } else { - txform[1].xyz = normalize(txform[1].xyz); - } + txform[0].xyz = normalize(cross(txform[1].xyz, txform[2].xyz)); + txform[2].xyz = vec3(0.0, 0.0, 1.0) * s; + txform[0].xyz *= s; + txform[1].xyz *= s; + } else if (align_mode == TRANSFORM_ALIGN_Z_BILLBOARD_Y_TO_VELOCITY) { + vec3 sv = velocity_flags.xyz - sort_direction * dot(sort_direction, velocity_flags.xyz); //screen velocity - txform[0].xyz = normalize(cross(txform[1].xyz, txform[2].xyz)); - txform[2].xyz = vec3(0.0, 0.0, 1.0) * s; - txform[0].xyz *= s; - txform[1].xyz *= s; - } break; - case TRANSFORM_ALIGN_Z_BILLBOARD_Y_TO_VELOCITY: { - vec3 sv = velocity_flags.xyz - sort_direction * dot(sort_direction, velocity_flags.xyz); //screen velocity + if (length(sv) == 0.0) { + sv = align_up; + } - if (length(sv) == 0.0) { - sv = align_up; - } + sv = normalize(sv); - sv = normalize(sv); - - txform[0].xyz = normalize(cross(sv, sort_direction)) * length(txform[0]); - txform[1].xyz = sv * length(txform[1]); - txform[2].xyz = sort_direction * length(txform[2]); - - } break; + txform[0].xyz = normalize(cross(sv, sort_direction)) * length(txform[0]); + txform[1].xyz = sv * length(txform[1]); + txform[2].xyz = sort_direction * length(txform[2]); } txform[3].xyz += velocity_flags.xyz * frame_remainder; @@ -108,7 +102,10 @@ void main() { } txform = transpose(txform); - instance_color_custom_data = uvec4(packHalf2x16(color.xy), packHalf2x16(color.zw), packHalf2x16(custom.xy), packHalf2x16(custom.zw)); + instance_color_custom_data.x = packHalf2x16(color.xy); + instance_color_custom_data.y = packHalf2x16(color.zw); + instance_color_custom_data.z = packHalf2x16(custom.xy); + instance_color_custom_data.w = packHalf2x16(custom.zw); out_xform_1 = txform[0]; out_xform_2 = txform[1]; #ifdef MODE_3D diff --git a/drivers/gles3/shaders/scene.glsl b/drivers/gles3/shaders/scene.glsl index a6db90c3f543..801811d1cd8a 100644 --- a/drivers/gles3/shaders/scene.glsl +++ b/drivers/gles3/shaders/scene.glsl @@ -366,7 +366,9 @@ void main() { #if defined(COLOR_USED) color_interp = color_attrib; #ifdef USE_INSTANCING - vec4 instance_color = vec4(unpackHalf2x16(instance_color_custom_data.x), unpackHalf2x16(instance_color_custom_data.y)); + vec4 instance_color; + instance_color.xy = unpackHalf2x16(instance_color_custom_data.x); + instance_color.zw = unpackHalf2x16(instance_color_custom_data.y); color_interp *= instance_color; #endif #endif @@ -403,7 +405,9 @@ void main() { #endif //USE_MULTIVIEW #ifdef USE_INSTANCING - vec4 instance_custom = vec4(unpackHalf2x16(instance_color_custom_data.z), unpackHalf2x16(instance_color_custom_data.w)); + vec4 instance_custom; + instance_custom.xy = unpackHalf2x16(instance_color_custom_data.z); + instance_custom.zw = unpackHalf2x16(instance_color_custom_data.w); #else vec4 instance_custom = vec4(0.0); #endif @@ -1749,7 +1753,8 @@ void main() { #endif //!MODE_UNSHADED #ifndef FOG_DISABLED - fog = vec4(unpackHalf2x16(fog_rg), unpackHalf2x16(fog_ba)); + fog.xy = unpackHalf2x16(fog_rg); + fog.zw = unpackHalf2x16(fog_ba); #ifndef DISABLE_FOG if (scene_data.fog_enabled) { @@ -1966,7 +1971,8 @@ void main() { vec3 additive_light_color = diffuse_light + specular_light; #ifndef FOG_DISABLED - fog = vec4(unpackHalf2x16(fog_rg), unpackHalf2x16(fog_ba)); + fog.xy = unpackHalf2x16(fog_rg); + fog.zw = unpackHalf2x16(fog_ba); #ifndef DISABLE_FOG if (scene_data.fog_enabled) { diff --git a/drivers/gles3/shaders/stdlib_inc.glsl b/drivers/gles3/shaders/stdlib_inc.glsl index 92bf2d87e4c7..029084c34c6c 100644 --- a/drivers/gles3/shaders/stdlib_inc.glsl +++ b/drivers/gles3/shaders/stdlib_inc.glsl @@ -1,5 +1,12 @@ -#ifdef USE_GLES_OVER_GL +// Compatibility renames. These are exposed with the "godot_" prefix +// to work around two distinct Adreno bugs: +// 1. Some Adreno devices expose ES310 functions in ES300 shaders. +// Internally, we must use the "godot_" prefix, but user shaders +// will be mapped automatically. +// 2. Adreno 3XX devices have poor implementations of the other packing +// functions, so we just use our own everywhere to keep it simple. + // Floating point pack/unpack functions are part of the GLSL ES 300 specification used by web and mobile. uint float2half(uint f) { uint e = f & uint(0x7f800000); @@ -17,40 +24,34 @@ uint half2float(uint h) { return ((h & uint(0x8000)) << uint(16)) | uint((h_e >> uint(10)) != uint(0)) * (((h_e + uint(0x1c000)) << uint(13)) | ((h & uint(0x03ff)) << uint(13))); } -uint packHalf2x16(vec2 v) { +uint godot_packHalf2x16(vec2 v) { return float2half(floatBitsToUint(v.x)) | float2half(floatBitsToUint(v.y)) << uint(16); } -vec2 unpackHalf2x16(uint v) { +vec2 godot_unpackHalf2x16(uint v) { return vec2(uintBitsToFloat(half2float(v & uint(0xffff))), uintBitsToFloat(half2float(v >> uint(16)))); } -uint packUnorm2x16(vec2 v) { +uint godot_packUnorm2x16(vec2 v) { uvec2 uv = uvec2(round(clamp(v, vec2(0.0), vec2(1.0)) * 65535.0)); return uv.x | uv.y << uint(16); } -vec2 unpackUnorm2x16(uint p) { +vec2 godot_unpackUnorm2x16(uint p) { return vec2(float(p & uint(0xffff)), float(p >> uint(16))) * 0.000015259021; // 1.0 / 65535.0 optimization } -uint packSnorm2x16(vec2 v) { +uint godot_packSnorm2x16(vec2 v) { uvec2 uv = uvec2(round(clamp(v, vec2(-1.0), vec2(1.0)) * 32767.0) + 32767.0); return uv.x | uv.y << uint(16); } -vec2 unpackSnorm2x16(uint p) { +vec2 godot_unpackSnorm2x16(uint p) { vec2 v = vec2(float(p & uint(0xffff)), float(p >> uint(16))); return clamp((v - 32767.0) * vec2(0.00003051851), vec2(-1.0), vec2(1.0)); } -#endif - -// Compatibility renames. These are exposed with the "godot_" prefix -// to work around an Adreno bug which was exposing these ES310 functions -// in ES300 shaders. Internally, we must use the "godot_" prefix, but user shaders -// will be mapped automatically. uint godot_packUnorm4x8(vec4 v) { uvec4 uv = uvec4(round(clamp(v, vec4(0.0), vec4(1.0)) * 255.0)); return uv.x | (uv.y << uint(8)) | (uv.z << uint(16)) | (uv.w << uint(24)); @@ -74,3 +75,9 @@ vec4 godot_unpackSnorm4x8(uint p) { #define unpackUnorm4x8 godot_unpackUnorm4x8 #define packSnorm4x8 godot_packSnorm4x8 #define unpackSnorm4x8 godot_unpackSnorm4x8 +#define packHalf2x16 godot_packHalf2x16 +#define unpackHalf2x16 godot_unpackHalf2x16 +#define packUnorm2x16 godot_packUnorm2x16 +#define unpackUnorm2x16 godot_unpackUnorm2x16 +#define packSnorm2x16 godot_packSnorm2x16 +#define unpackSnorm2x16 godot_unpackSnorm2x16 diff --git a/drivers/gles3/storage/particles_storage.cpp b/drivers/gles3/storage/particles_storage.cpp index e263acf88b1f..8a97ee9defe5 100644 --- a/drivers/gles3/storage/particles_storage.cpp +++ b/drivers/gles3/storage/particles_storage.cpp @@ -31,6 +31,8 @@ #ifdef GLES3_ENABLED #include "particles_storage.h" + +#include "config.h" #include "material_storage.h" #include "mesh_storage.h" #include "texture_storage.h" @@ -120,6 +122,8 @@ void ParticlesStorage::particles_set_mode(RID p_particles, RS::ParticlesMode p_m } void ParticlesStorage::particles_set_emitting(RID p_particles, bool p_emitting) { + ERR_FAIL_COND_MSG(GLES3::Config::get_singleton()->adreno_3xx_compatibility, "Due to driver bugs, GPUParticles are not supported on Adreno 3XX devices. Please use CPUParticles instead."); + Particles *particles = particles_owner.get_or_null(p_particles); ERR_FAIL_NULL(particles); @@ -127,6 +131,10 @@ void ParticlesStorage::particles_set_emitting(RID p_particles, bool p_emitting) } bool ParticlesStorage::particles_get_emitting(RID p_particles) { + if (GLES3::Config::get_singleton()->adreno_3xx_compatibility) { + return false; + } + ERR_FAIL_COND_V_MSG(RSG::threaded, false, "This function should never be used with threaded rendering, as it stalls the renderer."); Particles *particles = particles_owner.get_or_null(p_particles); ERR_FAIL_NULL_V(particles, false);