From ba832d83b2dafcdbb79b93b1e97fd518c12b97bf Mon Sep 17 00:00:00 2001 From: jfons Date: Mon, 4 Apr 2022 16:10:22 +0200 Subject: [PATCH] Initial TAA implementation Initial TAA support based on the implementation in Spartan Engine. Motion vectors are correctly generated for camera and mesh movement, but there is no support for other things like particles or skeleton deformations. --- COPYRIGHT.txt | 5 + core/math/camera_matrix.cpp | 5 + core/math/camera_matrix.h | 1 + doc/classes/ProjectSettings.xml | 4 + doc/classes/RenderingServer.xml | 10 + doc/classes/Viewport.xml | 6 + drivers/gles3/rasterizer_scene_gles3.cpp | 4 +- drivers/gles3/rasterizer_scene_gles3.h | 4 +- editor/plugins/node_3d_editor_plugin.cpp | 9 +- editor/plugins/node_3d_editor_plugin.h | 1 + scene/main/scene_tree.cpp | 3 + scene/main/viewport.cpp | 17 + scene/main/viewport.h | 5 + .../rendering/dummy/rasterizer_scene_dummy.h | 4 +- servers/rendering/renderer_rd/effects_rd.cpp | 40 +- servers/rendering/renderer_rd/effects_rd.h | 16 + .../render_forward_clustered.cpp | 183 +++++--- .../render_forward_clustered.h | 21 +- .../scene_shader_forward_clustered.cpp | 29 +- .../scene_shader_forward_clustered.h | 6 +- .../forward_mobile/render_forward_mobile.cpp | 6 +- .../forward_mobile/render_forward_mobile.h | 3 +- .../renderer_rd/renderer_scene_render_rd.cpp | 81 +++- .../renderer_rd/renderer_scene_render_rd.h | 26 +- .../shaders/scene_forward_clustered.glsl | 102 +++-- .../shaders/scene_forward_clustered_inc.glsl | 12 +- .../shaders/scene_forward_lights_inc.glsl | 26 +- .../shaders/scene_forward_mobile.glsl | 29 +- .../shaders/scene_forward_mobile_inc.glsl | 8 +- .../renderer_rd/shaders/taa_resolve.glsl | 393 ++++++++++++++++++ servers/rendering/renderer_scene.h | 4 +- servers/rendering/renderer_scene_cull.cpp | 38 +- servers/rendering/renderer_scene_cull.h | 7 +- servers/rendering/renderer_scene_render.cpp | 3 +- servers/rendering/renderer_scene_render.h | 7 +- servers/rendering/renderer_viewport.cpp | 31 +- servers/rendering/renderer_viewport.h | 9 + servers/rendering/rendering_server_default.h | 1 + servers/rendering_server.cpp | 2 + servers/rendering_server.h | 5 +- 40 files changed, 990 insertions(+), 176 deletions(-) create mode 100644 servers/rendering/renderer_rd/shaders/taa_resolve.glsl diff --git a/COPYRIGHT.txt b/COPYRIGHT.txt index 06a5643dd30b..277ced8c3d46 100644 --- a/COPYRIGHT.txt +++ b/COPYRIGHT.txt @@ -131,6 +131,11 @@ Comment: Intel ASSAO and related files Copyright: 2016, Intel Corporation License: Expat +Files: ./servers/rendering/renderer_rd/shaders/taa_resolve.glsl +Comment: Temporal Anti-Aliasing reslove implementation +Copyright: 2016, Panos Karabelas +License: Expat + Files: ./thirdparty/amd-fsr/ Comment: AMD FidelityFX Super Resolution Copyright: 2021, Advanced Micro Devices, Inc. diff --git a/core/math/camera_matrix.cpp b/core/math/camera_matrix.cpp index 9443addd223a..57c53b0adb04 100644 --- a/core/math/camera_matrix.cpp +++ b/core/math/camera_matrix.cpp @@ -710,6 +710,11 @@ void CameraMatrix::scale_translate_to_fit(const AABB &p_aabb) { matrix[3][3] = 1; } +void CameraMatrix::add_jitter_offset(const Vector2 &p_offset) { + matrix[3][0] += p_offset.x; + matrix[3][1] += p_offset.y; +} + CameraMatrix::operator Transform3D() const { Transform3D tr; const real_t *m = &matrix[0][0]; diff --git a/core/math/camera_matrix.h b/core/math/camera_matrix.h index f1aea5e4e876..a4051cee3b00 100644 --- a/core/math/camera_matrix.h +++ b/core/math/camera_matrix.h @@ -95,6 +95,7 @@ struct CameraMatrix { operator String() const; void scale_translate_to_fit(const AABB &p_aabb); + void add_jitter_offset(const Vector2 &p_offset); void make_scale(const Vector3 &p_scale); int get_pixels_per_meter(int p_for_pixel_width) const; operator Transform3D() const; diff --git a/doc/classes/ProjectSettings.xml b/doc/classes/ProjectSettings.xml index 98205e0e1653..0293769a1f5d 100644 --- a/doc/classes/ProjectSettings.xml +++ b/doc/classes/ProjectSettings.xml @@ -1584,6 +1584,10 @@ + + Enables Temporal Anti-Aliasing for the default screen [Viewport]. TAA works by jittering the camera and accumulating the images of the last rendered frames, motion vector rendering is used to account for camera and object motion. + [b]Note:[/b] The implementation is not complete yet, some visual instances such as particles and skinned meshes may show artifacts. + diff --git a/doc/classes/RenderingServer.xml b/doc/classes/RenderingServer.xml index 4d039227cef6..841e792d3132 100644 --- a/doc/classes/RenderingServer.xml +++ b/doc/classes/RenderingServer.xml @@ -3340,6 +3340,14 @@ + + + + + + If [code]true[/code], use Temporal Anti-Aliasing. + + @@ -4105,6 +4113,8 @@ + + diff --git a/doc/classes/Viewport.xml b/doc/classes/Viewport.xml index def99b8a7aba..4727bc389e02 100644 --- a/doc/classes/Viewport.xml +++ b/doc/classes/Viewport.xml @@ -279,6 +279,10 @@ If [code]true[/code], [OccluderInstance3D] nodes will be usable for occlusion culling in 3D for this viewport. For the root viewport, [member ProjectSettings.rendering/occlusion_culling/use_occlusion_culling] must be set to [code]true[/code] instead. [b]Note:[/b] Enabling occlusion culling has a cost on the CPU. Only enable occlusion culling if you actually plan to use it, and think whether your scene can actually benefit from occlusion culling. Large, open scenes with few or no objects blocking the view will generally not benefit much from occlusion culling. Large open scenes generally benefit more from mesh LOD and visibility ranges ([member GeometryInstance3D.visibility_range_begin] and [member GeometryInstance3D.visibility_range_end]) compared to occlusion culling. + + Enables Temporal Anti-Aliasing for this viewport. TAA works by jittering the camera and accumulating the images of the last rendered frames, motion vector rendering is used to account for camera and object motion. + [b]Note:[/b] The implementation is not complete yet, some visual instances such as particles and skinned meshes may show artifacts. + If [code]true[/code], the viewport will use the primary XR interface to render XR output. When applicable this can result in a stereoscopic image and the resulting render being output to a headset. @@ -441,6 +445,8 @@ + + The texture filter reads from the nearest pixel only. The simplest and fastest method of filtering, but the texture will look pixelized. diff --git a/drivers/gles3/rasterizer_scene_gles3.cpp b/drivers/gles3/rasterizer_scene_gles3.cpp index 7ce1300d0735..3fe0ae4876ca 100644 --- a/drivers/gles3/rasterizer_scene_gles3.cpp +++ b/drivers/gles3/rasterizer_scene_gles3.cpp @@ -1902,7 +1902,7 @@ void RasterizerSceneGLES3::_setup_lights(const RenderDataGLES3 *p_render_data, b glBindBuffer(GL_UNIFORM_BUFFER, 0); } -void RasterizerSceneGLES3::render_scene(RID p_render_buffers, const CameraData *p_camera_data, const PagedArray &p_instances, const PagedArray &p_lights, const PagedArray &p_reflection_probes, const PagedArray &p_voxel_gi_instances, const PagedArray &p_decals, const PagedArray &p_lightmaps, const PagedArray &p_fog_volumes, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_occluder_debug_tex, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_mesh_lod_threshold, const RenderShadowData *p_render_shadows, int p_render_shadow_count, const RenderSDFGIData *p_render_sdfgi_regions, int p_render_sdfgi_region_count, const RenderSDFGIUpdateData *p_sdfgi_update_data, RendererScene::RenderInfo *r_render_info) { +void RasterizerSceneGLES3::render_scene(RID p_render_buffers, const CameraData *p_camera_data, const CameraData *p_prev_camera_data, const PagedArray &p_instances, const PagedArray &p_lights, const PagedArray &p_reflection_probes, const PagedArray &p_voxel_gi_instances, const PagedArray &p_decals, const PagedArray &p_lightmaps, const PagedArray &p_fog_volumes, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_occluder_debug_tex, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_mesh_lod_threshold, const RenderShadowData *p_render_shadows, int p_render_shadow_count, const RenderSDFGIData *p_render_sdfgi_regions, int p_render_sdfgi_region_count, const RenderSDFGIUpdateData *p_sdfgi_update_data, RendererScene::RenderInfo *r_render_info) { GLES3::TextureStorage *texture_storage = GLES3::TextureStorage::get_singleton(); GLES3::Config *config = GLES3::Config::get_singleton(); RENDER_TIMESTAMP("Setup 3D Scene"); @@ -2466,7 +2466,7 @@ RID RasterizerSceneGLES3::render_buffers_create() { return render_buffers_owner.make_rid(rb); } -void RasterizerSceneGLES3::render_buffers_configure(RID p_render_buffers, RID p_render_target, int p_internal_width, int p_internal_height, int p_width, int p_height, float p_fsr_sharpness, float p_fsr_mipmap_bias, RS::ViewportMSAA p_msaa, RS::ViewportScreenSpaceAA p_screen_space_aa, bool p_use_debanding, uint32_t p_view_count) { +void RasterizerSceneGLES3::render_buffers_configure(RID p_render_buffers, RID p_render_target, int p_internal_width, int p_internal_height, int p_width, int p_height, float p_fsr_sharpness, float p_fsr_mipmap_bias, RS::ViewportMSAA p_msaa, RS::ViewportScreenSpaceAA p_screen_space_aa, bool p_use_taa, bool p_use_debanding, uint32_t p_view_count) { GLES3::TextureStorage *texture_storage = GLES3::TextureStorage::get_singleton(); RenderBuffers *rb = render_buffers_owner.get_or_null(p_render_buffers); diff --git a/drivers/gles3/rasterizer_scene_gles3.h b/drivers/gles3/rasterizer_scene_gles3.h index ea6145d2a8d8..f76861bc908f 100644 --- a/drivers/gles3/rasterizer_scene_gles3.h +++ b/drivers/gles3/rasterizer_scene_gles3.h @@ -904,7 +904,7 @@ public: void voxel_gi_set_quality(RS::VoxelGIQuality) override; - void render_scene(RID p_render_buffers, const CameraData *p_camera_data, const PagedArray &p_instances, const PagedArray &p_lights, const PagedArray &p_reflection_probes, const PagedArray &p_voxel_gi_instances, const PagedArray &p_decals, const PagedArray &p_lightmaps, const PagedArray &p_fog_volumes, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_occluder_debug_tex, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_mesh_lod_threshold, const RenderShadowData *p_render_shadows, int p_render_shadow_count, const RenderSDFGIData *p_render_sdfgi_regions, int p_render_sdfgi_region_count, const RenderSDFGIUpdateData *p_sdfgi_update_data = nullptr, RendererScene::RenderInfo *r_render_info = nullptr) override; + void render_scene(RID p_render_buffers, const CameraData *p_camera_data, const CameraData *p_prev_camera_data, const PagedArray &p_instances, const PagedArray &p_lights, const PagedArray &p_reflection_probes, const PagedArray &p_voxel_gi_instances, const PagedArray &p_decals, const PagedArray &p_lightmaps, const PagedArray &p_fog_volumes, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_occluder_debug_tex, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_mesh_lod_threshold, const RenderShadowData *p_render_shadows, int p_render_shadow_count, const RenderSDFGIData *p_render_sdfgi_regions, int p_render_sdfgi_region_count, const RenderSDFGIUpdateData *p_sdfgi_update_data = nullptr, RendererScene::RenderInfo *r_render_info = nullptr) override; void render_material(const Transform3D &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, const PagedArray &p_instances, RID p_framebuffer, const Rect2i &p_region) override; void render_particle_collider_heightfield(RID p_collider, const Transform3D &p_transform, const PagedArray &p_instances) override; @@ -923,7 +923,7 @@ public: } RID render_buffers_create() override; - void render_buffers_configure(RID p_render_buffers, RID p_render_target, int p_internal_width, int p_internal_height, int p_width, int p_height, float p_fsr_sharpness, float p_fsr_mipmap_bias, RS::ViewportMSAA p_msaa, RS::ViewportScreenSpaceAA p_screen_space_aa, bool p_use_debanding, uint32_t p_view_count) override; + void render_buffers_configure(RID p_render_buffers, RID p_render_target, int p_internal_width, int p_internal_height, int p_width, int p_height, float p_fsr_sharpness, float p_fsr_mipmap_bias, RS::ViewportMSAA p_msaa, RS::ViewportScreenSpaceAA p_screen_space_aa, bool p_use_taa, bool p_use_debanding, uint32_t p_view_count) override; void gi_set_use_half_resolution(bool p_enable) override; void screen_space_roughness_limiter_set_active(bool p_enable, float p_amount, float p_curve) override; diff --git a/editor/plugins/node_3d_editor_plugin.cpp b/editor/plugins/node_3d_editor_plugin.cpp index c2c5b6391722..0a3303390aa2 100644 --- a/editor/plugins/node_3d_editor_plugin.cpp +++ b/editor/plugins/node_3d_editor_plugin.cpp @@ -2401,6 +2401,9 @@ void Node3DEditorViewport::_project_settings_changed() { viewport->set_msaa(Viewport::MSAA(msaa_mode)); const int ssaa_mode = GLOBAL_GET("rendering/anti_aliasing/quality/screen_space_aa"); viewport->set_screen_space_aa(Viewport::ScreenSpaceAA(ssaa_mode)); + const bool use_taa = GLOBAL_GET("rendering/anti_aliasing/quality/use_taa"); + viewport->set_use_taa(use_taa); + const bool use_debanding = GLOBAL_GET("rendering/anti_aliasing/quality/use_debanding"); viewport->set_use_debanding(use_debanding); @@ -3126,7 +3129,8 @@ void Node3DEditorViewport::_menu_option(int p_option) { case VIEW_DISPLAY_DEBUG_CLUSTER_SPOT_LIGHTS: case VIEW_DISPLAY_DEBUG_CLUSTER_DECALS: case VIEW_DISPLAY_DEBUG_CLUSTER_REFLECTION_PROBES: - case VIEW_DISPLAY_DEBUG_OCCLUDERS: { + case VIEW_DISPLAY_DEBUG_OCCLUDERS: + case VIEW_DISPLAY_MOTION_VECTORS: { static const int display_options[] = { VIEW_DISPLAY_NORMAL, VIEW_DISPLAY_WIREFRAME, @@ -3154,6 +3158,7 @@ void Node3DEditorViewport::_menu_option(int p_option) { VIEW_DISPLAY_DEBUG_CLUSTER_DECALS, VIEW_DISPLAY_DEBUG_CLUSTER_REFLECTION_PROBES, VIEW_DISPLAY_DEBUG_OCCLUDERS, + VIEW_DISPLAY_MOTION_VECTORS, VIEW_MAX }; static const Viewport::DebugDraw debug_draw_modes[] = { @@ -3183,6 +3188,7 @@ void Node3DEditorViewport::_menu_option(int p_option) { Viewport::DEBUG_DRAW_CLUSTER_DECALS, Viewport::DEBUG_DRAW_CLUSTER_REFLECTION_PROBES, Viewport::DEBUG_DRAW_OCCLUDERS, + Viewport::DEBUG_DRAW_MOTION_VECTORS, }; int idx = 0; @@ -4547,6 +4553,7 @@ Node3DEditorViewport::Node3DEditorViewport(Node3DEditor *p_spatial_editor, int p display_submenu->add_radio_check_item(TTR("Decal Cluster"), VIEW_DISPLAY_DEBUG_CLUSTER_DECALS); display_submenu->add_radio_check_item(TTR("ReflectionProbe Cluster"), VIEW_DISPLAY_DEBUG_CLUSTER_REFLECTION_PROBES); display_submenu->add_radio_check_item(TTR("Occlusion Culling Buffer"), VIEW_DISPLAY_DEBUG_OCCLUDERS); + display_submenu->add_radio_check_item(TTR("Motion Vectors"), VIEW_DISPLAY_MOTION_VECTORS); display_submenu->set_name("display_advanced"); view_menu->get_popup()->add_submenu_item(TTR("Display Advanced..."), "display_advanced", VIEW_DISPLAY_ADVANCED); diff --git a/editor/plugins/node_3d_editor_plugin.h b/editor/plugins/node_3d_editor_plugin.h index 627443bd624d..894ec9a11992 100644 --- a/editor/plugins/node_3d_editor_plugin.h +++ b/editor/plugins/node_3d_editor_plugin.h @@ -141,6 +141,7 @@ class Node3DEditorViewport : public Control { VIEW_DISPLAY_DEBUG_CLUSTER_DECALS, VIEW_DISPLAY_DEBUG_CLUSTER_REFLECTION_PROBES, VIEW_DISPLAY_DEBUG_OCCLUDERS, + VIEW_DISPLAY_MOTION_VECTORS, VIEW_LOCK_ROTATION, VIEW_CINEMATIC_PREVIEW, diff --git a/scene/main/scene_tree.cpp b/scene/main/scene_tree.cpp index b695dddf6575..14daf2c10872 100644 --- a/scene/main/scene_tree.cpp +++ b/scene/main/scene_tree.cpp @@ -1381,6 +1381,9 @@ SceneTree::SceneTree() { ProjectSettings::get_singleton()->set_custom_property_info("rendering/anti_aliasing/quality/screen_space_aa", PropertyInfo(Variant::INT, "rendering/anti_aliasing/quality/screen_space_aa", PROPERTY_HINT_ENUM, "Disabled (Fastest),FXAA (Fast)")); root->set_screen_space_aa(Viewport::ScreenSpaceAA(ssaa_mode)); + const bool use_taa = GLOBAL_DEF("rendering/anti_aliasing/quality/use_taa", false); + root->set_use_taa(use_taa); + const bool use_debanding = GLOBAL_DEF("rendering/anti_aliasing/quality/use_debanding", false); root->set_use_debanding(use_debanding); diff --git a/scene/main/viewport.cpp b/scene/main/viewport.cpp index 7c7809fd75b6..772bb2bbd1d9 100644 --- a/scene/main/viewport.cpp +++ b/scene/main/viewport.cpp @@ -2897,6 +2897,18 @@ Viewport::ScreenSpaceAA Viewport::get_screen_space_aa() const { return screen_space_aa; } +void Viewport::set_use_taa(bool p_use_taa) { + if (use_taa == p_use_taa) { + return; + } + use_taa = p_use_taa; + RS::get_singleton()->viewport_set_use_taa(viewport, p_use_taa); +} + +bool Viewport::is_using_taa() const { + return use_taa; +} + void Viewport::set_use_debanding(bool p_use_debanding) { if (use_debanding == p_use_debanding) { return; @@ -3632,6 +3644,9 @@ void Viewport::_bind_methods() { ClassDB::bind_method(D_METHOD("set_screen_space_aa", "screen_space_aa"), &Viewport::set_screen_space_aa); ClassDB::bind_method(D_METHOD("get_screen_space_aa"), &Viewport::get_screen_space_aa); + ClassDB::bind_method(D_METHOD("set_use_taa", "enable"), &Viewport::set_use_taa); + ClassDB::bind_method(D_METHOD("is_using_taa"), &Viewport::is_using_taa); + ClassDB::bind_method(D_METHOD("set_use_debanding", "enable"), &Viewport::set_use_debanding); ClassDB::bind_method(D_METHOD("is_using_debanding"), &Viewport::is_using_debanding); @@ -3760,6 +3775,7 @@ void Viewport::_bind_methods() { ADD_GROUP("Rendering", ""); ADD_PROPERTY(PropertyInfo(Variant::INT, "msaa", PROPERTY_HINT_ENUM, String::utf8("Disabled (Fastest),2× (Average),4× (Slow),8× (Slowest)")), "set_msaa", "get_msaa"); ADD_PROPERTY(PropertyInfo(Variant::INT, "screen_space_aa", PROPERTY_HINT_ENUM, "Disabled (Fastest),FXAA (Fast)"), "set_screen_space_aa", "get_screen_space_aa"); + ADD_PROPERTY(PropertyInfo(Variant::BOOL, "use_taa"), "set_use_taa", "is_using_taa"); ADD_PROPERTY(PropertyInfo(Variant::BOOL, "use_debanding"), "set_use_debanding", "is_using_debanding"); ADD_PROPERTY(PropertyInfo(Variant::BOOL, "use_occlusion_culling"), "set_use_occlusion_culling", "is_using_occlusion_culling"); ADD_PROPERTY(PropertyInfo(Variant::FLOAT, "mesh_lod_threshold", PROPERTY_HINT_RANGE, "0,1024,0.1"), "set_mesh_lod_threshold", "get_mesh_lod_threshold"); @@ -3858,6 +3874,7 @@ void Viewport::_bind_methods() { BIND_ENUM_CONSTANT(DEBUG_DRAW_CLUSTER_DECALS); BIND_ENUM_CONSTANT(DEBUG_DRAW_CLUSTER_REFLECTION_PROBES); BIND_ENUM_CONSTANT(DEBUG_DRAW_OCCLUDERS) + BIND_ENUM_CONSTANT(DEBUG_DRAW_MOTION_VECTORS) BIND_ENUM_CONSTANT(DEFAULT_CANVAS_ITEM_TEXTURE_FILTER_NEAREST); BIND_ENUM_CONSTANT(DEFAULT_CANVAS_ITEM_TEXTURE_FILTER_LINEAR); diff --git a/scene/main/viewport.h b/scene/main/viewport.h index 833b302e972d..a22a2acf499e 100644 --- a/scene/main/viewport.h +++ b/scene/main/viewport.h @@ -160,6 +160,7 @@ public: DEBUG_DRAW_CLUSTER_DECALS, DEBUG_DRAW_CLUSTER_REFLECTION_PROBES, DEBUG_DRAW_OCCLUDERS, + DEBUG_DRAW_MOTION_VECTORS, }; enum DefaultCanvasItemTextureFilter { @@ -291,6 +292,7 @@ private: MSAA msaa = MSAA_DISABLED; ScreenSpaceAA screen_space_aa = SCREEN_SPACE_AA_DISABLED; + bool use_taa = false; Scaling3DMode scaling_3d_mode = SCALING_3D_MODE_BILINEAR; float scaling_3d_scale = 1.0; @@ -515,6 +517,9 @@ public: void set_screen_space_aa(ScreenSpaceAA p_screen_space_aa); ScreenSpaceAA get_screen_space_aa() const; + void set_use_taa(bool p_use_taa); + bool is_using_taa() const; + void set_scaling_3d_mode(Scaling3DMode p_scaling_3d_mode); Scaling3DMode get_scaling_3d_mode() const; diff --git a/servers/rendering/dummy/rasterizer_scene_dummy.h b/servers/rendering/dummy/rasterizer_scene_dummy.h index fa58322ea8cb..e6d2b93f9971 100644 --- a/servers/rendering/dummy/rasterizer_scene_dummy.h +++ b/servers/rendering/dummy/rasterizer_scene_dummy.h @@ -183,7 +183,7 @@ public: void voxel_gi_set_quality(RS::VoxelGIQuality) override {} - void render_scene(RID p_render_buffers, const CameraData *p_camera_data, const PagedArray &p_instances, const PagedArray &p_lights, const PagedArray &p_reflection_probes, const PagedArray &p_voxel_gi_instances, const PagedArray &p_decals, const PagedArray &p_lightmaps, const PagedArray &p_fog_volumes, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_occluder_debug_tex, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_mesh_lod_threshold, const RenderShadowData *p_render_shadows, int p_render_shadow_count, const RenderSDFGIData *p_render_sdfgi_regions, int p_render_sdfgi_region_count, const RenderSDFGIUpdateData *p_sdfgi_update_data = nullptr, RendererScene::RenderInfo *r_info = nullptr) override {} + void render_scene(RID p_render_buffers, const CameraData *p_camera_data, const CameraData *p_prev_camera_data, const PagedArray &p_instances, const PagedArray &p_lights, const PagedArray &p_reflection_probes, const PagedArray &p_voxel_gi_instances, const PagedArray &p_decals, const PagedArray &p_lightmaps, const PagedArray &p_fog_volumes, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_occluder_debug_tex, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_mesh_lod_threshold, const RenderShadowData *p_render_shadows, int p_render_shadow_count, const RenderSDFGIData *p_render_sdfgi_regions, int p_render_sdfgi_region_count, const RenderSDFGIUpdateData *p_sdfgi_update_data = nullptr, RendererScene::RenderInfo *r_info = nullptr) override {} void render_material(const Transform3D &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, const PagedArray &p_instances, RID p_framebuffer, const Rect2i &p_region) override {} void render_particle_collider_heightfield(RID p_collider, const Transform3D &p_transform, const PagedArray &p_instances) override {} @@ -192,7 +192,7 @@ public: void set_debug_draw_mode(RS::ViewportDebugDraw p_debug_draw) override {} RID render_buffers_create() override { return RID(); } - void render_buffers_configure(RID p_render_buffers, RID p_render_target, int p_internal_width, int p_internal_height, int p_width, int p_height, float p_fsr_sharpness, float p_fsr_mipmap_bias, RS::ViewportMSAA p_msaa, RS::ViewportScreenSpaceAA p_screen_space_aa, bool p_use_debanding, uint32_t p_view_count) override {} + void render_buffers_configure(RID p_render_buffers, RID p_render_target, int p_internal_width, int p_internal_height, int p_width, int p_height, float p_fsr_sharpness, float p_fsr_mipmap_bias, RS::ViewportMSAA p_msaa, RS::ViewportScreenSpaceAA p_screen_space_aa, bool p_use_taa, bool p_use_debanding, uint32_t p_view_count) override {} void gi_set_use_half_resolution(bool p_enable) override {} void screen_space_roughness_limiter_set_active(bool p_enable, float p_amount, float p_curve) override {} diff --git a/servers/rendering/renderer_rd/effects_rd.cpp b/servers/rendering/renderer_rd/effects_rd.cpp index 774745abdc19..bf97c6fbe968 100644 --- a/servers/rendering/renderer_rd/effects_rd.cpp +++ b/servers/rendering/renderer_rd/effects_rd.cpp @@ -252,6 +252,35 @@ void EffectsRD::fsr_upscale(RID p_source_rd_texture, RID p_secondary_texture, RI RD::get_singleton()->compute_list_end(compute_list); } +void EffectsRD::taa_resolve(RID p_frame, RID p_temp, RID p_depth, RID p_velocity, RID p_prev_velocity, RID p_history, Size2 p_resolution, float p_z_near, float p_z_far) { + UniformSetCacheRD *uniform_set_cache = UniformSetCacheRD::get_singleton(); + ERR_FAIL_NULL(uniform_set_cache); + + RID shader = TAA_resolve.shader.version_get_shader(TAA_resolve.shader_version, 0); + ERR_FAIL_COND(shader.is_null()); + + memset(&TAA_resolve.push_constant, 0, sizeof(TAAResolvePushConstant)); + TAA_resolve.push_constant.resolution_width = p_resolution.width; + TAA_resolve.push_constant.resolution_height = p_resolution.height; + TAA_resolve.push_constant.disocclusion_threshold = 0.025f; + TAA_resolve.push_constant.disocclusion_scale = 10.0f; + + RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); + RD::get_singleton()->compute_list_bind_compute_pipeline(compute_list, TAA_resolve.pipeline); + + RD::Uniform u_frame_source(RD::UNIFORM_TYPE_IMAGE, 0, { p_frame }); + RD::Uniform u_depth(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 1, { default_sampler, p_depth }); + RD::Uniform u_velocity(RD::UNIFORM_TYPE_IMAGE, 2, { p_velocity }); + RD::Uniform u_prev_velocity(RD::UNIFORM_TYPE_IMAGE, 3, { p_prev_velocity }); + RD::Uniform u_history(RD::UNIFORM_TYPE_SAMPLER_WITH_TEXTURE, 4, { default_sampler, p_history }); + RD::Uniform u_frame_dest(RD::UNIFORM_TYPE_IMAGE, 5, { p_temp }); + + RD::get_singleton()->compute_list_bind_uniform_set(compute_list, uniform_set_cache->get_cache(shader, 0, u_frame_source, u_depth, u_velocity, u_prev_velocity, u_history, u_frame_dest), 0); + RD::get_singleton()->compute_list_set_push_constant(compute_list, &TAA_resolve.push_constant, sizeof(TAAResolvePushConstant)); + RD::get_singleton()->compute_list_dispatch_threads(compute_list, p_resolution.width, p_resolution.height, 1); + RD::get_singleton()->compute_list_end(); +} + void EffectsRD::screen_space_reflection(RID p_diffuse, RID p_normal_roughness, RenderingServer::EnvironmentSSRRoughnessQuality p_roughness_quality, RID p_blur_radius, RID p_blur_radius2, RID p_metallic, const Color &p_metallic_mask, RID p_depth, RID p_scale_depth, RID p_scale_normal, RID p_output, RID p_output_blur, const Size2i &p_screen_size, int p_max_steps, float p_fade_in, float p_fade_out, float p_tolerance, const CameraMatrix &p_camera) { RD::ComputeListID compute_list = RD::get_singleton()->compute_list_begin(); @@ -415,7 +444,7 @@ void EffectsRD::sub_surface_scattering(RID p_diffuse, RID p_diffuse2, RID p_dept } void EffectsRD::merge_specular(RID p_dest_framebuffer, RID p_specular, RID p_base, RID p_reflection) { - RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_dest_framebuffer, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_DISCARD, Vector()); + RD::DrawListID draw_list = RD::get_singleton()->draw_list_begin(p_dest_framebuffer, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, Vector()); if (p_reflection.is_valid()) { if (p_base.is_valid()) { @@ -2012,6 +2041,14 @@ EffectsRD::EffectsRD(bool p_prefer_raster_effects) { } } + { + Vector taa_modes; + taa_modes.push_back("\n#define MODE_TAA_RESOLVE"); + TAA_resolve.shader.initialize(taa_modes); + TAA_resolve.shader_version = TAA_resolve.shader.version_create(); + TAA_resolve.pipeline = RD::get_singleton()->compute_pipeline_create(TAA_resolve.shader.version_get_shader(TAA_resolve.shader_version, 0)); + } + RD::SamplerState sampler; sampler.mag_filter = RD::SAMPLER_FILTER_LINEAR; sampler.min_filter = RD::SAMPLER_FILTER_LINEAR; @@ -2060,6 +2097,7 @@ EffectsRD::~EffectsRD() { RD::get_singleton()->free(filter.coefficient_buffer); FSR_upscale.shader.version_free(FSR_upscale.shader_version); + TAA_resolve.shader.version_free(TAA_resolve.shader_version); if (prefer_raster_effects) { luminance_reduce_raster.shader.version_free(luminance_reduce_raster.shader_version); roughness.raster_shader.version_free(roughness.shader_version); diff --git a/servers/rendering/renderer_rd/effects_rd.h b/servers/rendering/renderer_rd/effects_rd.h index 196393523609..787873642e0a 100644 --- a/servers/rendering/renderer_rd/effects_rd.h +++ b/servers/rendering/renderer_rd/effects_rd.h @@ -60,6 +60,7 @@ #include "servers/rendering/renderer_rd/shaders/ssil_importance_map.glsl.gen.h" #include "servers/rendering/renderer_rd/shaders/ssil_interleave.glsl.gen.h" #include "servers/rendering/renderer_rd/shaders/subsurface_scattering.glsl.gen.h" +#include "servers/rendering/renderer_rd/shaders/taa_resolve.glsl.gen.h" #include "servers/rendering/renderer_scene_render.h" #include "servers/rendering_server.h" @@ -90,6 +91,20 @@ private: RID pipeline; } FSR_upscale; + struct TAAResolvePushConstant { + float resolution_width; + float resolution_height; + float disocclusion_threshold; + float disocclusion_scale; + }; + + struct TAAResolve { + TAAResolvePushConstant push_constant; + TaaResolveShaderRD shader; + RID shader_version; + RID pipeline; + } TAA_resolve; + struct CubemapRoughnessPushConstant { uint32_t face_id; uint32_t sample_count; @@ -654,6 +669,7 @@ public: bool get_prefer_raster_effects(); void fsr_upscale(RID p_source_rd_texture, RID p_secondary_texture, RID p_destination_texture, const Size2i &p_internal_size, const Size2i &p_size, float p_fsr_upscale_sharpness); + void taa_resolve(RID p_frame, RID p_temp, RID p_depth, RID p_velocity, RID p_prev_velocity, RID p_history, Size2 p_resolution, float p_z_near, float p_z_far); void cubemap_roughness(RID p_source_rd_texture, RID p_dest_texture, uint32_t p_face_id, uint32_t p_sample_count, float p_roughness, float p_size); void cubemap_roughness_raster(RID p_source_rd_texture, RID p_dest_framebuffer, uint32_t p_face_id, uint32_t p_sample_count, float p_roughness, float p_size); diff --git a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp index 181d7819da9d..939ae6c324e9 100644 --- a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp +++ b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.cpp @@ -82,6 +82,27 @@ void RenderForwardClustered::RenderBufferDataForwardClustered::ensure_specular() } } +void RenderForwardClustered::RenderBufferDataForwardClustered::ensure_velocity() { + if (!velocity_buffer.is_valid()) { + RD::TextureFormat tf; + tf.format = RD::DATA_FORMAT_R16G16_SFLOAT; + tf.width = width; + tf.height = height; + tf.usage_bits = RD::TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | RD::TEXTURE_USAGE_SAMPLING_BIT | RD::TEXTURE_USAGE_STORAGE_BIT; + + if (msaa != RS::VIEWPORT_MSAA_DISABLED) { + RD::TextureFormat tf_aa = tf; + tf_aa.samples = texture_samples; + tf_aa.usage_bits |= RD::TEXTURE_USAGE_CAN_COPY_FROM_BIT; + velocity_buffer_msaa = RD::get_singleton()->texture_create(tf_aa, RD::TextureView()); + + tf.usage_bits |= RD::TEXTURE_USAGE_CAN_COPY_TO_BIT; + } + + velocity_buffer = RD::get_singleton()->texture_create(tf, RD::TextureView()); + } +} + void RenderForwardClustered::RenderBufferDataForwardClustered::ensure_voxelgi() { if (!voxelgi_buffer.is_valid()) { RD::TextureFormat tf; @@ -169,12 +190,23 @@ void RenderForwardClustered::RenderBufferDataForwardClustered::clear() { if (!render_sdfgi_uniform_set.is_null() && RD::get_singleton()->uniform_set_is_valid(render_sdfgi_uniform_set)) { RD::get_singleton()->free(render_sdfgi_uniform_set); } + + if (velocity_buffer != RID()) { + RD::get_singleton()->free(velocity_buffer); + velocity_buffer = RID(); + } + + if (velocity_buffer_msaa != RID()) { + RD::get_singleton()->free(velocity_buffer_msaa); + velocity_buffer_msaa = RID(); + } } -void RenderForwardClustered::RenderBufferDataForwardClustered::configure(RID p_color_buffer, RID p_depth_buffer, RID p_target_buffer, int p_width, int p_height, RS::ViewportMSAA p_msaa, uint32_t p_view_count) { +void RenderForwardClustered::RenderBufferDataForwardClustered::configure(RID p_color_buffer, RID p_depth_buffer, RID p_target_buffer, int p_width, int p_height, RS::ViewportMSAA p_msaa, bool p_use_taa, uint32_t p_view_count) { clear(); msaa = p_msaa; + use_taa = p_use_taa; width = p_width; height = p_height; @@ -260,6 +292,13 @@ RID RenderForwardClustered::RenderBufferDataForwardClustered::get_color_pass_fb( fb.push_back(RID()); } + if (p_color_pass_flags & COLOR_PASS_FLAG_MOTION_VECTORS) { + ensure_velocity(); + fb.push_back(use_msaa ? velocity_buffer_msaa : velocity_buffer); + } else { + fb.push_back(RID()); + } + fb.push_back(use_msaa ? depth_msaa : depth); int v_count = (p_color_pass_flags & COLOR_PASS_FLAG_MULTIVIEW) ? view_count : 1; @@ -445,6 +484,10 @@ void RenderForwardClustered::_render_list_template(RenderingDevice::DrawListID p pipeline_color_pass_flags |= SceneShaderForwardClustered::PIPELINE_COLOR_PASS_FLAG_SEPARATE_SPECULAR; } + if constexpr ((p_color_pass_flags & COLOR_PASS_FLAG_MOTION_VECTORS) != 0) { + pipeline_color_pass_flags |= SceneShaderForwardClustered::PIPELINE_COLOR_PASS_FLAG_MOTION_VECTORS; + } + if constexpr ((p_color_pass_flags & COLOR_PASS_FLAG_TRANSPARENT) != 0) { pipeline_color_pass_flags |= SceneShaderForwardClustered::PIPELINE_COLOR_PASS_FLAG_TRANSPARENT; } @@ -567,9 +610,14 @@ void RenderForwardClustered::_render_list(RenderingDevice::DrawListID p_draw_lis switch (p_params->color_pass_flags) { VALID_FLAG_COMBINATION(0); VALID_FLAG_COMBINATION(COLOR_PASS_FLAG_TRANSPARENT); - VALID_FLAG_COMBINATION(COLOR_PASS_FLAG_SEPARATE_SPECULAR); - VALID_FLAG_COMBINATION(COLOR_PASS_FLAG_MULTIVIEW); VALID_FLAG_COMBINATION(COLOR_PASS_FLAG_TRANSPARENT | COLOR_PASS_FLAG_MULTIVIEW); + VALID_FLAG_COMBINATION(COLOR_PASS_FLAG_TRANSPARENT | COLOR_PASS_FLAG_MOTION_VECTORS); + VALID_FLAG_COMBINATION(COLOR_PASS_FLAG_SEPARATE_SPECULAR); + VALID_FLAG_COMBINATION(COLOR_PASS_FLAG_SEPARATE_SPECULAR | COLOR_PASS_FLAG_MULTIVIEW); + VALID_FLAG_COMBINATION(COLOR_PASS_FLAG_SEPARATE_SPECULAR | COLOR_PASS_FLAG_MOTION_VECTORS); + VALID_FLAG_COMBINATION(COLOR_PASS_FLAG_MULTIVIEW); + VALID_FLAG_COMBINATION(COLOR_PASS_FLAG_MULTIVIEW | COLOR_PASS_FLAG_MOTION_VECTORS); + VALID_FLAG_COMBINATION(COLOR_PASS_FLAG_MOTION_VECTORS); default: { ERR_FAIL_MSG("Invalid color pass flag combination " + itos(p_params->color_pass_flags)); } @@ -631,6 +679,7 @@ void RenderForwardClustered::_setup_environment(const RenderDataRD *p_render_dat //projection.flip_y(); // Vulkan and modern APIs use Y-Down CameraMatrix correction; correction.set_depth_correction(p_flip_y); + correction.add_jitter_offset(p_render_data->taa_jitter); CameraMatrix projection = correction * p_render_data->cam_projection; //store camera into ubo @@ -645,6 +694,9 @@ void RenderForwardClustered::_setup_environment(const RenderDataRD *p_render_dat RendererStorageRD::store_camera(projection.inverse(), scene_state.ubo.inv_projection_matrix_view[v]); } + scene_state.ubo.taa_jitter[0] = p_render_data->taa_jitter.x; + scene_state.ubo.taa_jitter[1] = p_render_data->taa_jitter.y; + scene_state.ubo.z_far = p_render_data->z_far; scene_state.ubo.z_near = p_render_data->z_near; @@ -708,61 +760,7 @@ void RenderForwardClustered::_setup_environment(const RenderDataRD *p_render_dat } } } -#if 0 - if (p_render_data->render_buffers.is_valid() && render_buffers_is_sdfgi_enabled(p_render_data->render_buffers)) { - scene_state.ubo.sdfgi_cascade_count = render_buffers_get_sdfgi_cascade_count(p_render_data->render_buffers); - scene_state.ubo.sdfgi_probe_axis_size = render_buffers_get_sdfgi_cascade_probe_count(p_render_data->render_buffers); - scene_state.ubo.sdfgi_cascade_probe_size[0] = scene_state.ubo.sdfgi_probe_axis_size - 1; //float version for performance - scene_state.ubo.sdfgi_cascade_probe_size[1] = scene_state.ubo.sdfgi_probe_axis_size - 1; - scene_state.ubo.sdfgi_cascade_probe_size[2] = scene_state.ubo.sdfgi_probe_axis_size - 1; - float csize = render_buffers_get_sdfgi_cascade_size(p_render_data->render_buffers); - scene_state.ubo.sdfgi_probe_to_uvw = 1.0 / float(scene_state.ubo.sdfgi_cascade_probe_size[0]); - float occ_bias = 0.0; - scene_state.ubo.sdfgi_occlusion_bias = occ_bias / csize; - scene_state.ubo.sdfgi_use_occlusion = render_buffers_is_sdfgi_using_occlusion(p_render_data->render_buffers); - scene_state.ubo.sdfgi_energy = render_buffers_get_sdfgi_energy(p_render_data->render_buffers); - - float cascade_voxel_size = (csize / scene_state.ubo.sdfgi_cascade_probe_size[0]); - float occlusion_clamp = (cascade_voxel_size - 0.5) / cascade_voxel_size; - scene_state.ubo.sdfgi_occlusion_clamp[0] = occlusion_clamp; - scene_state.ubo.sdfgi_occlusion_clamp[1] = occlusion_clamp; - scene_state.ubo.sdfgi_occlusion_clamp[2] = occlusion_clamp; - scene_state.ubo.sdfgi_normal_bias = (render_buffers_get_sdfgi_normal_bias(p_render_data->render_buffers) / csize) * scene_state.ubo.sdfgi_cascade_probe_size[0]; - - //vec2 tex_pixel_size = 1.0 / vec2(ivec2( (OCT_SIZE+2) * params.probe_axis_size * params.probe_axis_size, (OCT_SIZE+2) * params.probe_axis_size ) ); - //vec3 probe_uv_offset = (ivec3(OCT_SIZE+2,OCT_SIZE+2,(OCT_SIZE+2) * params.probe_axis_size)) * tex_pixel_size.xyx; - - uint32_t oct_size = gi.sdfgi_get_lightprobe_octahedron_size(); - - scene_state.ubo.sdfgi_lightprobe_tex_pixel_size[0] = 1.0 / ((oct_size + 2) * scene_state.ubo.sdfgi_probe_axis_size * scene_state.ubo.sdfgi_probe_axis_size); - scene_state.ubo.sdfgi_lightprobe_tex_pixel_size[1] = 1.0 / ((oct_size + 2) * scene_state.ubo.sdfgi_probe_axis_size); - scene_state.ubo.sdfgi_lightprobe_tex_pixel_size[2] = 1.0; - - scene_state.ubo.sdfgi_probe_uv_offset[0] = float(oct_size + 2) * scene_state.ubo.sdfgi_lightprobe_tex_pixel_size[0]; - scene_state.ubo.sdfgi_probe_uv_offset[1] = float(oct_size + 2) * scene_state.ubo.sdfgi_lightprobe_tex_pixel_size[1]; - scene_state.ubo.sdfgi_probe_uv_offset[2] = float((oct_size + 2) * scene_state.ubo.sdfgi_probe_axis_size) * scene_state.ubo.sdfgi_lightprobe_tex_pixel_size[0]; - - scene_state.ubo.sdfgi_occlusion_renormalize[0] = 0.5; - scene_state.ubo.sdfgi_occlusion_renormalize[1] = 1.0; - scene_state.ubo.sdfgi_occlusion_renormalize[2] = 1.0 / float(scene_state.ubo.sdfgi_cascade_count); - - for (uint32_t i = 0; i < scene_state.ubo.sdfgi_cascade_count; i++) { - SceneState::UBO::SDFGICascade &c = scene_state.ubo.sdfgi_cascades[i]; - Vector3 pos = render_buffers_get_sdfgi_cascade_offset(p_render_data->render_buffers, i); - pos -= p_render_data->cam_transform.origin; //make pos local to camera, to reduce numerical error - c.position[0] = pos.x; - c.position[1] = pos.y; - c.position[2] = pos.z; - c.to_probe = 1.0 / render_buffers_get_sdfgi_cascade_probe_size(p_render_data->render_buffers, i); - - Vector3i probe_ofs = render_buffers_get_sdfgi_cascade_probe_offset(p_render_data->render_buffers, i); - c.probe_world_offset[0] = probe_ofs.x; - c.probe_world_offset[1] = probe_ofs.y; - c.probe_world_offset[2] = probe_ofs.z; - } - } -#endif if (get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_UNSHADED) { scene_state.ubo.use_ambient_light = true; scene_state.ubo.ambient_light_color_energy[0] = 1; @@ -862,14 +860,41 @@ void RenderForwardClustered::_setup_environment(const RenderDataRD *p_render_dat scene_state.ubo.roughness_limiter_amount = screen_space_roughness_limiter_get_amount(); scene_state.ubo.roughness_limiter_limit = screen_space_roughness_limiter_get_limit(); + if (p_render_data->render_buffers.is_valid()) { + RenderBufferDataForwardClustered *render_buffers = static_cast(render_buffers_get_data(p_render_data->render_buffers)); + if (render_buffers->use_taa || get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_MOTION_VECTORS) { + memcpy(&scene_state.prev_ubo, &scene_state.ubo, sizeof(SceneState::UBO)); + + CameraMatrix prev_correction; + prev_correction.set_depth_correction(true); + prev_correction.add_jitter_offset(p_render_data->prev_taa_jitter); + CameraMatrix prev_projection = prev_correction * p_render_data->prev_cam_projection; + + //store camera into ubo + RendererStorageRD::store_camera(prev_projection, scene_state.prev_ubo.projection_matrix); + RendererStorageRD::store_camera(prev_projection.inverse(), scene_state.prev_ubo.inv_projection_matrix); + RendererStorageRD::store_transform(p_render_data->prev_cam_transform, scene_state.prev_ubo.inv_view_matrix); + RendererStorageRD::store_transform(p_render_data->prev_cam_transform.affine_inverse(), scene_state.prev_ubo.view_matrix); + + for (uint32_t v = 0; v < p_render_data->view_count; v++) { + prev_projection = prev_correction * p_render_data->view_projection[v]; + RendererStorageRD::store_camera(prev_projection, scene_state.prev_ubo.projection_matrix_view[v]); + RendererStorageRD::store_camera(prev_projection.inverse(), scene_state.prev_ubo.inv_projection_matrix_view[v]); + } + scene_state.prev_ubo.taa_jitter[0] = p_render_data->prev_taa_jitter.x; + scene_state.prev_ubo.taa_jitter[1] = p_render_data->prev_taa_jitter.y; + scene_state.prev_ubo.time -= time_step; + } + } + if (p_index >= (int)scene_state.uniform_buffers.size()) { uint32_t from = scene_state.uniform_buffers.size(); scene_state.uniform_buffers.resize(p_index + 1); for (uint32_t i = from; i < scene_state.uniform_buffers.size(); i++) { - scene_state.uniform_buffers[i] = RD::get_singleton()->uniform_buffer_create(sizeof(SceneState::UBO)); + scene_state.uniform_buffers[i] = RD::get_singleton()->uniform_buffer_create(sizeof(SceneState::UBO) * 2); } } - RD::get_singleton()->buffer_update(scene_state.uniform_buffers[p_index], 0, sizeof(SceneState::UBO), &scene_state.ubo, RD::BARRIER_MASK_RASTER); + RD::get_singleton()->buffer_update(scene_state.uniform_buffers[p_index], 0, sizeof(SceneState::UBO) * 2, &scene_state.ubo_data, RD::BARRIER_MASK_RASTER); } void RenderForwardClustered::_update_instance_data_buffer(RenderListType p_render_list) { @@ -895,6 +920,7 @@ void RenderForwardClustered::_fill_instance_data(RenderListType p_render_list, i if (p_render_info) { p_render_info[RS::VIEWPORT_RENDER_INFO_OBJECTS_IN_FRAME] += element_total; } + uint64_t frame = RSG::rasterizer->get_frame_number(); uint32_t repeats = 0; GeometryInstanceSurfaceDataCache *prev_surface = nullptr; for (uint32_t i = 0; i < element_total; i++) { @@ -903,10 +929,17 @@ void RenderForwardClustered::_fill_instance_data(RenderListType p_render_list, i SceneState::InstanceData &instance_data = scene_state.instance_data[p_render_list][i + p_offset]; + if (inst->prev_transform_dirty && frame > inst->prev_transform_change_frame + 1 && inst->prev_transform_change_frame) { + inst->prev_transform = inst->transform; + inst->prev_transform_dirty = false; + } + if (inst->store_transform_cache) { RendererStorageRD::store_transform(inst->transform, instance_data.transform); + RendererStorageRD::store_transform(inst->prev_transform, instance_data.prev_transform); } else { RendererStorageRD::store_transform(Transform3D(), instance_data.transform); + RendererStorageRD::store_transform(Transform3D(), instance_data.prev_transform); } instance_data.flags = inst->flags_cache; @@ -1281,6 +1314,10 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co screen_size.x = render_buffer->width; screen_size.y = render_buffer->height; + if (render_buffer->use_taa || get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_MOTION_VECTORS) { + color_pass_flags |= COLOR_PASS_FLAG_MOTION_VECTORS; + } + if (p_render_data->voxel_gi_instances->size() > 0) { using_voxelgi = true; } @@ -1529,7 +1566,8 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co c.push_back(cc); if (render_buffer) { - c.push_back(Color(0, 0, 0, 0)); + c.push_back(Color(0, 0, 0, 0)); // Separate specular + c.push_back(Color(0, 0, 0, 0)); // Motion vectors } } @@ -1642,7 +1680,7 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co _setup_environment(p_render_data, p_render_data->reflection_probe.is_valid(), screen_size, !p_render_data->reflection_probe.is_valid(), p_default_bg_color, false); { - uint32_t transparent_color_pass_flags = (color_pass_flags | COLOR_PASS_FLAG_TRANSPARENT) & ~COLOR_PASS_FLAG_SEPARATE_SPECULAR; + uint32_t transparent_color_pass_flags = (color_pass_flags | COLOR_PASS_FLAG_TRANSPARENT) & ~(COLOR_PASS_FLAG_SEPARATE_SPECULAR); RID alpha_framebuffer = render_buffer ? render_buffer->get_color_pass_fb(transparent_color_pass_flags) : color_only_framebuffer; RenderListParameters render_list_params(render_list[RENDER_LIST_ALPHA].elements.ptr(), render_list[RENDER_LIST_ALPHA].element_info.ptr(), render_list[RENDER_LIST_ALPHA].elements.size(), false, PASS_MODE_COLOR, transparent_color_pass_flags, render_buffer == nullptr, p_render_data->directional_light_soft_shadows, rp_uniform_set, get_debug_draw_mode() == RS::VIEWPORT_DEBUG_DRAW_WIREFRAME, Vector2(), p_render_data->lod_camera_plane, p_render_data->lod_distance_multiplier, p_render_data->screen_mesh_lod_threshold, p_render_data->view_count); _render_list_with_threads(&render_list_params, alpha_framebuffer, can_continue_color ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ, can_continue_depth ? RD::INITIAL_ACTION_CONTINUE : RD::INITIAL_ACTION_KEEP, RD::FINAL_ACTION_READ); @@ -1656,6 +1694,9 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co if (render_buffer && render_buffer->msaa != RS::VIEWPORT_MSAA_DISABLED) { RD::get_singleton()->texture_resolve_multisample(render_buffer->color_msaa, render_buffer->color); + if (render_buffer->use_taa) { + RD::get_singleton()->texture_resolve_multisample(render_buffer->velocity_buffer_msaa, render_buffer->velocity_buffer); + } storage->get_effects()->resolve_depth(render_buffer->depth_msaa, render_buffer->depth, Vector2i(render_buffer->width, render_buffer->height), texture_multisamples[render_buffer->msaa]); } @@ -1668,6 +1709,11 @@ void RenderForwardClustered::_render_scene(RenderDataRD *p_render_data, const Co } RD::get_singleton()->draw_command_end_label(); + if (render_buffer && render_buffer->use_taa) { + RENDER_TIMESTAMP("TAA") + _process_taa(p_render_data->render_buffers, render_buffer->velocity_buffer, p_render_data->z_near, p_render_data->z_far); + } + if (p_render_data->render_buffers.is_valid()) { _debug_draw_cluster(p_render_data->render_buffers); @@ -2594,7 +2640,13 @@ RID RenderForwardClustered::_setup_sdfgi_render_pass_uniform_set(RID p_albedo_te RID RenderForwardClustered::_render_buffers_get_normal_texture(RID p_render_buffers) { RenderBufferDataForwardClustered *rb = static_cast(render_buffers_get_data(p_render_buffers)); - return rb->normal_roughness_buffer; + return rb->msaa == RS::VIEWPORT_MSAA_DISABLED ? rb->normal_roughness_buffer : rb->normal_roughness_buffer_msaa; +} + +RID RenderForwardClustered::_render_buffers_get_velocity_texture(RID p_render_buffers) { + RenderBufferDataForwardClustered *rb = static_cast(render_buffers_get_data(p_render_buffers)); + + return rb->msaa == RS::VIEWPORT_MSAA_DISABLED ? rb->velocity_buffer : rb->velocity_buffer_msaa; } RenderForwardClustered *RenderForwardClustered::singleton = nullptr; @@ -3014,6 +3066,13 @@ void RenderForwardClustered::geometry_instance_set_mesh_instance(GeometryInstanc void RenderForwardClustered::geometry_instance_set_transform(GeometryInstance *p_geometry_instance, const Transform3D &p_transform, const AABB &p_aabb, const AABB &p_transformed_aabb) { GeometryInstanceForwardClustered *ginstance = static_cast(p_geometry_instance); ERR_FAIL_COND(!ginstance); + + uint64_t frame = RSG::rasterizer->get_frame_number(); + if (frame != ginstance->prev_transform_change_frame) { + ginstance->prev_transform = ginstance->transform; + ginstance->prev_transform_change_frame = frame; + ginstance->prev_transform_dirty = true; + } ginstance->transform = p_transform; ginstance->mirror = p_transform.basis.determinant() < 0; ginstance->data->aabb = p_aabb; diff --git a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h index bec10f7f0e38..6ad42bf0ec3e 100644 --- a/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h +++ b/servers/rendering/renderer_rd/forward_clustered/render_forward_clustered.h @@ -89,9 +89,11 @@ class RenderForwardClustered : public RendererSceneRenderRD { RID specular; RID normal_roughness_buffer; RID voxelgi_buffer; + RID velocity_buffer; RS::ViewportMSAA msaa; RD::TextureSamples texture_samples; + bool use_taa; RID color_msaa; RID depth_msaa; @@ -99,6 +101,7 @@ class RenderForwardClustered : public RendererSceneRenderRD { RID normal_roughness_buffer_msaa; RID roughness_buffer_msaa; RID voxelgi_buffer_msaa; + RID velocity_buffer_msaa; RID depth_fb; RID depth_normal_roughness_fb; @@ -112,8 +115,9 @@ class RenderForwardClustered : public RendererSceneRenderRD { RID render_sdfgi_uniform_set; void ensure_specular(); void ensure_voxelgi(); + void ensure_velocity(); void clear(); - virtual void configure(RID p_color_buffer, RID p_depth_buffer, RID p_target_buffer, int p_width, int p_height, RS::ViewportMSAA p_msaa, uint32_t p_view_count); + virtual void configure(RID p_color_buffer, RID p_depth_buffer, RID p_target_buffer, int p_width, int p_height, RS::ViewportMSAA p_msaa, bool p_use_taa, uint32_t p_view_count); RID get_color_pass_fb(uint32_t p_color_pass_flags); ~RenderBufferDataForwardClustered(); @@ -128,6 +132,7 @@ class RenderForwardClustered : public RendererSceneRenderRD { virtual void _base_uniforms_changed() override; virtual RID _render_buffers_get_normal_texture(RID p_render_buffers) override; + virtual RID _render_buffers_get_velocity_texture(RID p_render_buffers) override; bool base_uniform_set_updated = false; void _update_render_base_uniform_set(); @@ -148,7 +153,8 @@ class RenderForwardClustered : public RendererSceneRenderRD { enum ColorPassFlags { COLOR_PASS_FLAG_TRANSPARENT = 1 << 0, COLOR_PASS_FLAG_SEPARATE_SPECULAR = 1 << 1, - COLOR_PASS_FLAG_MULTIVIEW = 1 << 2 + COLOR_PASS_FLAG_MULTIVIEW = 1 << 2, + COLOR_PASS_FLAG_MOTION_VECTORS = 1 << 3, }; struct GeometryInstanceSurfaceDataCache; @@ -300,6 +306,9 @@ class RenderForwardClustered : public RendererSceneRenderRD { float reflection_multiplier; uint32_t pancake_shadows; + + float taa_jitter[2]; + uint32_t pad[2]; }; struct PushConstant { @@ -310,6 +319,7 @@ class RenderForwardClustered : public RendererSceneRenderRD { struct InstanceData { float transform[16]; + float prev_transform[16]; uint32_t flags; uint32_t instance_uniforms_ofs; //base offset in global buffer for instance variables uint32_t gi_offset; //GI information when using lightmapping (VCT or lightmap index) @@ -317,7 +327,9 @@ class RenderForwardClustered : public RendererSceneRenderRD { float lightmap_uv_scale[4]; }; - UBO ubo; + UBO ubo_data[2]; + UBO &ubo = ubo_data[0]; + UBO &prev_ubo = ubo_data[1]; LocalVector uniform_buffers; @@ -492,7 +504,10 @@ class RenderForwardClustered : public RendererSceneRenderRD { //used during setup uint32_t base_flags = 0; + uint64_t prev_transform_change_frame = 0xFFFFFFFF; + bool prev_transform_dirty = true; Transform3D transform; + Transform3D prev_transform; RID voxel_gi_instances[MAX_VOXEL_GI_INSTANCESS_PER_INSTANCE]; RID lightmap_instance; GeometryInstanceLightmapSH *lightmap_sh = nullptr; diff --git a/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp b/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp index bd39dd9c70c5..042968734276 100644 --- a/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp +++ b/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.cpp @@ -235,10 +235,10 @@ void SceneShaderForwardClustered::ShaderData::set_code(const String &p_code) { } } - // Color pass -> attachment 0: Color/Diffuse, attachment 1: Separate Specular + // Color pass -> attachment 0: Color/Diffuse, attachment 1: Separate Specular, attachment 2: Motion Vectors RD::PipelineColorBlendState blend_state_color_blend; - blend_state_color_blend.attachments = { blend_attachment, RD::PipelineColorBlendState::Attachment() }; - RD::PipelineColorBlendState blend_state_color_opaque = RD::PipelineColorBlendState::create_disabled(2); + blend_state_color_blend.attachments = { blend_attachment, RD::PipelineColorBlendState::Attachment(), RD::PipelineColorBlendState::Attachment() }; + RD::PipelineColorBlendState blend_state_color_opaque = RD::PipelineColorBlendState::create_disabled(3); RD::PipelineColorBlendState blend_state_depth_normal_roughness = RD::PipelineColorBlendState::create_disabled(1); RD::PipelineColorBlendState blend_state_depth_normal_roughness_giprobe = RD::PipelineColorBlendState::create_disabled(2); @@ -326,6 +326,10 @@ void SceneShaderForwardClustered::ShaderData::set_code(const String &p_code) { } } + if (l & PIPELINE_COLOR_PASS_FLAG_MOTION_VECTORS) { + shader_flags |= SHADER_COLOR_PASS_FLAG_MOTION_VECTORS; + } + if (l & PIPELINE_COLOR_PASS_FLAG_LIGHTMAP) { shader_flags |= SHADER_COLOR_PASS_FLAG_LIGHTMAP; } @@ -532,6 +536,7 @@ void SceneShaderForwardClustered::init(RendererStorageRD *p_storage, const Strin "\n#define MODE_SEPARATE_SPECULAR\n", // SHADER_COLOR_PASS_FLAG_SEPARATE_SPECULAR "\n#define USE_LIGHTMAP\n", // SHADER_COLOR_PASS_FLAG_LIGHTMAP "\n#define USE_MULTIVIEW\n", // SHADER_COLOR_PASS_FLAG_MULTIVIEW + "\n#define MOTION_VECTORS\n", // SHADER_COLOR_PASS_FLAG_MOTION_VECTORS }; for (int i = 0; i < SHADER_COLOR_PASS_FLAG_COUNT; i++) { @@ -557,17 +562,29 @@ void SceneShaderForwardClustered::init(RendererStorageRD *p_storage, const Strin valid_color_pass_pipelines.insert(PIPELINE_COLOR_PASS_FLAG_TRANSPARENT); valid_color_pass_pipelines.insert(PIPELINE_COLOR_PASS_FLAG_TRANSPARENT | PIPELINE_COLOR_PASS_FLAG_LIGHTMAP); valid_color_pass_pipelines.insert(PIPELINE_COLOR_PASS_FLAG_TRANSPARENT | PIPELINE_COLOR_PASS_FLAG_MULTIVIEW); - valid_color_pass_pipelines.insert(PIPELINE_COLOR_PASS_FLAG_TRANSPARENT | PIPELINE_COLOR_PASS_FLAG_MULTIVIEW | PIPELINE_COLOR_PASS_FLAG_LIGHTMAP); + valid_color_pass_pipelines.insert(PIPELINE_COLOR_PASS_FLAG_TRANSPARENT | PIPELINE_COLOR_PASS_FLAG_MOTION_VECTORS); + valid_color_pass_pipelines.insert(PIPELINE_COLOR_PASS_FLAG_TRANSPARENT | PIPELINE_COLOR_PASS_FLAG_LIGHTMAP | PIPELINE_COLOR_PASS_FLAG_MULTIVIEW); + valid_color_pass_pipelines.insert(PIPELINE_COLOR_PASS_FLAG_TRANSPARENT | PIPELINE_COLOR_PASS_FLAG_LIGHTMAP | PIPELINE_COLOR_PASS_FLAG_MOTION_VECTORS); + valid_color_pass_pipelines.insert(PIPELINE_COLOR_PASS_FLAG_TRANSPARENT | PIPELINE_COLOR_PASS_FLAG_MULTIVIEW | PIPELINE_COLOR_PASS_FLAG_MOTION_VECTORS); + valid_color_pass_pipelines.insert(PIPELINE_COLOR_PASS_FLAG_TRANSPARENT | PIPELINE_COLOR_PASS_FLAG_LIGHTMAP | PIPELINE_COLOR_PASS_FLAG_MULTIVIEW | PIPELINE_COLOR_PASS_FLAG_MOTION_VECTORS); valid_color_pass_pipelines.insert(PIPELINE_COLOR_PASS_FLAG_SEPARATE_SPECULAR); valid_color_pass_pipelines.insert(PIPELINE_COLOR_PASS_FLAG_SEPARATE_SPECULAR | PIPELINE_COLOR_PASS_FLAG_LIGHTMAP); valid_color_pass_pipelines.insert(PIPELINE_COLOR_PASS_FLAG_SEPARATE_SPECULAR | PIPELINE_COLOR_PASS_FLAG_MULTIVIEW); - valid_color_pass_pipelines.insert(PIPELINE_COLOR_PASS_FLAG_SEPARATE_SPECULAR | PIPELINE_COLOR_PASS_FLAG_MULTIVIEW | PIPELINE_COLOR_PASS_FLAG_LIGHTMAP); + valid_color_pass_pipelines.insert(PIPELINE_COLOR_PASS_FLAG_SEPARATE_SPECULAR | PIPELINE_COLOR_PASS_FLAG_MOTION_VECTORS); + valid_color_pass_pipelines.insert(PIPELINE_COLOR_PASS_FLAG_SEPARATE_SPECULAR | PIPELINE_COLOR_PASS_FLAG_LIGHTMAP | PIPELINE_COLOR_PASS_FLAG_MULTIVIEW); + valid_color_pass_pipelines.insert(PIPELINE_COLOR_PASS_FLAG_SEPARATE_SPECULAR | PIPELINE_COLOR_PASS_FLAG_LIGHTMAP | PIPELINE_COLOR_PASS_FLAG_MOTION_VECTORS); + valid_color_pass_pipelines.insert(PIPELINE_COLOR_PASS_FLAG_SEPARATE_SPECULAR | PIPELINE_COLOR_PASS_FLAG_LIGHTMAP | PIPELINE_COLOR_PASS_FLAG_MULTIVIEW | PIPELINE_COLOR_PASS_FLAG_MOTION_VECTORS); valid_color_pass_pipelines.insert(PIPELINE_COLOR_PASS_FLAG_LIGHTMAP); valid_color_pass_pipelines.insert(PIPELINE_COLOR_PASS_FLAG_LIGHTMAP | PIPELINE_COLOR_PASS_FLAG_MULTIVIEW); + valid_color_pass_pipelines.insert(PIPELINE_COLOR_PASS_FLAG_LIGHTMAP | PIPELINE_COLOR_PASS_FLAG_MOTION_VECTORS); + valid_color_pass_pipelines.insert(PIPELINE_COLOR_PASS_FLAG_LIGHTMAP | PIPELINE_COLOR_PASS_FLAG_MULTIVIEW | PIPELINE_COLOR_PASS_FLAG_MOTION_VECTORS); valid_color_pass_pipelines.insert(PIPELINE_COLOR_PASS_FLAG_MULTIVIEW); + valid_color_pass_pipelines.insert(PIPELINE_COLOR_PASS_FLAG_MULTIVIEW | PIPELINE_COLOR_PASS_FLAG_MOTION_VECTORS); + + valid_color_pass_pipelines.insert(PIPELINE_COLOR_PASS_FLAG_MOTION_VECTORS); material_storage->shader_set_data_request_function(RendererRD::SHADER_TYPE_3D, _create_shader_funcs); material_storage->material_set_data_request_function(RendererRD::SHADER_TYPE_3D, _create_material_funcs); @@ -743,7 +760,7 @@ void SceneShaderForwardClustered::init(RendererStorageRD *p_storage, const Strin actions.base_texture_binding_index = 1; actions.texture_layout_set = RenderForwardClustered::MATERIAL_UNIFORM_SET; actions.base_uniform_string = "material."; - actions.base_varying_index = 10; + actions.base_varying_index = 11; actions.default_filter = ShaderLanguage::FILTER_LINEAR_MIPMAP; actions.default_repeat = ShaderLanguage::REPEAT_ENABLE; diff --git a/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.h b/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.h index a4eb0656b742..79ccf10090de 100644 --- a/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.h +++ b/servers/rendering/renderer_rd/forward_clustered/scene_shader_forward_clustered.h @@ -60,7 +60,8 @@ public: SHADER_COLOR_PASS_FLAG_SEPARATE_SPECULAR = 1 << 0, SHADER_COLOR_PASS_FLAG_LIGHTMAP = 1 << 1, SHADER_COLOR_PASS_FLAG_MULTIVIEW = 1 << 2, - SHADER_COLOR_PASS_FLAG_COUNT = 1 << 3 + SHADER_COLOR_PASS_FLAG_MOTION_VECTORS = 1 << 3, + SHADER_COLOR_PASS_FLAG_COUNT = 1 << 4 }; enum PipelineVersion { @@ -80,7 +81,8 @@ public: PIPELINE_COLOR_PASS_FLAG_SEPARATE_SPECULAR = 1 << 1, PIPELINE_COLOR_PASS_FLAG_LIGHTMAP = 1 << 2, PIPELINE_COLOR_PASS_FLAG_MULTIVIEW = 1 << 3, - PIPELINE_COLOR_PASS_FLAG_COUNT = 1 << 4, + PIPELINE_COLOR_PASS_FLAG_MOTION_VECTORS = 1 << 4, + PIPELINE_COLOR_PASS_FLAG_COUNT = 1 << 5, }; enum ShaderSpecializations { diff --git a/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp b/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp index 1cbf804ece42..2ba5cabff23f 100644 --- a/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp +++ b/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.cpp @@ -87,7 +87,7 @@ void RenderForwardMobile::RenderBufferDataForwardMobile::clear() { } } -void RenderForwardMobile::RenderBufferDataForwardMobile::configure(RID p_color_buffer, RID p_depth_buffer, RID p_target_buffer, int p_width, int p_height, RS::ViewportMSAA p_msaa, uint32_t p_view_count) { +void RenderForwardMobile::RenderBufferDataForwardMobile::configure(RID p_color_buffer, RID p_depth_buffer, RID p_target_buffer, int p_width, int p_height, RS::ViewportMSAA p_msaa, bool p_use_taa, uint32_t p_view_count) { clear(); msaa = p_msaa; @@ -1324,6 +1324,10 @@ RID RenderForwardMobile::_render_buffers_get_normal_texture(RID p_render_buffers return RID(); } +RID RenderForwardMobile::_render_buffers_get_velocity_texture(RID p_render_buffers) { + return RID(); +} + _FORCE_INLINE_ static uint32_t _indices_to_primitives(RS::PrimitiveType p_primitive, uint32_t p_indices) { static const uint32_t divisor[RS::PRIMITIVE_MAX] = { 1, 2, 1, 3, 1 }; static const uint32_t subtractor[RS::PRIMITIVE_MAX] = { 0, 0, 1, 0, 1 }; diff --git a/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.h b/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.h index 0a7e9731209d..fc6f32ecb0f4 100644 --- a/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.h +++ b/servers/rendering/renderer_rd/forward_mobile/render_forward_mobile.h @@ -136,7 +136,7 @@ protected: uint32_t view_count; void clear(); - virtual void configure(RID p_color_buffer, RID p_depth_buffer, RID p_target_buffer, int p_width, int p_height, RS::ViewportMSAA p_msaa, uint32_t p_view_count); + virtual void configure(RID p_color_buffer, RID p_depth_buffer, RID p_target_buffer, int p_width, int p_height, RS::ViewportMSAA p_msaa, bool p_use_taa, uint32_t p_view_count); ~RenderBufferDataForwardMobile(); }; @@ -224,6 +224,7 @@ protected: virtual void _base_uniforms_changed() override; void _update_render_base_uniform_set(); virtual RID _render_buffers_get_normal_texture(RID p_render_buffers) override; + virtual RID _render_buffers_get_velocity_texture(RID p_render_buffers) override; void _fill_render_list(RenderListType p_render_list, const RenderDataRD *p_render_data, PassMode p_pass_mode, bool p_append = false); void _fill_element_info(RenderListType p_render_list, uint32_t p_offset = 0, int32_t p_max_elements = -1); diff --git a/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp b/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp index 85a132e6dfef..a1c6ff540dc8 100644 --- a/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp +++ b/servers/rendering/renderer_rd/renderer_scene_render_rd.cpp @@ -1935,6 +1935,21 @@ void RendererSceneRenderRD::_free_render_buffer_data(RenderBuffers *rb) { rb->ssr.normal_scaled = RID(); } + if (rb->taa.history.is_valid()) { + RD::get_singleton()->free(rb->taa.history); + rb->taa.history = RID(); + } + + if (rb->taa.temp.is_valid()) { + RD::get_singleton()->free(rb->taa.temp); + rb->taa.temp = RID(); + } + + if (rb->taa.prev_velocity.is_valid()) { + RD::get_singleton()->free(rb->taa.prev_velocity); + rb->taa.prev_velocity = RID(); + } + if (rb->ambient_buffer.is_valid()) { RD::get_singleton()->free(rb->ambient_buffer); RD::get_singleton()->free(rb->reflection_buffer); @@ -2323,6 +2338,41 @@ void RendererSceneRenderRD::_copy_framebuffer_to_ssil(RID p_render_buffers) { } } +void RendererSceneRenderRD::_process_taa(RID p_render_buffers, RID p_velocity_buffer, float p_z_near, float p_z_far) { + RenderBuffers *rb = render_buffers_owner.get_or_null(p_render_buffers); + ERR_FAIL_COND(!rb); + + bool just_allocated = false; + if (rb->taa.history.is_null()) { + RD::TextureFormat tf; + if (rb->view_count > 1) { + tf.texture_type = RD::TEXTURE_TYPE_2D_ARRAY; + } + tf.format = _render_buffers_get_color_format(); + tf.width = rb->internal_width; + tf.height = rb->internal_height; + tf.array_layers = rb->view_count; // create a layer for every view + tf.usage_bits = RD::TEXTURE_USAGE_SAMPLING_BIT | (_render_buffers_can_be_storage() ? RD::TEXTURE_USAGE_STORAGE_BIT : 0); + + rb->taa.history = RD::get_singleton()->texture_create(tf, RD::TextureView()); + rb->taa.temp = RD::get_singleton()->texture_create(tf, RD::TextureView()); + + tf.format = RD::DATA_FORMAT_R16G16_SFLOAT; + rb->taa.prev_velocity = RD::get_singleton()->texture_create(tf, RD::TextureView()); + just_allocated = true; + } + + RD::get_singleton()->draw_command_begin_label("TAA"); + if (!just_allocated) { + storage->get_effects()->taa_resolve(rb->internal_texture, rb->taa.temp, rb->depth_texture, p_velocity_buffer, rb->taa.prev_velocity, rb->taa.history, Size2(rb->internal_width, rb->internal_height), p_z_near, p_z_far); + copy_effects->copy_to_rect(rb->taa.temp, rb->internal_texture, Rect2(0, 0, rb->internal_width, rb->internal_height)); + } + + copy_effects->copy_to_rect(rb->internal_texture, rb->taa.history, Rect2(0, 0, rb->internal_width, rb->internal_height)); + copy_effects->copy_to_rect(p_velocity_buffer, rb->taa.prev_velocity, Rect2(0, 0, rb->width, rb->height)); + RD::get_singleton()->draw_command_end_label(); +} + void RendererSceneRenderRD::_render_buffers_copy_screen_texture(const RenderDataRD *p_render_data) { RenderBuffers *rb = render_buffers_owner.get_or_null(p_render_data->render_buffers); ERR_FAIL_COND(!rb); @@ -2759,6 +2809,11 @@ void RendererSceneRenderRD::_render_buffers_debug_draw(RID p_render_buffers, RID copy_effects->copy_to_fb_rect(texture_storage->texture_get_rd_texture(p_occlusion_buffer), texture_storage->render_target_get_rd_framebuffer(rb->render_target), Rect2i(Vector2(), rtsize), true, false); } } + + if (debug_draw == RS::VIEWPORT_DEBUG_DRAW_MOTION_VECTORS && _render_buffers_get_velocity_texture(p_render_buffers).is_valid()) { + Size2 rtsize = texture_storage->render_target_get_size(rb->render_target); + copy_effects->copy_to_fb_rect(_render_buffers_get_velocity_texture(p_render_buffers), texture_storage->render_target_get_rd_framebuffer(rb->render_target), Rect2(Vector2(), rtsize), false, false); + } } void RendererSceneRenderRD::environment_set_adjustment(RID p_env, bool p_enable, float p_brightness, float p_contrast, float p_saturation, bool p_use_1d_color_correction, RID p_color_correction) { @@ -2976,7 +3031,7 @@ bool RendererSceneRenderRD::_render_buffers_can_be_storage() { return true; } -void RendererSceneRenderRD::render_buffers_configure(RID p_render_buffers, RID p_render_target, int p_internal_width, int p_internal_height, int p_width, int p_height, float p_fsr_sharpness, float p_fsr_mipmap_bias, RS::ViewportMSAA p_msaa, RenderingServer::ViewportScreenSpaceAA p_screen_space_aa, bool p_use_debanding, uint32_t p_view_count) { +void RendererSceneRenderRD::render_buffers_configure(RID p_render_buffers, RID p_render_target, int p_internal_width, int p_internal_height, int p_width, int p_height, float p_fsr_sharpness, float p_fsr_mipmap_bias, RS::ViewportMSAA p_msaa, RenderingServer::ViewportScreenSpaceAA p_screen_space_aa, bool p_use_taa, bool p_use_debanding, uint32_t p_view_count) { RendererRD::TextureStorage *texture_storage = RendererRD::TextureStorage::get_singleton(); RendererRD::MaterialStorage *material_storage = RendererRD::MaterialStorage::get_singleton(); @@ -3004,6 +3059,7 @@ void RendererSceneRenderRD::render_buffers_configure(RID p_render_buffers, RID p rb->render_target = p_render_target; rb->msaa = p_msaa; rb->screen_space_aa = p_screen_space_aa; + rb->use_taa = p_use_taa; rb->use_debanding = p_use_debanding; rb->view_count = p_view_count; @@ -3104,7 +3160,7 @@ void RendererSceneRenderRD::render_buffers_configure(RID p_render_buffers, RID p } RID target_texture = texture_storage->render_target_get_rd_texture(rb->render_target); - rb->data->configure(rb->internal_texture, rb->depth_texture, target_texture, p_internal_width, p_internal_height, p_msaa, p_view_count); + rb->data->configure(rb->internal_texture, rb->depth_texture, target_texture, p_internal_width, p_internal_height, p_msaa, p_use_taa, p_view_count); if (is_clustered_enabled()) { rb->cluster_builder->setup(Size2i(p_internal_width, p_internal_height), max_cluster_elements, rb->depth_texture, RendererRD::MaterialStorage::get_singleton()->sampler_rd_get_default(RS::CANVAS_ITEM_TEXTURE_FILTER_NEAREST, RS::CANVAS_ITEM_TEXTURE_REPEAT_DISABLED), rb->internal_texture); @@ -4134,7 +4190,7 @@ Vector3i RendererSceneRenderRD::_point_get_position_in_froxel_volume(const Vecto return Vector3i(fog_position); } -void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_environment, const CameraMatrix &p_cam_projection, const Transform3D &p_cam_transform, RID p_shadow_atlas, int p_directional_light_count, bool p_use_directional_shadows, int p_positional_light_count, int p_voxel_gi_count, const PagedArray &p_fog_volumes) { +void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_environment, const CameraMatrix &p_cam_projection, const Transform3D &p_cam_transform, const Transform3D &p_prev_cam_inv_transform, RID p_shadow_atlas, int p_directional_light_count, bool p_use_directional_shadows, int p_positional_light_count, int p_voxel_gi_count, const PagedArray &p_fog_volumes) { RendererRD::TextureStorage *texture_storage = RendererRD::TextureStorage::get_singleton(); RendererRD::MaterialStorage *material_storage = RendererRD::MaterialStorage::get_singleton(); @@ -4269,7 +4325,7 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e params.detail_spread = env->volumetric_fog_detail_spread; params.temporal_blend = env->volumetric_fog_temporal_reprojection_amount; - Transform3D to_prev_cam_view = rb->volumetric_fog->prev_cam_transform.affine_inverse() * p_cam_transform; + Transform3D to_prev_cam_view = p_prev_cam_inv_transform * p_cam_transform; storage->store_transform(to_prev_cam_view, params.to_prev_view); storage->store_transform(p_cam_transform, params.transform); @@ -4747,7 +4803,7 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e params.max_voxel_gi_instances = env->volumetric_fog_gi_inject > 0.001 ? p_voxel_gi_count : 0; params.temporal_frame = RSG::rasterizer->get_frame_number() % VolumetricFog::MAX_TEMPORAL_FRAMES; - Transform3D to_prev_cam_view = rb->volumetric_fog->prev_cam_transform.affine_inverse() * p_cam_transform; + Transform3D to_prev_cam_view = p_prev_cam_inv_transform * p_cam_transform; storage->store_transform(to_prev_cam_view, params.to_prev_view); params.use_temporal_reprojection = env->volumetric_fog_temporal_reprojection; @@ -4833,8 +4889,6 @@ void RendererSceneRenderRD::_update_volumetric_fog(RID p_render_buffers, RID p_e RENDER_TIMESTAMP("< Volumetric Fog"); RD::get_singleton()->draw_command_end_label(); RD::get_singleton()->draw_command_end_label(); - - rb->volumetric_fog->prev_cam_transform = p_cam_transform; } bool RendererSceneRenderRD::_needs_post_prepass_render(RenderDataRD *p_render_data, bool p_use_gi) { @@ -5032,12 +5086,12 @@ void RendererSceneRenderRD::_pre_opaque_render(RenderDataRD *p_render_data, bool } } if (is_volumetric_supported()) { - _update_volumetric_fog(p_render_data->render_buffers, p_render_data->environment, p_render_data->cam_projection, p_render_data->cam_transform, p_render_data->shadow_atlas, directional_light_count, directional_shadows, positional_light_count, render_state.voxel_gi_count, *p_render_data->fog_volumes); + _update_volumetric_fog(p_render_data->render_buffers, p_render_data->environment, p_render_data->cam_projection, p_render_data->cam_transform, p_render_data->prev_cam_transform.affine_inverse(), p_render_data->shadow_atlas, directional_light_count, directional_shadows, positional_light_count, render_state.voxel_gi_count, *p_render_data->fog_volumes); } } } -void RendererSceneRenderRD::render_scene(RID p_render_buffers, const CameraData *p_camera_data, const PagedArray &p_instances, const PagedArray &p_lights, const PagedArray &p_reflection_probes, const PagedArray &p_voxel_gi_instances, const PagedArray &p_decals, const PagedArray &p_lightmaps, const PagedArray &p_fog_volumes, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_occluder_debug_tex, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_mesh_lod_threshold, const RenderShadowData *p_render_shadows, int p_render_shadow_count, const RenderSDFGIData *p_render_sdfgi_regions, int p_render_sdfgi_region_count, const RenderSDFGIUpdateData *p_sdfgi_update_data, RendererScene::RenderInfo *r_render_info) { +void RendererSceneRenderRD::render_scene(RID p_render_buffers, const CameraData *p_camera_data, const CameraData *p_prev_camera_data, const PagedArray &p_instances, const PagedArray &p_lights, const PagedArray &p_reflection_probes, const PagedArray &p_voxel_gi_instances, const PagedArray &p_decals, const PagedArray &p_lightmaps, const PagedArray &p_fog_volumes, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_occluder_debug_tex, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_mesh_lod_threshold, const RenderShadowData *p_render_shadows, int p_render_shadow_count, const RenderSDFGIData *p_render_sdfgi_regions, int p_render_sdfgi_region_count, const RenderSDFGIUpdateData *p_sdfgi_update_data, RendererScene::RenderInfo *r_render_info) { RendererRD::TextureStorage *texture_storage = RendererRD::TextureStorage::get_singleton(); // getting this here now so we can direct call a bunch of things more easily @@ -5057,12 +5111,21 @@ void RendererSceneRenderRD::render_scene(RID p_render_buffers, const CameraData render_data.cam_projection = p_camera_data->main_projection; render_data.view_projection[0] = p_camera_data->main_projection; render_data.cam_orthogonal = p_camera_data->is_orthogonal; + render_data.taa_jitter = p_camera_data->taa_jitter; render_data.view_count = p_camera_data->view_count; for (uint32_t v = 0; v < p_camera_data->view_count; v++) { render_data.view_projection[v] = p_camera_data->view_projection[v]; } + render_data.prev_cam_transform = p_prev_camera_data->main_transform; + render_data.prev_cam_projection = p_prev_camera_data->main_projection; + render_data.prev_taa_jitter = p_prev_camera_data->taa_jitter; + + for (uint32_t v = 0; v < p_camera_data->view_count; v++) { + render_data.prev_view_projection[v] = p_prev_camera_data->view_projection[v]; + } + render_data.z_near = p_camera_data->main_projection.get_z_near(); render_data.z_far = p_camera_data->main_projection.get_z_far(); diff --git a/servers/rendering/renderer_rd/renderer_scene_render_rd.h b/servers/rendering/renderer_rd/renderer_scene_render_rd.h index a384f216cdd0..1b1df6469e3c 100644 --- a/servers/rendering/renderer_rd/renderer_scene_render_rd.h +++ b/servers/rendering/renderer_rd/renderer_scene_render_rd.h @@ -53,12 +53,18 @@ struct RenderDataRD { Transform3D cam_transform; CameraMatrix cam_projection; + Vector2 taa_jitter; bool cam_orthogonal = false; // For stereo rendering uint32_t view_count = 1; CameraMatrix view_projection[RendererSceneRender::MAX_RENDER_VIEWS]; + Transform3D prev_cam_transform; + CameraMatrix prev_cam_projection; + Vector2 prev_taa_jitter; + CameraMatrix prev_view_projection[RendererSceneRender::MAX_RENDER_VIEWS]; + float z_near = 0.0; float z_far = 0.0; @@ -103,7 +109,7 @@ protected: double time_step = 0.0; struct RenderBufferData { - virtual void configure(RID p_color_buffer, RID p_depth_buffer, RID p_target_buffer, int p_width, int p_height, RS::ViewportMSAA p_msaa, uint32_t p_view_count) = 0; + virtual void configure(RID p_color_buffer, RID p_depth_buffer, RID p_target_buffer, int p_width, int p_height, RS::ViewportMSAA p_msaa, bool p_use_taa, uint32_t p_view_count) = 0; virtual ~RenderBufferData() {} }; virtual RenderBufferData *_create_render_buffer_data() = 0; @@ -131,13 +137,14 @@ protected: virtual void _base_uniforms_changed() = 0; virtual RID _render_buffers_get_normal_texture(RID p_render_buffers) = 0; + virtual RID _render_buffers_get_velocity_texture(RID p_render_buffers) = 0; void _process_ssao(RID p_render_buffers, RID p_environment, RID p_normal_buffer, const CameraMatrix &p_projection); void _process_ssr(RID p_render_buffers, RID p_dest_framebuffer, RID p_normal_buffer, RID p_specular_buffer, RID p_metallic, const Color &p_metallic_mask, RID p_environment, const CameraMatrix &p_projection, bool p_use_additive); void _process_sss(RID p_render_buffers, const CameraMatrix &p_camera); void _process_ssil(RID p_render_buffers, RID p_environment, RID p_normal_buffer, const CameraMatrix &p_projection, const Transform3D &p_transform); void _copy_framebuffer_to_ssil(RID p_render_buffers); - void _ensure_ss_effects(RID p_render_buffers, bool p_using_ssil); + void _process_taa(RID p_render_buffers, RID p_velocity_buffer, float p_z_near, float p_z_far); bool _needs_post_prepass_render(RenderDataRD *p_render_data, bool p_use_gi); void _post_prepass_render(RenderDataRD *p_render_data, bool p_use_gi); @@ -472,6 +479,7 @@ private: float fsr_sharpness = 0.2f; RS::ViewportMSAA msaa = RS::VIEWPORT_MSAA_DISABLED; RS::ViewportScreenSpaceAA screen_space_aa = RS::VIEWPORT_SCREEN_SPACE_AA_DISABLED; + bool use_taa = false; bool use_debanding = false; uint32_t view_count = 1; @@ -592,6 +600,12 @@ private: RID blur_radius[2]; } ssr; + struct TAA { + RID history; + RID temp; + RID prev_velocity; // Last frame velocity buffer + } taa; + RID ambient_buffer; RID reflection_buffer; }; @@ -793,8 +807,6 @@ private: RID sky_uniform_set; int last_shadow_filter = -1; - - Transform3D prev_cam_transform; }; struct VolumetricFogShader { @@ -914,7 +926,7 @@ private: Vector3i _point_get_position_in_froxel_volume(const Vector3 &p_point, float fog_end, const Vector2 &fog_near_size, const Vector2 &fog_far_size, float volumetric_fog_detail_spread, const Vector3 &fog_size, const Transform3D &p_cam_transform); void _volumetric_fog_erase(RenderBuffers *rb); - void _update_volumetric_fog(RID p_render_buffers, RID p_environment, const CameraMatrix &p_cam_projection, const Transform3D &p_cam_transform, RID p_shadow_atlas, int p_directional_light_count, bool p_use_directional_shadows, int p_positional_light_count, int p_voxel_gi_count, const PagedArray &p_fog_volumes); + void _update_volumetric_fog(RID p_render_buffers, RID p_environment, const CameraMatrix &p_cam_projection, const Transform3D &p_cam_transform, const Transform3D &p_prev_cam_inv_transform, RID p_shadow_atlas, int p_directional_light_count, bool p_use_directional_shadows, int p_positional_light_count, int p_voxel_gi_count, const PagedArray &p_fog_volumes); struct FogShaderData : public RendererRD::ShaderData { bool valid = false; @@ -1385,7 +1397,7 @@ public: virtual RD::DataFormat _render_buffers_get_color_format(); virtual bool _render_buffers_can_be_storage(); virtual RID render_buffers_create() override; - virtual void render_buffers_configure(RID p_render_buffers, RID p_render_target, int p_internal_width, int p_internal_height, int p_width, int p_height, float p_fsr_sharpness, float p_fsr_mipmap_bias, RS::ViewportMSAA p_msaa, RS::ViewportScreenSpaceAA p_screen_space_aa, bool p_use_debanding, uint32_t p_view_count) override; + virtual void render_buffers_configure(RID p_render_buffers, RID p_render_target, int p_internal_width, int p_internal_height, int p_width, int p_height, float p_fsr_sharpness, float p_fsr_mipmap_bias, RS::ViewportMSAA p_msaa, RS::ViewportScreenSpaceAA p_screen_space_aa, bool p_use_taa, bool p_use_debanding, uint32_t p_view_count) override; virtual void gi_set_use_half_resolution(bool p_enable) override; RID render_buffers_get_depth_texture(RID p_render_buffers); @@ -1419,7 +1431,7 @@ public: virtual void update_uniform_sets(){}; - virtual void render_scene(RID p_render_buffers, const CameraData *p_camera_data, const PagedArray &p_instances, const PagedArray &p_lights, const PagedArray &p_reflection_probes, const PagedArray &p_voxel_gi_instances, const PagedArray &p_decals, const PagedArray &p_lightmaps, const PagedArray &p_fog_volumes, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_occluder_debug_tex, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_mesh_lod_threshold, const RenderShadowData *p_render_shadows, int p_render_shadow_count, const RenderSDFGIData *p_render_sdfgi_regions, int p_render_sdfgi_region_count, const RenderSDFGIUpdateData *p_sdfgi_update_data = nullptr, RendererScene::RenderInfo *r_render_info = nullptr) override; + virtual void render_scene(RID p_render_buffers, const CameraData *p_camera_data, const CameraData *p_prev_camera_data, const PagedArray &p_instances, const PagedArray &p_lights, const PagedArray &p_reflection_probes, const PagedArray &p_voxel_gi_instances, const PagedArray &p_decals, const PagedArray &p_lightmaps, const PagedArray &p_fog_volumes, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_occluder_debug_tex, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_mesh_lod_threshold, const RenderShadowData *p_render_shadows, int p_render_shadow_count, const RenderSDFGIData *p_render_sdfgi_regions, int p_render_sdfgi_region_count, const RenderSDFGIUpdateData *p_sdfgi_update_data = nullptr, RendererScene::RenderInfo *r_render_info = nullptr) override; virtual void render_material(const Transform3D &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, const PagedArray &p_instances, RID p_framebuffer, const Rect2i &p_region) override; diff --git a/servers/rendering/renderer_rd/shaders/scene_forward_clustered.glsl b/servers/rendering/renderer_rd/shaders/scene_forward_clustered.glsl index 4f49c186a642..9f27cea84354 100644 --- a/servers/rendering/renderer_rd/shaders/scene_forward_clustered.glsl +++ b/servers/rendering/renderer_rd/shaders/scene_forward_clustered.glsl @@ -83,6 +83,11 @@ layout(location = 5) out vec3 tangent_interp; layout(location = 6) out vec3 binormal_interp; #endif +#ifdef MOTION_VECTORS +layout(location = 7) out vec4 screen_position; +layout(location = 8) out vec4 prev_screen_position; +#endif + #ifdef MATERIAL_UNIFORMS_USED layout(set = MATERIAL_UNIFORM_SET, binding = 0, std140) uniform MaterialUniforms{ @@ -93,11 +98,11 @@ layout(set = MATERIAL_UNIFORM_SET, binding = 0, std140) uniform MaterialUniforms #ifdef MODE_DUAL_PARABOLOID -layout(location = 8) out float dp_clip; +layout(location = 9) out float dp_clip; #endif -layout(location = 9) out flat uint instance_index_interp; +layout(location = 10) out flat uint instance_index_interp; #ifdef USE_MULTIVIEW #ifdef has_VK_KHR_multiview @@ -115,23 +120,12 @@ invariant gl_Position; #GLOBALS -void main() { +void vertex_shader(in uint instance_index, in bool is_multimesh, in SceneData scene_data, in mat4 model_matrix, out vec4 screen_pos) { vec4 instance_custom = vec4(0.0); #if defined(COLOR_USED) color_interp = color_attrib; #endif - uint instance_index = draw_call.instance_index; - - bool is_multimesh = bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH); - if (!is_multimesh) { - instance_index += gl_InstanceIndex; - } - - instance_index_interp = instance_index; - - mat4 model_matrix = instances.data[instance_index].transform; - mat3 model_normal_matrix; if (bool(instances.data[instance_index].flags & INSTANCE_FLAGS_NON_UNIFORM_SCALE)) { model_normal_matrix = transpose(inverse(mat3(model_matrix))); @@ -321,6 +315,11 @@ void main() { #endif vertex_interp = vertex; + +#ifdef MOTION_VECTORS + screen_pos = projection_matrix * vec4(vertex_interp, 1.0); +#endif + #ifdef NORMAL_USED normal_interp = normal; #endif @@ -375,6 +374,27 @@ void main() { #endif } +void main() { + uint instance_index = draw_call.instance_index; + + bool is_multimesh = bool(instances.data[instance_index].flags & INSTANCE_FLAGS_MULTIMESH); + if (!is_multimesh) { + instance_index += gl_InstanceIndex; + } + + instance_index_interp = instance_index; + + SceneData scene_data = scene_data_block.data; + mat4 model_matrix = instances.data[instance_index].transform; +#if defined(MOTION_VECTORS) + vertex_shader(instance_index, is_multimesh, scene_data_block.prev_data, instances.data[instance_index].prev_transform, prev_screen_position); + vertex_shader(instance_index, is_multimesh, scene_data, model_matrix, screen_position); +#else + vec4 screen_position; + vertex_shader(instance_index, is_multimesh, scene_data, model_matrix, screen_position); +#endif +} + #[fragment] #version 450 @@ -431,13 +451,18 @@ layout(location = 5) in vec3 tangent_interp; layout(location = 6) in vec3 binormal_interp; #endif +#ifdef MOTION_VECTORS +layout(location = 7) in vec4 screen_position; +layout(location = 8) in vec4 prev_screen_position; +#endif + #ifdef MODE_DUAL_PARABOLOID -layout(location = 8) in float dp_clip; +layout(location = 9) in float dp_clip; #endif -layout(location = 9) in flat uint instance_index_interp; +layout(location = 10) in flat uint instance_index_interp; #ifdef USE_MULTIVIEW #ifdef has_VK_KHR_multiview @@ -510,6 +535,10 @@ layout(location = 0) out vec4 frag_color; #endif // RENDER DEPTH +#ifdef MOTION_VECTORS +layout(location = 2) out vec2 motion_vector; +#endif + #include "scene_forward_aa_inc.glsl" #if !defined(MODE_RENDER_DEPTH) && !defined(MODE_UNSHADED) @@ -528,24 +557,24 @@ layout(location = 0) out vec4 frag_color; #ifndef MODE_RENDER_DEPTH vec4 volumetric_fog_process(vec2 screen_uv, float z) { - vec3 fog_pos = vec3(screen_uv, z * scene_data.volumetric_fog_inv_length); + vec3 fog_pos = vec3(screen_uv, z * scene_data_block.data.volumetric_fog_inv_length); if (fog_pos.z < 0.0) { return vec4(0.0); } else if (fog_pos.z < 1.0) { - fog_pos.z = pow(fog_pos.z, scene_data.volumetric_fog_detail_spread); + fog_pos.z = pow(fog_pos.z, scene_data_block.data.volumetric_fog_detail_spread); } return texture(sampler3D(volumetric_fog_texture, material_samplers[SAMPLER_LINEAR_CLAMP]), fog_pos); } vec4 fog_process(vec3 vertex) { - vec3 fog_color = scene_data.fog_light_color; + vec3 fog_color = scene_data_block.data.fog_light_color; - if (scene_data.fog_aerial_perspective > 0.0) { + if (scene_data_block.data.fog_aerial_perspective > 0.0) { vec3 sky_fog_color = vec3(0.0); - vec3 cube_view = scene_data.radiance_inverse_xform * vertex; + vec3 cube_view = scene_data_block.data.radiance_inverse_xform * vertex; // mip_level always reads from the second mipmap and higher so the fog is always slightly blurred - float mip_level = mix(1.0 / MAX_ROUGHNESS_LOD, 1.0, 1.0 - (abs(vertex.z) - scene_data.z_near) / (scene_data.z_far - scene_data.z_near)); + float mip_level = mix(1.0 / MAX_ROUGHNESS_LOD, 1.0, 1.0 - (abs(vertex.z) - scene_data_block.data.z_near) / (scene_data_block.data.z_far - scene_data_block.data.z_near)); #ifdef USE_RADIANCE_CUBEMAP_ARRAY float lod, blend; blend = modf(mip_level * MAX_ROUGHNESS_LOD, lod); @@ -554,29 +583,29 @@ vec4 fog_process(vec3 vertex) { #else sky_fog_color = textureLod(samplerCube(radiance_cubemap, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), cube_view, mip_level * MAX_ROUGHNESS_LOD).rgb; #endif //USE_RADIANCE_CUBEMAP_ARRAY - fog_color = mix(fog_color, sky_fog_color, scene_data.fog_aerial_perspective); + fog_color = mix(fog_color, sky_fog_color, scene_data_block.data.fog_aerial_perspective); } - if (scene_data.fog_sun_scatter > 0.001) { + if (scene_data_block.data.fog_sun_scatter > 0.001) { vec4 sun_scatter = vec4(0.0); float sun_total = 0.0; vec3 view = normalize(vertex); - for (uint i = 0; i < scene_data.directional_light_count; i++) { + for (uint i = 0; i < scene_data_block.data.directional_light_count; i++) { vec3 light_color = directional_lights.data[i].color * directional_lights.data[i].energy; float light_amount = pow(max(dot(view, directional_lights.data[i].direction), 0.0), 8.0); - fog_color += light_color * light_amount * scene_data.fog_sun_scatter; + fog_color += light_color * light_amount * scene_data_block.data.fog_sun_scatter; } } - float fog_amount = 1.0 - exp(min(0.0, -length(vertex) * scene_data.fog_density)); + float fog_amount = 1.0 - exp(min(0.0, -length(vertex) * scene_data_block.data.fog_density)); - if (abs(scene_data.fog_height_density) >= 0.0001) { - float y = (scene_data.inv_view_matrix * vec4(vertex, 1.0)).y; + if (abs(scene_data_block.data.fog_height_density) >= 0.0001) { + float y = (scene_data_block.data.inv_view_matrix * vec4(vertex, 1.0)).y; - float y_dist = y - scene_data.fog_height; + float y_dist = y - scene_data_block.data.fog_height; - float vfog_amount = 1.0 - exp(min(0.0, y_dist * scene_data.fog_height_density)); + float vfog_amount = 1.0 - exp(min(0.0, y_dist * scene_data_block.data.fog_height_density)); fog_amount = max(vfog_amount, fog_amount); } @@ -608,6 +637,8 @@ void main() { discard; #endif + SceneData scene_data = scene_data_block.data; + SceneData prev_scene_data = scene_data_block.prev_data; uint instance_index = instance_index_interp; //lay out everything, whatever is unused is optimized away anyway @@ -2015,4 +2046,13 @@ void main() { #endif //MODE_SEPARATE_SPECULAR #endif //MODE_RENDER_DEPTH +#ifdef MOTION_VECTORS + vec2 position_clip = (screen_position.xy / screen_position.w) - scene_data.taa_jitter; + vec2 prev_position_clip = (prev_screen_position.xy / prev_screen_position.w) - prev_scene_data.taa_jitter; + + vec2 position_uv = position_clip * vec2(0.5, 0.5); + vec2 prev_position_uv = prev_position_clip * vec2(0.5, 0.5); + + motion_vector = position_uv - prev_position_uv; +#endif } diff --git a/servers/rendering/renderer_rd/shaders/scene_forward_clustered_inc.glsl b/servers/rendering/renderer_rd/shaders/scene_forward_clustered_inc.glsl index f2672f10e79a..b700e2154341 100644 --- a/servers/rendering/renderer_rd/shaders/scene_forward_clustered_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/scene_forward_clustered_inc.glsl @@ -171,7 +171,7 @@ sdfgi; /* Set 1: Render Pass (changes per render pass) */ -layout(set = 1, binding = 0, std140) uniform SceneData { +struct SceneData { mat4 projection_matrix; mat4 inv_projection_matrix; mat4 inv_view_matrix; @@ -249,11 +249,19 @@ layout(set = 1, binding = 0, std140) uniform SceneData { float reflection_multiplier; // one normally, zero when rendering reflections bool pancake_shadows; + vec2 taa_jitter; + uvec2 pad; +}; + +layout(set = 1, binding = 0, std140) uniform SceneDataBlock { + SceneData data; + SceneData prev_data; } -scene_data; +scene_data_block; struct InstanceData { mat4 transform; + mat4 prev_transform; uint flags; uint instance_uniforms_ofs; //base offset in global buffer for instance variables uint gi_offset; //GI information when using lightmapping (VCT or lightmap index) diff --git a/servers/rendering/renderer_rd/shaders/scene_forward_lights_inc.glsl b/servers/rendering/renderer_rd/shaders/scene_forward_lights_inc.glsl index bd1c2b575832..5a308bbd02d3 100644 --- a/servers/rendering/renderer_rd/shaders/scene_forward_lights_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/scene_forward_lights_inc.glsl @@ -262,7 +262,7 @@ float sample_directional_pcf_shadow(texture2D shadow, vec2 shadow_pixel_size, ve float avg = 0.0; for (uint i = 0; i < sc_directional_soft_shadow_samples; i++) { - avg += textureProj(sampler2DShadow(shadow, shadow_sampler), vec4(pos + shadow_pixel_size * (disk_rotation * scene_data.directional_soft_shadow_kernel[i].xy), depth, 1.0)); + avg += textureProj(sampler2DShadow(shadow, shadow_sampler), vec4(pos + shadow_pixel_size * (disk_rotation * scene_data_block.data.directional_soft_shadow_kernel[i].xy), depth, 1.0)); } return avg * (1.0 / float(sc_directional_soft_shadow_samples)); @@ -288,7 +288,7 @@ float sample_pcf_shadow(texture2D shadow, vec2 shadow_pixel_size, vec3 coord) { float avg = 0.0; for (uint i = 0; i < sc_soft_shadow_samples; i++) { - avg += textureProj(sampler2DShadow(shadow, shadow_sampler), vec4(pos + shadow_pixel_size * (disk_rotation * scene_data.soft_shadow_kernel[i].xy), depth, 1.0)); + avg += textureProj(sampler2DShadow(shadow, shadow_sampler), vec4(pos + shadow_pixel_size * (disk_rotation * scene_data_block.data.soft_shadow_kernel[i].xy), depth, 1.0)); } return avg * (1.0 / float(sc_soft_shadow_samples)); @@ -311,10 +311,10 @@ float sample_omni_pcf_shadow(texture2D shadow, float blur_scale, vec2 coord, vec } float avg = 0.0; - vec2 offset_scale = blur_scale * 2.0 * scene_data.shadow_atlas_pixel_size / uv_rect.zw; + vec2 offset_scale = blur_scale * 2.0 * scene_data_block.data.shadow_atlas_pixel_size / uv_rect.zw; for (uint i = 0; i < sc_soft_shadow_samples; i++) { - vec2 offset = offset_scale * (disk_rotation * scene_data.soft_shadow_kernel[i].xy); + vec2 offset = offset_scale * (disk_rotation * scene_data_block.data.soft_shadow_kernel[i].xy); vec2 sample_coord = coord + offset; float sample_coord_length_sqaured = dot(sample_coord, sample_coord); @@ -351,7 +351,7 @@ float sample_directional_soft_shadow(texture2D shadow, vec3 pssm_coord, vec2 tex } for (uint i = 0; i < sc_directional_penumbra_shadow_samples; i++) { - vec2 suv = pssm_coord.xy + (disk_rotation * scene_data.directional_penumbra_shadow_kernel[i].xy) * tex_scale; + vec2 suv = pssm_coord.xy + (disk_rotation * scene_data_block.data.directional_penumbra_shadow_kernel[i].xy) * tex_scale; float d = textureLod(sampler2D(shadow, material_samplers[SAMPLER_LINEAR_CLAMP]), suv, 0.0).r; if (d < pssm_coord.z) { blocker_average += d; @@ -367,7 +367,7 @@ float sample_directional_soft_shadow(texture2D shadow, vec3 pssm_coord, vec2 tex float s = 0.0; for (uint i = 0; i < sc_directional_penumbra_shadow_samples; i++) { - vec2 suv = pssm_coord.xy + (disk_rotation * scene_data.directional_penumbra_shadow_kernel[i].xy) * tex_scale; + vec2 suv = pssm_coord.xy + (disk_rotation * scene_data_block.data.directional_penumbra_shadow_kernel[i].xy) * tex_scale; s += textureProj(sampler2DShadow(shadow, shadow_sampler), vec4(suv, pssm_coord.z, 1.0)); } @@ -394,7 +394,7 @@ float light_process_omni_shadow(uint idx, vec3 vertex, vec3 normal) { #ifndef SHADOWS_DISABLED if (omni_lights.data[idx].shadow_enabled) { // there is a shadowmap - vec2 texel_size = scene_data.shadow_atlas_pixel_size; + vec2 texel_size = scene_data_block.data.shadow_atlas_pixel_size; vec4 base_uv_rect = omni_lights.data[idx].atlas_rect; base_uv_rect.xy += texel_size; base_uv_rect.zw -= texel_size * 2.0; @@ -438,7 +438,7 @@ float light_process_omni_shadow(uint idx, vec3 vertex, vec3 normal) { bitangent *= omni_lights.data[idx].soft_shadow_size * omni_lights.data[idx].soft_shadow_scale; for (uint i = 0; i < sc_penumbra_shadow_samples; i++) { - vec2 disk = disk_rotation * scene_data.penumbra_shadow_kernel[i].xy; + vec2 disk = disk_rotation * scene_data_block.data.penumbra_shadow_kernel[i].xy; vec3 pos = local_vert + tangent * disk.x + bitangent * disk.y; @@ -474,7 +474,7 @@ float light_process_omni_shadow(uint idx, vec3 vertex, vec3 normal) { shadow = 0.0; for (uint i = 0; i < sc_penumbra_shadow_samples; i++) { - vec2 disk = disk_rotation * scene_data.penumbra_shadow_kernel[i].xy; + vec2 disk = disk_rotation * scene_data_block.data.penumbra_shadow_kernel[i].xy; vec3 pos = local_vert + tangent * disk.x + bitangent * disk.y; pos = normalize(pos); @@ -579,7 +579,7 @@ void light_process_omni(uint idx, vec3 vertex, vec3 eye_vec, vec3 normal, vec3 v splane.xy = splane.xy * 0.5 + 0.5; splane.z = shadow_len * omni_lights.data[idx].inv_radius; splane.xy = clamp_rect.xy + splane.xy * clamp_rect.zw; - // splane.xy = clamp(splane.xy,clamp_rect.xy + scene_data.shadow_atlas_pixel_size,clamp_rect.xy + clamp_rect.zw - scene_data.shadow_atlas_pixel_size ); + // splane.xy = clamp(splane.xy,clamp_rect.xy + scene_data_block.data.shadow_atlas_pixel_size,clamp_rect.xy + clamp_rect.zw - scene_data_block.data.shadow_atlas_pixel_size ); splane.w = 1.0; //needed? i think it should be 1 already float shadow_z = textureLod(sampler2D(shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), splane.xy, 0.0).r; @@ -709,7 +709,7 @@ float light_process_spot_shadow(uint idx, vec3 vertex, vec3 normal) { float uv_size = spot_lights.data[idx].soft_shadow_size * z_norm * spot_lights.data[idx].soft_shadow_scale; vec2 clamp_max = spot_lights.data[idx].atlas_rect.xy + spot_lights.data[idx].atlas_rect.zw; for (uint i = 0; i < sc_penumbra_shadow_samples; i++) { - vec2 suv = shadow_uv + (disk_rotation * scene_data.penumbra_shadow_kernel[i].xy) * uv_size; + vec2 suv = shadow_uv + (disk_rotation * scene_data_block.data.penumbra_shadow_kernel[i].xy) * uv_size; suv = clamp(suv, spot_lights.data[idx].atlas_rect.xy, clamp_max); float d = textureLod(sampler2D(shadow_atlas, material_samplers[SAMPLER_LINEAR_CLAMP]), suv, 0.0).r; if (d < splane.z) { @@ -726,7 +726,7 @@ float light_process_spot_shadow(uint idx, vec3 vertex, vec3 normal) { shadow = 0.0; for (uint i = 0; i < sc_penumbra_shadow_samples; i++) { - vec2 suv = shadow_uv + (disk_rotation * scene_data.penumbra_shadow_kernel[i].xy) * uv_size; + vec2 suv = shadow_uv + (disk_rotation * scene_data_block.data.penumbra_shadow_kernel[i].xy) * uv_size; suv = clamp(suv, spot_lights.data[idx].atlas_rect.xy, clamp_max); shadow += textureProj(sampler2DShadow(shadow_atlas, shadow_sampler), vec4(suv, splane.z, 1.0)); } @@ -740,7 +740,7 @@ float light_process_spot_shadow(uint idx, vec3 vertex, vec3 normal) { } else { //hard shadow vec3 shadow_uv = vec3(splane.xy * spot_lights.data[idx].atlas_rect.zw + spot_lights.data[idx].atlas_rect.xy, splane.z); - shadow = sample_pcf_shadow(shadow_atlas, spot_lights.data[idx].soft_shadow_scale * scene_data.shadow_atlas_pixel_size, shadow_uv); + shadow = sample_pcf_shadow(shadow_atlas, spot_lights.data[idx].soft_shadow_scale * scene_data_block.data.shadow_atlas_pixel_size, shadow_uv); } return shadow; diff --git a/servers/rendering/renderer_rd/shaders/scene_forward_mobile.glsl b/servers/rendering/renderer_rd/shaders/scene_forward_mobile.glsl index fd0fefc5fd00..e15ebbfc9131 100644 --- a/servers/rendering/renderer_rd/shaders/scene_forward_mobile.glsl +++ b/servers/rendering/renderer_rd/shaders/scene_forward_mobile.glsl @@ -115,6 +115,8 @@ invariant gl_Position; #GLOBALS void main() { + SceneData scene_data = scene_data_block.data; + vec4 instance_custom = vec4(0.0); #if defined(COLOR_USED) color_interp = color_attrib; @@ -527,13 +529,13 @@ layout(location = 0) out mediump vec4 frag_color; */ vec4 fog_process(vec3 vertex) { - vec3 fog_color = scene_data.fog_light_color; + vec3 fog_color = scene_data_block.data.fog_light_color; - if (scene_data.fog_aerial_perspective > 0.0) { + if (scene_data_block.data.fog_aerial_perspective > 0.0) { vec3 sky_fog_color = vec3(0.0); - vec3 cube_view = scene_data.radiance_inverse_xform * vertex; + vec3 cube_view = scene_data_block.data.radiance_inverse_xform * vertex; // mip_level always reads from the second mipmap and higher so the fog is always slightly blurred - float mip_level = mix(1.0 / MAX_ROUGHNESS_LOD, 1.0, 1.0 - (abs(vertex.z) - scene_data.z_near) / (scene_data.z_far - scene_data.z_near)); + float mip_level = mix(1.0 / MAX_ROUGHNESS_LOD, 1.0, 1.0 - (abs(vertex.z) - scene_data_block.data.z_near) / (scene_data_block.data.z_far - scene_data_block.data.z_near)); #ifdef USE_RADIANCE_CUBEMAP_ARRAY float lod, blend; blend = modf(mip_level * MAX_ROUGHNESS_LOD, lod); @@ -542,29 +544,29 @@ vec4 fog_process(vec3 vertex) { #else sky_fog_color = textureLod(samplerCube(radiance_cubemap, material_samplers[SAMPLER_LINEAR_WITH_MIPMAPS_CLAMP]), cube_view, mip_level * MAX_ROUGHNESS_LOD).rgb; #endif //USE_RADIANCE_CUBEMAP_ARRAY - fog_color = mix(fog_color, sky_fog_color, scene_data.fog_aerial_perspective); + fog_color = mix(fog_color, sky_fog_color, scene_data_block.data.fog_aerial_perspective); } - if (scene_data.fog_sun_scatter > 0.001) { + if (scene_data_block.data.fog_sun_scatter > 0.001) { vec4 sun_scatter = vec4(0.0); float sun_total = 0.0; vec3 view = normalize(vertex); - for (uint i = 0; i < scene_data.directional_light_count; i++) { + for (uint i = 0; i < scene_data_block.data.directional_light_count; i++) { vec3 light_color = directional_lights.data[i].color * directional_lights.data[i].energy; float light_amount = pow(max(dot(view, directional_lights.data[i].direction), 0.0), 8.0); - fog_color += light_color * light_amount * scene_data.fog_sun_scatter; + fog_color += light_color * light_amount * scene_data_block.data.fog_sun_scatter; } } - float fog_amount = 1.0 - exp(min(0.0, -length(vertex) * scene_data.fog_density)); + float fog_amount = 1.0 - exp(min(0.0, -length(vertex) * scene_data_block.data.fog_density)); - if (abs(scene_data.fog_height_density) >= 0.0001) { - float y = (scene_data.inv_view_matrix * vec4(vertex, 1.0)).y; + if (abs(scene_data_block.data.fog_height_density) >= 0.0001) { + float y = (scene_data_block.data.inv_view_matrix * vec4(vertex, 1.0)).y; - float y_dist = y - scene_data.fog_height; + float y_dist = y - scene_data_block.data.fog_height; - float vfog_amount = 1.0 - exp(min(0.0, y_dist * scene_data.fog_height_density)); + float vfog_amount = 1.0 - exp(min(0.0, y_dist * scene_data_block.data.fog_height_density)); fog_amount = max(vfog_amount, fog_amount); } @@ -580,6 +582,7 @@ void main() { if (dp_clip > 0.0) discard; #endif + SceneData scene_data = scene_data_block.data; //lay out everything, whatever is unused is optimized away anyway vec3 vertex = vertex_interp; diff --git a/servers/rendering/renderer_rd/shaders/scene_forward_mobile_inc.glsl b/servers/rendering/renderer_rd/shaders/scene_forward_mobile_inc.glsl index 91ef19ab675e..dd14a158372a 100644 --- a/servers/rendering/renderer_rd/shaders/scene_forward_mobile_inc.glsl +++ b/servers/rendering/renderer_rd/shaders/scene_forward_mobile_inc.glsl @@ -125,7 +125,7 @@ global_variables; /* Set 1: Render Pass (changes per render pass) */ -layout(set = 1, binding = 0, std140) uniform SceneData { +struct SceneData { highp mat4 projection_matrix; highp mat4 inv_projection_matrix; highp mat4 inv_view_matrix; @@ -189,8 +189,12 @@ layout(set = 1, binding = 0, std140) uniform SceneData { uint pad1; uint pad2; uint pad3; +}; + +layout(set = 1, binding = 0, std140) uniform SceneDataBlock { + SceneData data; } -scene_data; +scene_data_block; #ifdef USE_RADIANCE_CUBEMAP_ARRAY diff --git a/servers/rendering/renderer_rd/shaders/taa_resolve.glsl b/servers/rendering/renderer_rd/shaders/taa_resolve.glsl new file mode 100644 index 000000000000..a1a77b95aa4c --- /dev/null +++ b/servers/rendering/renderer_rd/shaders/taa_resolve.glsl @@ -0,0 +1,393 @@ +/////////////////////////////////////////////////////////////////////////////////// +// Copyright(c) 2016-2022 Panos Karabelas +// +// Permission is hereby granted, free of charge, to any person obtaining a copy +// of this software and associated documentation files (the "Software"), to deal +// in the Software without restriction, including without limitation the rights +// to use, copy, modify, merge, publish, distribute, sublicense, and / or sell +// copies of the Software, and to permit persons to whom the Software is furnished +// to do so, subject to the following conditions : +// +// The above copyright notice and this permission notice shall be included in +// all copies or substantial portions of the Software. +// +// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS +// FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.IN NO EVENT SHALL THE AUTHORS OR +// COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +// IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +// CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +/////////////////////////////////////////////////////////////////////////////////// +// File changes (yyyy-mm-dd) +// 2022-05-06: Panos Karabelas: first commit +// 2020-12-05: Joan Fons: convert to Vulkan and Godot +/////////////////////////////////////////////////////////////////////////////////// + +#[compute] + +#version 450 + +#VERSION_DEFINES + +// Based on Spartan Engine's TAA implementation https://github.com/PanosK92/SpartanEngine/blob/master/Data/shaders/temporal_antialiasing.hlsl + +#define USE_SUBGROUPS + +#define GROUP_SIZE 8 +#define FLT_MIN 0.00000001 +#define FLT_MAX 32767.0 +#define RPC_9 0.11111111111 +#define RPC_16 0.0625 + +#ifdef USE_SUBGROUPS +layout(local_size_x = GROUP_SIZE, local_size_y = GROUP_SIZE, local_size_z = 1) in; +#endif + +layout(rgba16f, set = 0, binding = 0) uniform restrict readonly image2D color_buffer; +layout(set = 0, binding = 1) uniform sampler2D depth_buffer; +layout(rg16f, set = 0, binding = 2) uniform restrict readonly image2D velocity_buffer; +layout(rg16f, set = 0, binding = 3) uniform restrict readonly image2D last_velocity_buffer; +layout(set = 0, binding = 4) uniform sampler2D history_buffer; +layout(rgba16f, set = 0, binding = 5) uniform restrict writeonly image2D output_buffer; + +layout(push_constant, std430) uniform Params { + vec2 resolution; + float disocclusion_threshold; // 0.1 / max(params.resolution.x, params.resolution.y + float disocclusion_scale; +} +params; + +const ivec2 kOffsets3x3[9] = { + ivec2(-1, -1), + ivec2(0, -1), + ivec2(1, -1), + ivec2(-1, 0), + ivec2(0, 0), + ivec2(1, 0), + ivec2(-1, 1), + ivec2(0, 1), + ivec2(1, 1), +}; + +/*------------------------------------------------------------------------------ + THREAD GROUP SHARED MEMORY (LDS) +------------------------------------------------------------------------------*/ + +const int kBorderSize = 1; +const int kGroupSize = GROUP_SIZE; +const int kTileDimension = kGroupSize + kBorderSize * 2; +const int kTileDimension2 = kTileDimension * kTileDimension; + +vec3 reinhard(vec3 hdr) { + return hdr / (hdr + 1.0); +} +vec3 reinhard_inverse(vec3 sdr) { + return sdr / (1.0 - sdr); +} + +float get_depth(ivec2 thread_id) { + return texelFetch(depth_buffer, thread_id, 0).r; +} + +#ifdef USE_SUBGROUPS +shared vec3 tile_color[kTileDimension][kTileDimension]; +shared float tile_depth[kTileDimension][kTileDimension]; + +vec3 load_color(uvec2 group_thread_id) { + group_thread_id += kBorderSize; + return tile_color[group_thread_id.x][group_thread_id.y]; +} + +void store_color(uvec2 group_thread_id, vec3 color) { + tile_color[group_thread_id.x][group_thread_id.y] = color; +} + +float load_depth(uvec2 group_thread_id) { + group_thread_id += kBorderSize; + return tile_depth[group_thread_id.x][group_thread_id.y]; +} + +void store_depth(uvec2 group_thread_id, float depth) { + tile_depth[group_thread_id.x][group_thread_id.y] = depth; +} + +void store_color_depth(uvec2 group_thread_id, ivec2 thread_id) { + // out of bounds clamp + thread_id = clamp(thread_id, ivec2(0, 0), ivec2(params.resolution) - ivec2(1, 1)); + + store_color(group_thread_id, imageLoad(color_buffer, thread_id).rgb); + store_depth(group_thread_id, get_depth(thread_id)); +} + +void populate_group_shared_memory(uvec2 group_id, uint group_index) { + // Populate group shared memory + ivec2 group_top_left = ivec2(group_id) * kGroupSize - kBorderSize; + if (group_index < (kTileDimension2 >> 2)) { + ivec2 group_thread_id_1 = ivec2(group_index % kTileDimension, group_index / kTileDimension); + ivec2 group_thread_id_2 = ivec2((group_index + (kTileDimension2 >> 2)) % kTileDimension, (group_index + (kTileDimension2 >> 2)) / kTileDimension); + ivec2 group_thread_id_3 = ivec2((group_index + (kTileDimension2 >> 1)) % kTileDimension, (group_index + (kTileDimension2 >> 1)) / kTileDimension); + ivec2 group_thread_id_4 = ivec2((group_index + kTileDimension2 * 3 / 4) % kTileDimension, (group_index + kTileDimension2 * 3 / 4) / kTileDimension); + + store_color_depth(group_thread_id_1, group_top_left + group_thread_id_1); + store_color_depth(group_thread_id_2, group_top_left + group_thread_id_2); + store_color_depth(group_thread_id_3, group_top_left + group_thread_id_3); + store_color_depth(group_thread_id_4, group_top_left + group_thread_id_4); + } + + // Wait for group threads to load store data. + groupMemoryBarrier(); + barrier(); +} +#else +vec3 load_color(uvec2 screen_pos) { + return imageLoad(color_buffer, ivec2(screen_pos)).rgb; +} + +float load_depth(uvec2 screen_pos) { + return get_depth(ivec2(screen_pos)); +} +#endif + +/*------------------------------------------------------------------------------ + VELOCITY +------------------------------------------------------------------------------*/ + +void depth_test_min(uvec2 pos, inout float min_depth, inout uvec2 min_pos) { + float depth = load_depth(pos); + + if (depth < min_depth) { + min_depth = depth; + min_pos = pos; + } +} + +// Returns velocity with closest depth (3x3 neighborhood) +void get_closest_pixel_velocity_3x3(in uvec2 group_pos, uvec2 group_top_left, out vec2 velocity) { + float min_depth = 1.0; + uvec2 min_pos = group_pos; + + depth_test_min(group_pos + kOffsets3x3[0], min_depth, min_pos); + depth_test_min(group_pos + kOffsets3x3[1], min_depth, min_pos); + depth_test_min(group_pos + kOffsets3x3[2], min_depth, min_pos); + depth_test_min(group_pos + kOffsets3x3[3], min_depth, min_pos); + depth_test_min(group_pos + kOffsets3x3[4], min_depth, min_pos); + depth_test_min(group_pos + kOffsets3x3[5], min_depth, min_pos); + depth_test_min(group_pos + kOffsets3x3[6], min_depth, min_pos); + depth_test_min(group_pos + kOffsets3x3[7], min_depth, min_pos); + depth_test_min(group_pos + kOffsets3x3[8], min_depth, min_pos); + + // Velocity out + velocity = imageLoad(velocity_buffer, ivec2(group_top_left + min_pos)).xy; +} + +/*------------------------------------------------------------------------------ + HISTORY SAMPLING +------------------------------------------------------------------------------*/ + +vec3 sample_catmull_rom_9(sampler2D stex, vec2 uv, vec2 resolution) { + // Source: https://gist.github.com/TheRealMJP/c83b8c0f46b63f3a88a5986f4fa982b1 + // License: https://gist.github.com/TheRealMJP/bc503b0b87b643d3505d41eab8b332ae + + // We're going to sample a a 4x4 grid of texels surrounding the target UV coordinate. We'll do this by rounding + // down the sample location to get the exact center of our "starting" texel. The starting texel will be at + // location [1, 1] in the grid, where [0, 0] is the top left corner. + vec2 sample_pos = uv * resolution; + vec2 texPos1 = floor(sample_pos - 0.5f) + 0.5f; + + // Compute the fractional offset from our starting texel to our original sample location, which we'll + // feed into the Catmull-Rom spline function to get our filter weights. + vec2 f = sample_pos - texPos1; + + // Compute the Catmull-Rom weights using the fractional offset that we calculated earlier. + // These equations are pre-expanded based on our knowledge of where the texels will be located, + // which lets us avoid having to evaluate a piece-wise function. + vec2 w0 = f * (-0.5f + f * (1.0f - 0.5f * f)); + vec2 w1 = 1.0f + f * f * (-2.5f + 1.5f * f); + vec2 w2 = f * (0.5f + f * (2.0f - 1.5f * f)); + vec2 w3 = f * f * (-0.5f + 0.5f * f); + + // Work out weighting factors and sampling offsets that will let us use bilinear filtering to + // simultaneously evaluate the middle 2 samples from the 4x4 grid. + vec2 w12 = w1 + w2; + vec2 offset12 = w2 / (w1 + w2); + + // Compute the final UV coordinates we'll use for sampling the texture + vec2 texPos0 = texPos1 - 1.0f; + vec2 texPos3 = texPos1 + 2.0f; + vec2 texPos12 = texPos1 + offset12; + + texPos0 /= resolution; + texPos3 /= resolution; + texPos12 /= resolution; + + vec3 result = vec3(0.0f, 0.0f, 0.0f); + + result += textureLod(stex, vec2(texPos0.x, texPos0.y), 0.0).xyz * w0.x * w0.y; + result += textureLod(stex, vec2(texPos12.x, texPos0.y), 0.0).xyz * w12.x * w0.y; + result += textureLod(stex, vec2(texPos3.x, texPos0.y), 0.0).xyz * w3.x * w0.y; + + result += textureLod(stex, vec2(texPos0.x, texPos12.y), 0.0).xyz * w0.x * w12.y; + result += textureLod(stex, vec2(texPos12.x, texPos12.y), 0.0).xyz * w12.x * w12.y; + result += textureLod(stex, vec2(texPos3.x, texPos12.y), 0.0).xyz * w3.x * w12.y; + + result += textureLod(stex, vec2(texPos0.x, texPos3.y), 0.0).xyz * w0.x * w3.y; + result += textureLod(stex, vec2(texPos12.x, texPos3.y), 0.0).xyz * w12.x * w3.y; + result += textureLod(stex, vec2(texPos3.x, texPos3.y), 0.0).xyz * w3.x * w3.y; + + return max(result, 0.0f); +} + +/*------------------------------------------------------------------------------ + HISTORY CLIPPING +------------------------------------------------------------------------------*/ + +// Based on "Temporal Reprojection Anti-Aliasing" - https://github.com/playdeadgames/temporal +vec3 clip_aabb(vec3 aabb_min, vec3 aabb_max, vec3 p, vec3 q) { + vec3 r = q - p; + vec3 rmax = (aabb_max - p.xyz); + vec3 rmin = (aabb_min - p.xyz); + + if (r.x > rmax.x + FLT_MIN) + r *= (rmax.x / r.x); + if (r.y > rmax.y + FLT_MIN) + r *= (rmax.y / r.y); + if (r.z > rmax.z + FLT_MIN) + r *= (rmax.z / r.z); + + if (r.x < rmin.x - FLT_MIN) + r *= (rmin.x / r.x); + if (r.y < rmin.y - FLT_MIN) + r *= (rmin.y / r.y); + if (r.z < rmin.z - FLT_MIN) + r *= (rmin.z / r.z); + + return p + r; +} + +// Clip history to the neighbourhood of the current sample +vec3 clip_history_3x3(uvec2 group_pos, vec3 color_history, vec2 velocity_closest) { + // Sample a 3x3 neighbourhood + vec3 s1 = load_color(group_pos + kOffsets3x3[0]); + vec3 s2 = load_color(group_pos + kOffsets3x3[1]); + vec3 s3 = load_color(group_pos + kOffsets3x3[2]); + vec3 s4 = load_color(group_pos + kOffsets3x3[3]); + vec3 s5 = load_color(group_pos + kOffsets3x3[4]); + vec3 s6 = load_color(group_pos + kOffsets3x3[5]); + vec3 s7 = load_color(group_pos + kOffsets3x3[6]); + vec3 s8 = load_color(group_pos + kOffsets3x3[7]); + vec3 s9 = load_color(group_pos + kOffsets3x3[8]); + + // Compute min and max (with an adaptive box size, which greatly reduces ghosting) + vec3 color_avg = (s1 + s2 + s3 + s4 + s5 + s6 + s7 + s8 + s9) * RPC_9; + vec3 color_avg2 = ((s1 * s1) + (s2 * s2) + (s3 * s3) + (s4 * s4) + (s5 * s5) + (s6 * s6) + (s7 * s7) + (s8 * s8) + (s9 * s9)) * RPC_9; + float box_size = mix(0.0f, 2.5f, smoothstep(0.02f, 0.0f, length(velocity_closest))); + vec3 dev = sqrt(abs(color_avg2 - (color_avg * color_avg))) * box_size; + vec3 color_min = color_avg - dev; + vec3 color_max = color_avg + dev; + + // Variance clipping + vec3 color = clip_aabb(color_min, color_max, clamp(color_avg, color_min, color_max), color_history); + + // Clamp to prevent NaNs + color = clamp(color, FLT_MIN, FLT_MAX); + + return color; +} + +/*------------------------------------------------------------------------------ + TAA +------------------------------------------------------------------------------*/ + +const vec3 lumCoeff = vec3(0.299f, 0.587f, 0.114f); + +float luminance(vec3 color) { + return max(dot(color, lumCoeff), 0.0001f); +} + +float get_factor_disocclusion(vec2 uv_reprojected, vec2 velocity) { + vec2 velocity_previous = imageLoad(last_velocity_buffer, ivec2(uv_reprojected * params.resolution)).xy; + vec2 velocity_texels = velocity * params.resolution; + vec2 prev_velocity_texels = velocity_previous * params.resolution; + float disocclusion = length(prev_velocity_texels - velocity_texels) - params.disocclusion_threshold; + return clamp(disocclusion * params.disocclusion_scale, 0.0, 1.0); +} + +vec3 temporal_antialiasing(uvec2 pos_group_top_left, uvec2 pos_group, uvec2 pos_screen, vec2 uv, sampler2D tex_history) { + // Get the velocity of the current pixel + vec2 velocity = imageLoad(velocity_buffer, ivec2(pos_screen)).xy; + + // Get reprojected uv + vec2 uv_reprojected = uv - velocity; + + // Get input color + vec3 color_input = load_color(pos_group); + + // Get history color (catmull-rom reduces a lot of the blurring that you get under motion) + vec3 color_history = sample_catmull_rom_9(tex_history, uv_reprojected, params.resolution).rgb; + + // Clip history to the neighbourhood of the current sample (fixes a lot of the ghosting). + vec2 velocity_closest = vec2(0.0); // This is best done by using the velocity with the closest depth. + get_closest_pixel_velocity_3x3(pos_group, pos_group_top_left, velocity_closest); + color_history = clip_history_3x3(pos_group, color_history, velocity_closest); + + // Compute blend factor + float blend_factor = RPC_16; // We want to be able to accumulate as many jitter samples as we generated, that is, 16. + { + // If re-projected UV is out of screen, converge to current color immediatel + float factor_screen = any(lessThan(uv_reprojected, vec2(0.0))) || any(greaterThan(uv_reprojected, vec2(1.0))) ? 1.0 : 0.0; + + // Increase blend factor when there is disocclusion (fixes a lot of the remaining ghosting). + float factor_disocclusion = get_factor_disocclusion(uv_reprojected, velocity); + + // Add to the blend factor + blend_factor = clamp(blend_factor + factor_screen + factor_disocclusion, 0.0, 1.0); + } + + // Resolve + vec3 color_resolved = vec3(0.0); + { + // Tonemap + color_history = reinhard(color_history); + color_input = reinhard(color_input); + + // Reduce flickering + float lum_color = luminance(color_input); + float lum_history = luminance(color_history); + float diff = abs(lum_color - lum_history) / max(lum_color, max(lum_history, 1.001)); + diff = 1.0 - diff; + diff = diff * diff; + blend_factor = mix(0.0, blend_factor, diff); + + // Lerp/blend + color_resolved = mix(color_history, color_input, blend_factor); + + // Inverse tonemap + color_resolved = reinhard_inverse(color_resolved); + } + + return color_resolved; +} + +void main() { +#ifdef USE_SUBGROUPS + populate_group_shared_memory(gl_WorkGroupID.xy, gl_LocalInvocationIndex); +#endif + + // Out of bounds check + if (any(greaterThanEqual(vec2(gl_GlobalInvocationID.xy), params.resolution))) { + return; + } + +#ifdef USE_SUBGROUPS + const uvec2 pos_group = gl_LocalInvocationID.xy; + const uvec2 pos_group_top_left = gl_WorkGroupID.xy * kGroupSize - kBorderSize; +#else + const uvec2 pos_group = gl_GlobalInvocationID.xy; + const uvec2 pos_group_top_left = uvec2(0, 0); +#endif + const uvec2 pos_screen = gl_GlobalInvocationID.xy; + const vec2 uv = (gl_GlobalInvocationID.xy + 0.5f) / params.resolution; + + vec3 result = temporal_antialiasing(pos_group_top_left, pos_group, pos_screen, uv, history_buffer); + imageStore(output_buffer, ivec2(gl_GlobalInvocationID.xy), vec4(result, 1.0)); +} diff --git a/servers/rendering/renderer_scene.h b/servers/rendering/renderer_scene.h index 43d5b0786991..b773ed61f42a 100644 --- a/servers/rendering/renderer_scene.h +++ b/servers/rendering/renderer_scene.h @@ -194,7 +194,7 @@ public: virtual RID render_buffers_create() = 0; - virtual void render_buffers_configure(RID p_render_buffers, RID p_render_target, int p_internal_width, int p_internal_height, int p_width, int p_height, float p_fsr_sharpness, float p_fsr_mipmap_bias, RS::ViewportMSAA p_msaa, RS::ViewportScreenSpaceAA p_screen_space_aa, bool p_use_debanding, uint32_t p_view_count) = 0; + virtual void render_buffers_configure(RID p_render_buffers, RID p_render_target, int p_internal_width, int p_internal_height, int p_width, int p_height, float p_fsr_sharpness, float p_fsr_mipmap_bias, RS::ViewportMSAA p_msaa, RS::ViewportScreenSpaceAA p_screen_space_aa, bool p_use_taa, bool p_use_debanding, uint32_t p_view_count) = 0; virtual void gi_set_use_half_resolution(bool p_enable) = 0; @@ -211,7 +211,7 @@ public: int info[RS::VIEWPORT_RENDER_INFO_TYPE_MAX][RS::VIEWPORT_RENDER_INFO_MAX] = {}; }; - virtual void render_camera(RID p_render_buffers, RID p_camera, RID p_scenario, RID p_viewport, Size2 p_viewport_size, float p_mesh_lod_threshold, RID p_shadow_atlas, Ref &p_xr_interface, RenderInfo *r_render_info = nullptr) = 0; + virtual void render_camera(RID p_render_buffers, RID p_camera, RID p_scenario, RID p_viewport, Size2 p_viewport_size, bool p_use_taa, float p_mesh_lod_threshold, RID p_shadow_atlas, Ref &p_xr_interface, RenderInfo *r_render_info = nullptr) = 0; virtual void update() = 0; virtual void render_probes() = 0; diff --git a/servers/rendering/renderer_scene_cull.cpp b/servers/rendering/renderer_scene_cull.cpp index 2cb4e1658333..6f427272b555 100644 --- a/servers/rendering/renderer_scene_cull.cpp +++ b/servers/rendering/renderer_scene_cull.cpp @@ -2433,12 +2433,17 @@ bool RendererSceneCull::_light_instance_update_shadow(Instance *p_instance, cons return animated_material_found; } -void RendererSceneCull::render_camera(RID p_render_buffers, RID p_camera, RID p_scenario, RID p_viewport, Size2 p_viewport_size, float p_screen_mesh_lod_threshold, RID p_shadow_atlas, Ref &p_xr_interface, RenderInfo *r_render_info) { +void RendererSceneCull::render_camera(RID p_render_buffers, RID p_camera, RID p_scenario, RID p_viewport, Size2 p_viewport_size, bool p_use_taa, float p_screen_mesh_lod_threshold, RID p_shadow_atlas, Ref &p_xr_interface, RenderInfo *r_render_info) { #ifndef _3D_DISABLED Camera *camera = camera_owner.get_or_null(p_camera); ERR_FAIL_COND(!camera); + Vector2 jitter; + if (p_use_taa) { + jitter = taa_jitter_array[RSG::rasterizer->get_frame_number() % TAA_JITTER_COUNT] / p_viewport_size; + } + RendererSceneRender::CameraData camera_data; // Setup Camera(s) @@ -2479,7 +2484,7 @@ void RendererSceneCull::render_camera(RID p_render_buffers, RID p_camera, RID p_ } break; } - camera_data.set_camera(transform, projection, is_orthogonal, vaspect); + camera_data.set_camera(transform, projection, is_orthogonal, vaspect, jitter); } else { // Setup our camera for our XR interface. // We can support multiple views here each with their own camera @@ -2501,7 +2506,7 @@ void RendererSceneCull::render_camera(RID p_render_buffers, RID p_camera, RID p_ } if (view_count == 1) { - camera_data.set_camera(transforms[0], projections[0], false, camera->vaspect); + camera_data.set_camera(transforms[0], projections[0], false, camera->vaspect, jitter); } else if (view_count == 2) { camera_data.set_multiview_camera(view_count, transforms, projections, false, camera->vaspect); } else { @@ -3216,12 +3221,18 @@ void RendererSceneCull::_render_scene(const RendererSceneRender::CameraData *p_c /* PROCESS GEOMETRY AND DRAW SCENE */ RID occluders_tex; + const RendererSceneRender::CameraData *prev_camera_data = p_camera_data; if (p_viewport.is_valid()) { occluders_tex = RSG::viewport->viewport_get_occluder_debug_texture(p_viewport); + prev_camera_data = RSG::viewport->viewport_get_prev_camera_data(p_viewport); } RENDER_TIMESTAMP("Render 3D Scene"); - scene_render->render_scene(p_render_buffers, p_camera_data, scene_cull_result.geometry_instances, scene_cull_result.light_instances, scene_cull_result.reflections, scene_cull_result.voxel_gi_instances, scene_cull_result.decals, scene_cull_result.lightmaps, scene_cull_result.fog_volumes, p_environment, camera_effects, p_shadow_atlas, occluders_tex, p_reflection_probe.is_valid() ? RID() : scenario->reflection_atlas, p_reflection_probe, p_reflection_probe_pass, p_screen_mesh_lod_threshold, render_shadow_data, max_shadows_used, render_sdfgi_data, cull.sdfgi.region_count, &sdfgi_update_data, r_render_info); + scene_render->render_scene(p_render_buffers, p_camera_data, prev_camera_data, scene_cull_result.geometry_instances, scene_cull_result.light_instances, scene_cull_result.reflections, scene_cull_result.voxel_gi_instances, scene_cull_result.decals, scene_cull_result.lightmaps, scene_cull_result.fog_volumes, p_environment, camera_effects, p_shadow_atlas, occluders_tex, p_reflection_probe.is_valid() ? RID() : scenario->reflection_atlas, p_reflection_probe, p_reflection_probe_pass, p_screen_mesh_lod_threshold, render_shadow_data, max_shadows_used, render_sdfgi_data, cull.sdfgi.region_count, &sdfgi_update_data, r_render_info); + + if (p_viewport.is_valid()) { + RSG::viewport->viewport_set_prev_camera_data(p_viewport, p_camera_data); + } for (uint32_t i = 0; i < max_shadows_used; i++) { render_shadow_data[i].instances.clear(); @@ -3271,7 +3282,7 @@ void RendererSceneCull::render_empty_scene(RID p_render_buffers, RID p_scenario, RendererSceneRender::CameraData camera_data; camera_data.set_camera(Transform3D(), CameraMatrix(), true, false); - scene_render->render_scene(p_render_buffers, &camera_data, PagedArray(), PagedArray(), PagedArray(), PagedArray(), PagedArray(), PagedArray(), PagedArray(), RID(), RID(), p_shadow_atlas, RID(), scenario->reflection_atlas, RID(), 0, 0, nullptr, 0, nullptr, 0, nullptr); + scene_render->render_scene(p_render_buffers, &camera_data, nullptr, PagedArray(), PagedArray(), PagedArray(), PagedArray(), PagedArray(), PagedArray(), PagedArray(), RID(), RID(), p_shadow_atlas, RID(), scenario->reflection_atlas, RID(), 0, 0, nullptr, 0, nullptr, 0, nullptr); #endif } @@ -3993,6 +4004,17 @@ void RendererSceneCull::set_scene_render(RendererSceneRender *p_scene_render) { geometry_instance_pair_mask = scene_render->geometry_instance_get_pair_mask(); } +float get_halton_value(int index, int base) { + float f = 1; + float r = 0; + while (index > 0) { + f = f / static_cast(base); + r = r + f * (index % base); + index = index / base; + } + return r * 2.0f - 1.0f; +}; + RendererSceneCull::RendererSceneCull() { render_pass = 1; singleton = this; @@ -4017,6 +4039,12 @@ RendererSceneCull::RendererSceneCull() { thread_cull_threshold = GLOBAL_GET("rendering/limits/spatial_indexer/threaded_cull_minimum_instances"); thread_cull_threshold = MAX(thread_cull_threshold, (uint32_t)RendererThreadPool::singleton->thread_work_pool.get_thread_count()); //make sure there is at least one thread per CPU + taa_jitter_array.resize(TAA_JITTER_COUNT); + for (int i = 0; i < TAA_JITTER_COUNT; i++) { + taa_jitter_array[i].x = get_halton_value(i, 2); + taa_jitter_array[i].y = get_halton_value(i, 3); + } + dummy_occlusion_culling = memnew(RendererSceneOcclusionCull); } diff --git a/servers/rendering/renderer_scene_cull.h b/servers/rendering/renderer_scene_cull.h index 60983f994479..d1d348487138 100644 --- a/servers/rendering/renderer_scene_cull.h +++ b/servers/rendering/renderer_scene_cull.h @@ -923,6 +923,9 @@ public: uint32_t geometry_instance_pair_mask = 0; // used in traditional forward, unnecessary on clustered + const int TAA_JITTER_COUNT = 16; + LocalVector taa_jitter_array; + virtual RID instance_allocate(); virtual void instance_initialize(RID p_rid); @@ -1054,7 +1057,7 @@ public: void _render_scene(const RendererSceneRender::CameraData *p_camera_data, RID p_render_buffers, RID p_environment, RID p_force_camera_effects, uint32_t p_visible_layers, RID p_scenario, RID p_viewport, RID p_shadow_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_mesh_lod_threshold, bool p_using_shadows = true, RenderInfo *r_render_info = nullptr); void render_empty_scene(RID p_render_buffers, RID p_scenario, RID p_shadow_atlas); - void render_camera(RID p_render_buffers, RID p_camera, RID p_scenario, RID p_viewport, Size2 p_viewport_size, float p_screen_mesh_lod_threshold, RID p_shadow_atlas, Ref &p_xr_interface, RendererScene::RenderInfo *r_render_info = nullptr); + void render_camera(RID p_render_buffers, RID p_camera, RID p_scenario, RID p_viewport, Size2 p_viewport_size, bool p_use_taa, float p_screen_mesh_lod_threshold, RID p_shadow_atlas, Ref &p_xr_interface, RendererScene::RenderInfo *r_render_info = nullptr); void update_dirty_instances(); void render_particle_colliders(); @@ -1155,7 +1158,7 @@ public: /* Render Buffers */ PASS0R(RID, render_buffers_create) - PASS12(render_buffers_configure, RID, RID, int, int, int, int, float, float, RS::ViewportMSAA, RS::ViewportScreenSpaceAA, bool, uint32_t) + PASS13(render_buffers_configure, RID, RID, int, int, int, int, float, float, RS::ViewportMSAA, RS::ViewportScreenSpaceAA, bool, bool, uint32_t) PASS1(gi_set_use_half_resolution, bool) /* Shadow Atlas */ diff --git a/servers/rendering/renderer_scene_render.cpp b/servers/rendering/renderer_scene_render.cpp index a4d281fac459..600908cf166b 100644 --- a/servers/rendering/renderer_scene_render.cpp +++ b/servers/rendering/renderer_scene_render.cpp @@ -30,7 +30,7 @@ #include "renderer_scene_render.h" -void RendererSceneRender::CameraData::set_camera(const Transform3D p_transform, const CameraMatrix p_projection, bool p_is_orthogonal, bool p_vaspect) { +void RendererSceneRender::CameraData::set_camera(const Transform3D p_transform, const CameraMatrix p_projection, bool p_is_orthogonal, bool p_vaspect, const Vector2 &p_taa_jitter) { view_count = 1; is_orthogonal = p_is_orthogonal; vaspect = p_vaspect; @@ -40,6 +40,7 @@ void RendererSceneRender::CameraData::set_camera(const Transform3D p_transform, view_offset[0] = Transform3D(); view_projection[0] = p_projection; + taa_jitter = p_taa_jitter; } void RendererSceneRender::CameraData::set_multiview_camera(uint32_t p_view_count, const Transform3D *p_transforms, const CameraMatrix *p_projections, bool p_is_orthogonal, bool p_vaspect) { diff --git a/servers/rendering/renderer_scene_render.h b/servers/rendering/renderer_scene_render.h index d43bfb170e84..280277c6d8d9 100644 --- a/servers/rendering/renderer_scene_render.h +++ b/servers/rendering/renderer_scene_render.h @@ -243,12 +243,13 @@ public: Transform3D view_offset[RendererSceneRender::MAX_RENDER_VIEWS]; CameraMatrix view_projection[RendererSceneRender::MAX_RENDER_VIEWS]; + Vector2 taa_jitter; - void set_camera(const Transform3D p_transform, const CameraMatrix p_projection, bool p_is_orthogonal, bool p_vaspect); + void set_camera(const Transform3D p_transform, const CameraMatrix p_projection, bool p_is_orthogonal, bool p_vaspect, const Vector2 &p_taa_jitter = Vector2()); void set_multiview_camera(uint32_t p_view_count, const Transform3D *p_transforms, const CameraMatrix *p_projections, bool p_is_orthogonal, bool p_vaspect); }; - virtual void render_scene(RID p_render_buffers, const CameraData *p_camera_data, const PagedArray &p_instances, const PagedArray &p_lights, const PagedArray &p_reflection_probes, const PagedArray &p_voxel_gi_instances, const PagedArray &p_decals, const PagedArray &p_lightmaps, const PagedArray &p_fog_volumes, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_occluder_debug_tex, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_mesh_lod_threshold, const RenderShadowData *p_render_shadows, int p_render_shadow_count, const RenderSDFGIData *p_render_sdfgi_regions, int p_render_sdfgi_region_count, const RenderSDFGIUpdateData *p_sdfgi_update_data = nullptr, RendererScene::RenderInfo *r_render_info = nullptr) = 0; + virtual void render_scene(RID p_render_buffers, const CameraData *p_camera_data, const CameraData *p_prev_camera_data, const PagedArray &p_instances, const PagedArray &p_lights, const PagedArray &p_reflection_probes, const PagedArray &p_voxel_gi_instances, const PagedArray &p_decals, const PagedArray &p_lightmaps, const PagedArray &p_fog_volumes, RID p_environment, RID p_camera_effects, RID p_shadow_atlas, RID p_occluder_debug_tex, RID p_reflection_atlas, RID p_reflection_probe, int p_reflection_probe_pass, float p_screen_mesh_lod_threshold, const RenderShadowData *p_render_shadows, int p_render_shadow_count, const RenderSDFGIData *p_render_sdfgi_regions, int p_render_sdfgi_region_count, const RenderSDFGIUpdateData *p_sdfgi_update_data = nullptr, RendererScene::RenderInfo *r_render_info = nullptr) = 0; virtual void render_material(const Transform3D &p_cam_transform, const CameraMatrix &p_cam_projection, bool p_cam_orthogonal, const PagedArray &p_instances, RID p_framebuffer, const Rect2i &p_region) = 0; virtual void render_particle_collider_heightfield(RID p_collider, const Transform3D &p_transform, const PagedArray &p_instances) = 0; @@ -258,7 +259,7 @@ public: virtual void set_debug_draw_mode(RS::ViewportDebugDraw p_debug_draw) = 0; virtual RID render_buffers_create() = 0; - virtual void render_buffers_configure(RID p_render_buffers, RID p_render_target, int p_internal_width, int p_internal_height, int p_width, int p_height, float p_fsr_sharpness, float p_fsr_mipmap_bias, RS::ViewportMSAA p_msaa, RS::ViewportScreenSpaceAA p_screen_space_aa, bool p_use_debanding, uint32_t p_view_count) = 0; + virtual void render_buffers_configure(RID p_render_buffers, RID p_render_target, int p_internal_width, int p_internal_height, int p_width, int p_height, float p_fsr_sharpness, float p_fsr_mipmap_bias, RS::ViewportMSAA p_msaa, RS::ViewportScreenSpaceAA p_screen_space_aa, bool p_use_taa, bool p_use_debanding, uint32_t p_view_count) = 0; virtual void gi_set_use_half_resolution(bool p_enable) = 0; virtual void screen_space_roughness_limiter_set_active(bool p_enable, float p_amount, float p_limit) = 0; diff --git a/servers/rendering/renderer_viewport.cpp b/servers/rendering/renderer_viewport.cpp index 74fafe83810c..2e0c4e0f7955 100644 --- a/servers/rendering/renderer_viewport.cpp +++ b/servers/rendering/renderer_viewport.cpp @@ -138,7 +138,7 @@ void RendererViewport::_configure_3d_render_buffers(Viewport *p_viewport) { p_viewport->internal_size = Size2(render_width, render_height); - RSG::scene->render_buffers_configure(p_viewport->render_buffers, p_viewport->render_target, render_width, render_height, width, height, p_viewport->fsr_sharpness, p_viewport->fsr_mipmap_bias, p_viewport->msaa, p_viewport->screen_space_aa, p_viewport->use_debanding, p_viewport->get_view_count()); + RSG::scene->render_buffers_configure(p_viewport->render_buffers, p_viewport->render_target, render_width, render_height, width, height, p_viewport->fsr_sharpness, p_viewport->fsr_mipmap_bias, p_viewport->msaa, p_viewport->screen_space_aa, p_viewport->use_taa, p_viewport->use_debanding, p_viewport->get_view_count()); } } } @@ -167,7 +167,7 @@ void RendererViewport::_draw_3d(Viewport *p_viewport) { } float screen_mesh_lod_threshold = p_viewport->mesh_lod_threshold / float(p_viewport->size.width); - RSG::scene->render_camera(p_viewport->render_buffers, p_viewport->camera, p_viewport->scenario, p_viewport->self, p_viewport->internal_size, screen_mesh_lod_threshold, p_viewport->shadow_atlas, xr_interface, &p_viewport->render_info); + RSG::scene->render_camera(p_viewport->render_buffers, p_viewport->camera, p_viewport->scenario, p_viewport->self, p_viewport->internal_size, p_viewport->use_taa, screen_mesh_lod_threshold, p_viewport->shadow_atlas, xr_interface, &p_viewport->render_info); RENDER_TIMESTAMP("< Render 3D Scene"); } @@ -900,6 +900,22 @@ RID RendererViewport::viewport_get_occluder_debug_texture(RID p_viewport) const return RID(); } +void RendererViewport::viewport_set_prev_camera_data(RID p_viewport, const RendererSceneRender::CameraData *p_camera_data) { + Viewport *viewport = viewport_owner.get_or_null(p_viewport); + ERR_FAIL_COND(!viewport); + uint64_t frame = RSG::rasterizer->get_frame_number(); + if (viewport->prev_camera_data_frame != frame) { + viewport->prev_camera_data = *p_camera_data; + viewport->prev_camera_data_frame = frame; + } +} + +const RendererSceneRender::CameraData *RendererViewport::viewport_get_prev_camera_data(RID p_viewport) { + const Viewport *viewport = viewport_owner.get_or_null(p_viewport); + ERR_FAIL_COND_V(!viewport, nullptr); + return &viewport->prev_camera_data; +} + void RendererViewport::viewport_set_disable_2d(RID p_viewport, bool p_disable) { Viewport *viewport = viewport_owner.get_or_null(p_viewport); ERR_FAIL_COND(!viewport); @@ -1039,6 +1055,17 @@ void RendererViewport::viewport_set_screen_space_aa(RID p_viewport, RS::Viewport _configure_3d_render_buffers(viewport); } +void RendererViewport::viewport_set_use_taa(RID p_viewport, bool p_use_taa) { + Viewport *viewport = viewport_owner.get_or_null(p_viewport); + ERR_FAIL_COND(!viewport); + + if (viewport->use_taa == p_use_taa) { + return; + } + viewport->use_taa = p_use_taa; + _configure_3d_render_buffers(viewport); +} + void RendererViewport::viewport_set_use_debanding(RID p_viewport, bool p_use_debanding) { Viewport *viewport = viewport_owner.get_or_null(p_viewport); ERR_FAIL_COND(!viewport); diff --git a/servers/rendering/renderer_viewport.h b/servers/rendering/renderer_viewport.h index da8cf5396c7a..18385d161331 100644 --- a/servers/rendering/renderer_viewport.h +++ b/servers/rendering/renderer_viewport.h @@ -35,6 +35,7 @@ #include "core/templates/rid_owner.h" #include "core/templates/self_list.h" #include "servers/rendering/renderer_scene.h" +#include "servers/rendering/renderer_scene_render.h" #include "servers/rendering_server.h" #include "servers/xr/xr_interface.h" @@ -66,8 +67,12 @@ public: RS::ViewportMSAA msaa; RS::ViewportScreenSpaceAA screen_space_aa; + bool use_taa; bool use_debanding; + RendererSceneRender::CameraData prev_camera_data; + uint64_t prev_camera_data_frame = 0; + bool use_occlusion_culling; bool occlusion_buffer_dirty; @@ -233,6 +238,9 @@ public: RID viewport_get_texture(RID p_viewport) const; RID viewport_get_occluder_debug_texture(RID p_viewport) const; + void viewport_set_prev_camera_data(RID p_viewport, const RendererSceneRender::CameraData *p_camera_data); + const RendererSceneRender::CameraData *viewport_get_prev_camera_data(RID p_viewport); + void viewport_set_disable_2d(RID p_viewport, bool p_disable); void viewport_set_disable_environment(RID p_viewport, bool p_disable); void viewport_set_disable_3d(RID p_viewport, bool p_disable); @@ -252,6 +260,7 @@ public: void viewport_set_msaa(RID p_viewport, RS::ViewportMSAA p_msaa); void viewport_set_screen_space_aa(RID p_viewport, RS::ViewportScreenSpaceAA p_mode); + void viewport_set_use_taa(RID p_viewport, bool p_use_taa); void viewport_set_use_debanding(RID p_viewport, bool p_use_debanding); void viewport_set_use_occlusion_culling(RID p_viewport, bool p_use_occlusion_culling); void viewport_set_occlusion_rays_per_thread(int p_rays_per_thread); diff --git a/servers/rendering/rendering_server_default.h b/servers/rendering/rendering_server_default.h index 9d4059b9dfbb..6008f2b00eaf 100644 --- a/servers/rendering/rendering_server_default.h +++ b/servers/rendering/rendering_server_default.h @@ -616,6 +616,7 @@ public: FUNC3(viewport_set_shadow_atlas_quadrant_subdivision, RID, int, int) FUNC2(viewport_set_msaa, RID, ViewportMSAA) FUNC2(viewport_set_screen_space_aa, RID, ViewportScreenSpaceAA) + FUNC2(viewport_set_use_taa, RID, bool) FUNC2(viewport_set_use_debanding, RID, bool) FUNC2(viewport_set_use_occlusion_culling, RID, bool) FUNC1(viewport_set_occlusion_rays_per_thread, int) diff --git a/servers/rendering_server.cpp b/servers/rendering_server.cpp index 366c011f8638..26ab8b659e98 100644 --- a/servers/rendering_server.cpp +++ b/servers/rendering_server.cpp @@ -2211,6 +2211,7 @@ void RenderingServer::_bind_methods() { ClassDB::bind_method(D_METHOD("viewport_set_shadow_atlas_quadrant_subdivision", "viewport", "quadrant", "subdivision"), &RenderingServer::viewport_set_shadow_atlas_quadrant_subdivision); ClassDB::bind_method(D_METHOD("viewport_set_msaa", "viewport", "msaa"), &RenderingServer::viewport_set_msaa); ClassDB::bind_method(D_METHOD("viewport_set_screen_space_aa", "viewport", "mode"), &RenderingServer::viewport_set_screen_space_aa); + ClassDB::bind_method(D_METHOD("viewport_set_use_taa", "viewport", "enable"), &RenderingServer::viewport_set_use_taa); ClassDB::bind_method(D_METHOD("viewport_set_use_debanding", "viewport", "enable"), &RenderingServer::viewport_set_use_debanding); ClassDB::bind_method(D_METHOD("viewport_set_use_occlusion_culling", "viewport", "enable"), &RenderingServer::viewport_set_use_occlusion_culling); ClassDB::bind_method(D_METHOD("viewport_set_occlusion_rays_per_thread", "rays_per_thread"), &RenderingServer::viewport_set_occlusion_rays_per_thread); @@ -2297,6 +2298,7 @@ void RenderingServer::_bind_methods() { BIND_ENUM_CONSTANT(VIEWPORT_DEBUG_DRAW_CLUSTER_DECALS); BIND_ENUM_CONSTANT(VIEWPORT_DEBUG_DRAW_CLUSTER_REFLECTION_PROBES); BIND_ENUM_CONSTANT(VIEWPORT_DEBUG_DRAW_OCCLUDERS); + BIND_ENUM_CONSTANT(VIEWPORT_DEBUG_DRAW_MOTION_VECTORS); /* SKY API */ diff --git a/servers/rendering_server.h b/servers/rendering_server.h index c6c4b9d27591..39484e532ae7 100644 --- a/servers/rendering_server.h +++ b/servers/rendering_server.h @@ -877,11 +877,13 @@ public: virtual void viewport_set_screen_space_aa(RID p_viewport, ViewportScreenSpaceAA p_mode) = 0; + virtual void viewport_set_use_taa(RID p_viewport, bool p_use_taa) = 0; + virtual void viewport_set_use_debanding(RID p_viewport, bool p_use_debanding) = 0; virtual void viewport_set_mesh_lod_threshold(RID p_viewport, float p_pixels) = 0; - virtual void viewport_set_use_occlusion_culling(RID p_viewport, bool p_use_debanding) = 0; + virtual void viewport_set_use_occlusion_culling(RID p_viewport, bool p_use_occlusion_culling) = 0; virtual void viewport_set_occlusion_rays_per_thread(int p_rays_per_thread) = 0; enum ViewportOcclusionCullingBuildQuality { @@ -933,6 +935,7 @@ public: VIEWPORT_DEBUG_DRAW_CLUSTER_DECALS, VIEWPORT_DEBUG_DRAW_CLUSTER_REFLECTION_PROBES, VIEWPORT_DEBUG_DRAW_OCCLUDERS, + VIEWPORT_DEBUG_DRAW_MOTION_VECTORS, }; virtual void viewport_set_debug_draw(RID p_viewport, ViewportDebugDraw p_draw) = 0;