From 0d7deca4e278e28d5707d941790d4a99a663d9be Mon Sep 17 00:00:00 2001 From: Dario Date: Tue, 8 Aug 2023 14:41:34 -0300 Subject: [PATCH] Add buffer_copy method to RenderingDevice interface and an implementation for the Vulkan driver. Direct buffer copies are required to perform certain operations more efficiently, as the only current alternative is to download the buffer to the CPU and upload it again. As the first use case, the new function is used when enabling motion vectors on multimeshes. --- drivers/vulkan/rendering_device_vulkan.cpp | 58 +++++++++++++++++++ drivers/vulkan/rendering_device_vulkan.h | 1 + .../renderer_rd/storage_rd/mesh_storage.cpp | 9 +-- servers/rendering/rendering_device.h | 1 + 4 files changed, 63 insertions(+), 6 deletions(-) diff --git a/drivers/vulkan/rendering_device_vulkan.cpp b/drivers/vulkan/rendering_device_vulkan.cpp index 1ed6839dd7e7..a55abe6a82bb 100644 --- a/drivers/vulkan/rendering_device_vulkan.cpp +++ b/drivers/vulkan/rendering_device_vulkan.cpp @@ -5905,6 +5905,64 @@ void RenderingDeviceVulkan::uniform_set_set_invalidation_callback(RID p_uniform_ us->invalidated_callback_userdata = p_userdata; } +Error RenderingDeviceVulkan::buffer_copy(RID p_src_buffer, RID p_dst_buffer, uint32_t p_src_offset, uint32_t p_dst_offset, uint32_t p_size, BitField p_post_barrier) { + _THREAD_SAFE_METHOD_ + + ERR_FAIL_COND_V_MSG(draw_list, ERR_INVALID_PARAMETER, + "Copying buffers is forbidden during creation of a draw list"); + ERR_FAIL_COND_V_MSG(compute_list, ERR_INVALID_PARAMETER, + "Copying buffers is forbidden during creation of a compute list"); + + // This method assumes the barriers have been pushed prior to being called, therefore no barriers are pushed + // for the source or destination buffers before performing the copy. These masks are effectively ignored. + VkPipelineShaderStageCreateFlags src_stage_mask = 0; + VkAccessFlags src_access_mask = 0; + Buffer *src_buffer = _get_buffer_from_owner(p_src_buffer, src_stage_mask, src_access_mask, BARRIER_MASK_NO_BARRIER); + if (!src_buffer) { + ERR_FAIL_V_MSG(ERR_INVALID_PARAMETER, "Source buffer argument is not a valid buffer of any type."); + } + + VkPipelineStageFlags dst_stage_mask = 0; + VkAccessFlags dst_access = 0; + if (p_post_barrier.has_flag(BARRIER_MASK_TRANSFER)) { + // If the post barrier mask defines it, we indicate the destination buffer will require a barrier with these flags set + // after the copy command is queued. + dst_stage_mask = VK_PIPELINE_STAGE_TRANSFER_BIT; + dst_access = VK_ACCESS_TRANSFER_WRITE_BIT; + } + + Buffer *dst_buffer = _get_buffer_from_owner(p_dst_buffer, dst_stage_mask, dst_access, p_post_barrier); + if (!dst_buffer) { + ERR_FAIL_V_MSG(ERR_INVALID_PARAMETER, "Destination buffer argument is not a valid buffer of any type."); + } + + // Validate the copy's dimensions for both buffers. + ERR_FAIL_COND_V_MSG((p_size + p_src_offset) > src_buffer->size, ERR_INVALID_PARAMETER, "Size is larger than the source buffer."); + ERR_FAIL_COND_V_MSG((p_size + p_dst_offset) > dst_buffer->size, ERR_INVALID_PARAMETER, "Size is larger than the destination buffer."); + + // Perform the copy. + VkBufferCopy region; + region.srcOffset = p_src_offset; + region.dstOffset = p_dst_offset; + region.size = p_size; + vkCmdCopyBuffer(frames[frame].draw_command_buffer, src_buffer->buffer, dst_buffer->buffer, 1, ®ion); + +#ifdef FORCE_FULL_BARRIER + _full_barrier(true); +#else + if (dst_stage_mask == 0) { + dst_stage_mask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT; + } + + // As indicated by the post barrier mask, push a new barrier. + if (p_post_barrier != RD::BARRIER_MASK_NO_BARRIER) { + _buffer_memory_barrier(dst_buffer->buffer, p_dst_offset, p_size, VK_PIPELINE_STAGE_TRANSFER_BIT, dst_stage_mask, VK_ACCESS_TRANSFER_WRITE_BIT, dst_access, true); + } +#endif + + return OK; +} + Error RenderingDeviceVulkan::buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, BitField p_post_barrier) { _THREAD_SAFE_METHOD_ diff --git a/drivers/vulkan/rendering_device_vulkan.h b/drivers/vulkan/rendering_device_vulkan.h index fd832312acb5..edff19a70cd7 100644 --- a/drivers/vulkan/rendering_device_vulkan.h +++ b/drivers/vulkan/rendering_device_vulkan.h @@ -1152,6 +1152,7 @@ public: virtual bool uniform_set_is_valid(RID p_uniform_set); virtual void uniform_set_set_invalidation_callback(RID p_uniform_set, InvalidationCallback p_callback, void *p_userdata); + virtual Error buffer_copy(RID p_src_buffer, RID p_dst_buffer, uint32_t p_src_offset, uint32_t p_dst_offset, uint32_t p_size, BitField p_post_barrier = BARRIER_MASK_ALL_BARRIERS); virtual Error buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, BitField p_post_barrier = BARRIER_MASK_ALL_BARRIERS); // Works for any buffer. virtual Error buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_size, BitField p_post_barrier = BARRIER_MASK_ALL_BARRIERS); virtual Vector buffer_get_data(RID p_buffer, uint32_t p_offset = 0, uint32_t p_size = 0); diff --git a/servers/rendering/renderer_rd/storage_rd/mesh_storage.cpp b/servers/rendering/renderer_rd/storage_rd/mesh_storage.cpp index 4bfec8ae8dce..97fc38fab418 100644 --- a/servers/rendering/renderer_rd/storage_rd/mesh_storage.cpp +++ b/servers/rendering/renderer_rd/storage_rd/mesh_storage.cpp @@ -1309,13 +1309,10 @@ void MeshStorage::_multimesh_enable_motion_vectors(MultiMesh *multimesh) { RID new_buffer = RD::get_singleton()->storage_buffer_create(new_buffer_size); if (multimesh->buffer_set && multimesh->data_cache.is_empty()) { - // If the buffer was set but there's no data cached in the CPU, we must download it from the GPU and - // upload it because RD does not provide a way to copy the buffer directly yet. + // If the buffer was set but there's no data cached in the CPU, we copy the buffer directly on the GPU. RD::get_singleton()->barrier(); - Vector buffer_data = RD::get_singleton()->buffer_get_data(multimesh->buffer); - ERR_FAIL_COND(buffer_data.size() != int(buffer_size)); - RD::get_singleton()->buffer_update(new_buffer, 0, buffer_size, buffer_data.ptr(), RD::BARRIER_MASK_NO_BARRIER); - RD::get_singleton()->buffer_update(new_buffer, buffer_size, buffer_size, buffer_data.ptr()); + RD::get_singleton()->buffer_copy(multimesh->buffer, new_buffer, 0, 0, buffer_size, RD::BARRIER_MASK_NO_BARRIER); + RD::get_singleton()->buffer_copy(multimesh->buffer, new_buffer, 0, buffer_size, buffer_size); } else if (!multimesh->data_cache.is_empty()) { // Simply upload the data cached in the CPU, which should already be doubled in size. ERR_FAIL_COND(multimesh->data_cache.size() != int(new_buffer_size)); diff --git a/servers/rendering/rendering_device.h b/servers/rendering/rendering_device.h index 30d9b1c7b721..1ade1b25c4d5 100644 --- a/servers/rendering/rendering_device.h +++ b/servers/rendering/rendering_device.h @@ -840,6 +840,7 @@ public: virtual bool uniform_set_is_valid(RID p_uniform_set) = 0; virtual void uniform_set_set_invalidation_callback(RID p_uniform_set, InvalidationCallback p_callback, void *p_userdata) = 0; + virtual Error buffer_copy(RID p_src_buffer, RID p_dst_buffer, uint32_t p_src_offset, uint32_t p_dst_offset, uint32_t p_size, BitField p_post_barrier = BARRIER_MASK_ALL_BARRIERS) = 0; virtual Error buffer_update(RID p_buffer, uint32_t p_offset, uint32_t p_size, const void *p_data, BitField p_post_barrier = BARRIER_MASK_ALL_BARRIERS) = 0; virtual Error buffer_clear(RID p_buffer, uint32_t p_offset, uint32_t p_size, BitField p_post_barrier = BARRIER_MASK_ALL_BARRIERS) = 0; virtual Vector buffer_get_data(RID p_buffer, uint32_t p_offset = 0, uint32_t p_size = 0) = 0; // This causes stall, only use to retrieve large buffers for saving.