Improve stage and slice tracking behavior of RenderingDeviceGraph to fix various synchronization issues.

This commit is contained in:
Dario 2024-01-22 16:11:26 -03:00
parent 17e7f85c06
commit 39f279710c
3 changed files with 201 additions and 68 deletions

View file

@ -35,6 +35,8 @@
#include "thirdparty/misc/smolv.h"
#include "vulkan_context.h"
#define PRINT_NATIVE_COMMANDS 0
/*****************/
/**** GENERIC ****/
/*****************/
@ -622,6 +624,10 @@ RDD::TextureID RenderingDeviceDriverVulkan::texture_create(const TextureFormat &
tex_info->allocation.handle = allocation;
vmaGetAllocationInfo(allocator, tex_info->allocation.handle, &tex_info->allocation.info);
#if PRINT_NATIVE_COMMANDS
print_line(vformat("vkCreateImageView: 0x%uX for 0x%uX", uint64_t(vk_image_view), uint64_t(vk_image)));
#endif
return TextureID(tex_info);
}
@ -710,6 +716,10 @@ RDD::TextureID RenderingDeviceDriverVulkan::texture_create_shared(TextureID p_or
tex_info->vk_view_create_info = image_view_create_info;
tex_info->allocation = {};
#if PRINT_NATIVE_COMMANDS
print_line(vformat("vkCreateImageView: 0x%uX for 0x%uX", uint64_t(new_vk_image_view), uint64_t(owner_tex_info->vk_view_create_info.image)));
#endif
return TextureID(tex_info);
}
@ -759,6 +769,10 @@ RDD::TextureID RenderingDeviceDriverVulkan::texture_create_shared_from_slice(Tex
tex_info->vk_view_create_info = image_view_create_info;
tex_info->allocation = {};
#if PRINT_NATIVE_COMMANDS
print_line(vformat("vkCreateImageView: 0x%uX for 0x%uX (%d %d %d %d)", uint64_t(new_vk_image_view), uint64_t(owner_tex_info->vk_view_create_info.image), p_mipmap, p_mipmaps, p_layer, p_layers));
#endif
return TextureID(tex_info);
}
@ -1071,6 +1085,23 @@ void RenderingDeviceDriverVulkan::command_pipeline_barrier(
vk_image_barriers[i].subresourceRange.layerCount = p_texture_barriers[i].subresources.layer_count;
}
#if PRINT_NATIVE_COMMANDS
print_line(vformat("vkCmdPipelineBarrier MEMORY %d BUFFER %d TEXTURE %d", p_memory_barriers.size(), p_buffer_barriers.size(), p_texture_barriers.size()));
for (uint32_t i = 0; i < p_memory_barriers.size(); i++) {
print_line(vformat(" VkMemoryBarrier #%d src 0x%uX dst 0x%uX", i, vk_memory_barriers[i].srcAccessMask, vk_memory_barriers[i].dstAccessMask));
}
for (uint32_t i = 0; i < p_buffer_barriers.size(); i++) {
print_line(vformat(" VkBufferMemoryBarrier #%d src 0x%uX dst 0x%uX buffer 0x%ux", i, vk_buffer_barriers[i].srcAccessMask, vk_buffer_barriers[i].dstAccessMask, uint64_t(vk_buffer_barriers[i].buffer)));
}
for (uint32_t i = 0; i < p_texture_barriers.size(); i++) {
print_line(vformat(" VkImageMemoryBarrier #%d src 0x%uX dst 0x%uX image 0x%ux old %d new %d (%d %d %d %d)", i, vk_image_barriers[i].srcAccessMask, vk_image_barriers[i].dstAccessMask,
uint64_t(vk_image_barriers[i].image), vk_image_barriers[i].oldLayout, vk_image_barriers[i].newLayout, vk_image_barriers[i].subresourceRange.baseMipLevel, vk_image_barriers[i].subresourceRange.levelCount,
vk_image_barriers[i].subresourceRange.baseArrayLayer, vk_image_barriers[i].subresourceRange.layerCount));
}
#endif
vkCmdPipelineBarrier(
(VkCommandBuffer)p_cmd_buffer.id,
(VkPipelineStageFlags)p_src_stages,
@ -1225,6 +1256,14 @@ RDD::FramebufferID RenderingDeviceDriverVulkan::framebuffer_create(RenderPassID
VkResult err = vkCreateFramebuffer(vk_device, &framebuffer_create_info, nullptr, &vk_framebuffer);
ERR_FAIL_COND_V_MSG(err, FramebufferID(), "vkCreateFramebuffer failed with error " + itos(err) + ".");
#if PRINT_NATIVE_COMMANDS
print_line(vformat("vkCreateFramebuffer 0x%uX with %d attachments", uint64_t(vk_framebuffer), p_attachments.size()));
for (uint32_t i = 0; i < p_attachments.size(); i++) {
const TextureInfo *attachment_info = (const TextureInfo *)p_attachments[i].id;
print_line(vformat(" Attachment #%d: IMAGE 0x%uX VIEW 0x%uX", i, uint64_t(attachment_info->vk_view_create_info.image), uint64_t(attachment_info->vk_view)));
}
#endif
return FramebufferID(vk_framebuffer);
}
@ -2467,10 +2506,18 @@ void RenderingDeviceDriverVulkan::command_begin_render_pass(CommandBufferID p_cm
VkSubpassContents vk_subpass_contents = p_cmd_buffer_type == COMMAND_BUFFER_TYPE_PRIMARY ? VK_SUBPASS_CONTENTS_INLINE : VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS;
vkCmdBeginRenderPass((VkCommandBuffer)p_cmd_buffer.id, &render_pass_begin, vk_subpass_contents);
#if PRINT_NATIVE_COMMANDS
print_line(vformat("vkCmdBeginRenderPass Pass 0x%uX Framebuffer 0x%uX", p_render_pass.id, p_framebuffer.id));
#endif
}
// Records vkCmdEndRenderPass on the Vulkan command buffer identified by p_cmd_buffer.
void RenderingDeviceDriverVulkan::command_end_render_pass(CommandBufferID p_cmd_buffer) {
	VkCommandBuffer vk_cmd_buffer = (VkCommandBuffer)p_cmd_buffer.id;
	vkCmdEndRenderPass(vk_cmd_buffer);

#if PRINT_NATIVE_COMMANDS
	// Trace of the native call; only compiled in when PRINT_NATIVE_COMMANDS is non-zero.
	print_line("vkCmdEndRenderPass");
#endif
}
void RenderingDeviceDriverVulkan::command_next_render_subpass(CommandBufferID p_cmd_buffer, CommandBufferType p_cmd_buffer_type) {

View file

@ -33,6 +33,7 @@
#define PRINT_RENDER_GRAPH 0
#define FORCE_FULL_ACCESS_BITS 0
#define PRINT_RESOURCE_TRACKER_TOTAL 0
#define PRINT_COMMAND_RECORDING 0
RenderingDeviceGraph::RenderingDeviceGraph() {
// Default initialization.
@ -163,20 +164,35 @@ void RenderingDeviceGraph::_add_adjacent_command(int32_t p_previous_command_inde
const uint32_t previous_command_data_offset = command_data_offsets[p_previous_command_index];
RecordedCommand &previous_command = *reinterpret_cast<RecordedCommand *>(&command_data[previous_command_data_offset]);
previous_command.adjacent_command_list_index = _add_to_command_list(p_command_index, previous_command.adjacent_command_list_index);
r_command->src_stages = r_command->src_stages | previous_command.dst_stages;
previous_command.next_stages = previous_command.next_stages | r_command->self_stages;
r_command->previous_stages = r_command->previous_stages | previous_command.self_stages;
}
int32_t RenderingDeviceGraph::_add_to_write_list(int32_t p_command_index, Rect2i suberesources, int32_t p_list_index) {
int32_t RenderingDeviceGraph::_add_to_slice_read_list(int32_t p_command_index, Rect2i p_subresources, int32_t p_list_index) {
DEV_ASSERT(p_command_index < int32_t(command_count));
DEV_ASSERT(p_list_index < int32_t(write_list_nodes.size()));
DEV_ASSERT(p_list_index < int32_t(read_slice_list_nodes.size()));
int32_t next_index = int32_t(write_list_nodes.size());
write_list_nodes.resize(next_index + 1);
int32_t next_index = int32_t(read_slice_list_nodes.size());
read_slice_list_nodes.resize(next_index + 1);
RecordedWriteListNode &new_node = write_list_nodes[next_index];
RecordedSliceListNode &new_node = read_slice_list_nodes[next_index];
new_node.command_index = p_command_index;
new_node.next_list_index = p_list_index;
new_node.subresources = suberesources;
new_node.subresources = p_subresources;
return next_index;
}
int32_t RenderingDeviceGraph::_add_to_write_list(int32_t p_command_index, Rect2i p_subresources, int32_t p_list_index) {
DEV_ASSERT(p_command_index < int32_t(command_count));
DEV_ASSERT(p_list_index < int32_t(write_slice_list_nodes.size()));
int32_t next_index = int32_t(write_slice_list_nodes.size());
write_slice_list_nodes.resize(next_index + 1);
RecordedSliceListNode &new_node = write_slice_list_nodes[next_index];
new_node.command_index = p_command_index;
new_node.next_list_index = p_list_index;
new_node.subresources = p_subresources;
return next_index;
}
@ -203,6 +219,9 @@ RenderingDeviceGraph::ComputeListInstruction *RenderingDeviceGraph::_allocate_co
}
void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_trackers, ResourceUsage *p_resource_usages, uint32_t p_resource_count, int32_t p_command_index, RecordedCommand *r_command) {
// Assign the next stages derived from the stages the command requires first.
r_command->next_stages = r_command->self_stages;
if (command_label_index >= 0) {
// If a label is active, tag the command with the label.
r_command->label_index = command_label_index;
@ -242,6 +261,10 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr
resource_tracker->reset_if_outdated(tracking_frame);
const RDD::TextureSubresourceRange &subresources = resource_tracker->texture_subresources;
const Rect2i resource_tracker_rect(subresources.base_mipmap, subresources.base_layer, subresources.mipmap_count, subresources.layer_count);
Rect2i search_tracker_rect = resource_tracker_rect;
ResourceUsage new_resource_usage = p_resource_usages[i];
bool write_usage = _is_write_usage(new_resource_usage);
BitField<RDD::BarrierAccessBits> new_usage_access = _usage_to_access_bits(new_resource_usage);
@ -264,9 +287,14 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr
// If the parent hasn't been used yet, we assign the usage of the slice to the entire resource.
resource_tracker->parent->usage = new_resource_usage;
// Also assign the usage to the slice and consider it a write operation.
// Also assign the usage to the slice and consider it a write operation. Consider the parent's current usage access as its own.
resource_tracker->usage = new_resource_usage;
resource_tracker->usage_access = resource_tracker->parent->usage_access;
write_usage = true;
// Indicate the area that should be tracked is the entire resource.
const RDD::TextureSubresourceRange &parent_subresources = resource_tracker->parent->texture_subresources;
search_tracker_rect = Rect2i(parent_subresources.base_mipmap, parent_subresources.base_layer, parent_subresources.mipmap_count, parent_subresources.layer_count);
} else if (resource_tracker->in_parent_dirty_list) {
if (resource_tracker->parent->usage == new_resource_usage) {
// The slice will be transitioned to the resource of the parent and can be deleted from the dirty list.
@ -274,6 +302,8 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr
ResourceTracker *current_tracker = resource_tracker->parent->dirty_shared_list;
bool initialized_dirty_rect = false;
while (current_tracker != nullptr) {
current_tracker->reset_if_outdated(tracking_frame);
if (current_tracker == resource_tracker) {
current_tracker->in_parent_dirty_list = false;
@ -305,6 +335,8 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr
ResourceTracker *current_tracker = resource_tracker->parent->dirty_shared_list;
bool initialized_dirty_rect = false;
while (current_tracker != nullptr) {
current_tracker->reset_if_outdated(tracking_frame);
if (current_tracker->texture_slice_or_dirty_rect.intersects(resource_tracker->texture_slice_or_dirty_rect)) {
if (current_tracker->command_frame == tracking_frame && current_tracker->texture_slice_command_index == p_command_index) {
ERR_FAIL_MSG("Texture slices that overlap can't be used in the same command.");
@ -312,6 +344,10 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr
// Delete the slice from the dirty list and revert it to the usage of the parent.
if (current_tracker->texture_driver_id != 0) {
_add_texture_barrier_to_command(current_tracker->texture_driver_id, current_tracker->usage_access, new_usage_access, current_tracker->usage, resource_tracker->parent->usage, current_tracker->texture_subresources, command_normalization_barriers, r_command->normalization_barrier_index, r_command->normalization_barrier_count);
// Merge the area of the slice with the current tracking area of the command and indicate it's a write usage as well.
search_tracker_rect = search_tracker_rect.merge(current_tracker->texture_slice_or_dirty_rect);
write_usage = true;
}
current_tracker->in_parent_dirty_list = false;
@ -339,8 +375,9 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr
}
}
// If it wasn't in the list, assume the usage is the same as the parent.
// If it wasn't in the list, assume the usage is the same as the parent. Consider the parent's current usage access as its own.
resource_tracker->usage = resource_tracker->parent->usage;
resource_tracker->usage_access = resource_tracker->parent->usage_access;
if (resource_tracker->usage != new_resource_usage) {
// Insert to the dirty list if the requested usage is different.
@ -355,27 +392,30 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr
}
}
} else {
if (resource_tracker->dirty_shared_list != nullptr) {
ResourceTracker *current_tracker = resource_tracker->dirty_shared_list;
if (current_tracker != nullptr) {
// Consider the usage as write if we must transition any of the slices.
write_usage = true;
}
while (resource_tracker->dirty_shared_list != nullptr) {
if (resource_tracker->dirty_shared_list->texture_driver_id != 0) {
while (current_tracker != nullptr) {
current_tracker->reset_if_outdated(tracking_frame);
if (current_tracker->texture_driver_id != 0) {
// Transition all slices to the layout of the parent resource.
_add_texture_barrier_to_command(resource_tracker->dirty_shared_list->texture_driver_id, resource_tracker->dirty_shared_list->usage_access, new_usage_access, resource_tracker->dirty_shared_list->usage, resource_tracker->usage, resource_tracker->dirty_shared_list->texture_subresources, command_normalization_barriers, r_command->normalization_barrier_index, r_command->normalization_barrier_count);
_add_texture_barrier_to_command(current_tracker->texture_driver_id, current_tracker->usage_access, new_usage_access, current_tracker->usage, resource_tracker->usage, current_tracker->texture_subresources, command_normalization_barriers, r_command->normalization_barrier_index, r_command->normalization_barrier_count);
}
resource_tracker->dirty_shared_list->in_parent_dirty_list = false;
resource_tracker->dirty_shared_list = resource_tracker->dirty_shared_list->next_shared;
current_tracker->in_parent_dirty_list = false;
current_tracker = current_tracker->next_shared;
}
resource_tracker->dirty_shared_list = nullptr;
}
// Use the resource's parent tracker directly for all search operations.
bool resource_has_parent = resource_tracker->parent != nullptr;
ResourceTracker *search_tracker = resource_has_parent ? resource_tracker->parent : resource_tracker;
const RDD::TextureSubresourceRange &subresources = resource_tracker->texture_subresources;
Rect2i resource_tracker_rect(subresources.base_mipmap, subresources.base_layer, subresources.mipmap_count, subresources.layer_count);
bool different_usage = resource_tracker->usage != new_resource_usage;
bool write_usage_after_write = (write_usage && search_tracker->write_command_or_list_index >= 0);
if (different_usage || write_usage_after_write) {
@ -418,18 +458,18 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr
int32_t previous_write_list_index = -1;
int32_t write_list_index = search_tracker->write_command_or_list_index;
while (write_list_index >= 0) {
const RecordedWriteListNode &write_list_node = write_list_nodes[write_list_index];
if (!resource_has_parent || resource_tracker_rect.intersects(write_list_node.subresources)) {
const RecordedSliceListNode &write_list_node = write_slice_list_nodes[write_list_index];
if (!resource_has_parent || search_tracker_rect.intersects(write_list_node.subresources)) {
if (write_list_node.command_index == p_command_index) {
ERR_FAIL_COND_MSG(!resource_has_parent, "Command can't have itself as a dependency.");
} else {
// Command is dependent on this command. Add this command to the adjacency list of the write command.
_add_adjacent_command(write_list_node.command_index, p_command_index, r_command);
if (resource_has_parent && write_usage && resource_tracker_rect.encloses(write_list_node.subresources)) {
if (resource_has_parent && write_usage && search_tracker_rect.encloses(write_list_node.subresources)) {
// Eliminate redundant writes from the list.
if (previous_write_list_index >= 0) {
RecordedWriteListNode &previous_list_node = write_list_nodes[previous_write_list_index];
RecordedSliceListNode &previous_list_node = write_slice_list_nodes[previous_write_list_index];
previous_list_node.next_list_index = write_list_node.next_list_index;
} else {
search_tracker->write_command_or_list_index = write_list_node.next_list_index;
@ -463,47 +503,69 @@ void RenderingDeviceGraph::_add_command_to_graph(ResourceTracker **p_resource_tr
search_tracker->write_command_or_list_index = _add_to_write_list(search_tracker->write_command_or_list_index, tracker_rect, -1);
}
search_tracker->write_command_or_list_index = _add_to_write_list(p_command_index, resource_tracker_rect, search_tracker->write_command_or_list_index);
search_tracker->write_command_or_list_index = _add_to_write_list(p_command_index, search_tracker_rect, search_tracker->write_command_or_list_index);
search_tracker->write_command_list_enabled = true;
} else {
search_tracker->write_command_or_list_index = p_command_index;
search_tracker->write_command_list_enabled = false;
}
// We add this command to the adjacency list of all commands that were reading from this resource. We clear the list in the process.
int32_t previous_command_list_index = -1;
int32_t read_command_list_index = search_tracker->read_command_list_index;
while (read_command_list_index >= 0) {
const RecordedCommandListNode &command_list_node = command_list_nodes[read_command_list_index];
// We add this command to the adjacency list of all commands that were reading from the entire resource.
int32_t read_full_command_list_index = search_tracker->read_full_command_list_index;
while (read_full_command_list_index >= 0) {
const RecordedCommandListNode &command_list_node = command_list_nodes[read_full_command_list_index];
if (command_list_node.command_index == p_command_index) {
if (!resource_has_parent) {
// Slices are allowed to be in different usages in the same command as they are guaranteed to have no overlap in the same command.
// Only slices are allowed to be in different usages in the same command as they are guaranteed to have no overlap in the same command.
ERR_FAIL_MSG("Command can't have itself as a dependency.");
} else {
// Advance to the next element.
read_command_list_index = command_list_node.next_list_index;
previous_command_list_index = read_command_list_index;
}
} else {
if (previous_command_list_index >= 0) {
// Erase this element and connect the previous one to the next element.
command_list_nodes[previous_command_list_index].next_list_index = command_list_node.next_list_index;
read_command_list_index = command_list_node.next_list_index;
previous_command_list_index = read_command_list_index;
} else {
// Erase this element from the head of the list.
DEV_ASSERT(search_tracker->read_command_list_index == read_command_list_index);
read_command_list_index = command_list_node.next_list_index;
search_tracker->read_command_list_index = read_command_list_index;
}
// Add this command to the adjacency list of each command that was reading this resource.
_add_adjacent_command(command_list_node.command_index, p_command_index, r_command);
}
read_full_command_list_index = command_list_node.next_list_index;
}
if (!resource_has_parent) {
// Clear the full list if this resource is not a slice.
search_tracker->read_full_command_list_index = -1;
}
// We add this command to the adjacency list of all commands that were reading from resource slices.
int32_t previous_slice_command_list_index = -1;
int32_t read_slice_command_list_index = search_tracker->read_slice_command_list_index;
while (read_slice_command_list_index >= 0) {
const RecordedSliceListNode &read_list_node = read_slice_list_nodes[read_slice_command_list_index];
if (!resource_has_parent || search_tracker_rect.encloses(read_list_node.subresources)) {
if (previous_slice_command_list_index >= 0) {
// Erase this element and connect the previous one to the next element.
read_slice_list_nodes[previous_slice_command_list_index].next_list_index = read_list_node.next_list_index;
} else {
// Erase this element from the head of the list.
DEV_ASSERT(search_tracker->read_slice_command_list_index == read_slice_command_list_index);
search_tracker->read_slice_command_list_index = read_list_node.next_list_index;
}
// Advance to the next element.
read_slice_command_list_index = read_list_node.next_list_index;
} else {
previous_slice_command_list_index = read_slice_command_list_index;
read_slice_command_list_index = read_list_node.next_list_index;
}
if (!resource_has_parent || search_tracker_rect.intersects(read_list_node.subresources)) {
// Add this command to the adjacency list of each command that was reading this resource.
// We only add the dependency if there's an intersection between slices or this resource isn't a slice.
_add_adjacent_command(read_list_node.command_index, p_command_index, r_command);
}
}
} else if (resource_has_parent) {
// We add a read dependency to the tracker to indicate this command reads from the resource slice.
search_tracker->read_slice_command_list_index = _add_to_slice_read_list(p_command_index, resource_tracker_rect, search_tracker->read_slice_command_list_index);
} else {
// We add a read dependency to the tracker to indicate this command reads from the resource.
search_tracker->read_command_list_index = _add_to_command_list(p_command_index, search_tracker->read_command_list_index);
// We add a read dependency to the tracker to indicate this command reads from the entire resource.
search_tracker->read_full_command_list_index = _add_to_command_list(p_command_index, search_tracker->read_full_command_list_index);
}
}
}
@ -913,9 +975,13 @@ void RenderingDeviceGraph::_group_barriers_for_render_commands(RDD::CommandBuffe
const uint32_t command_data_offset = command_data_offsets[command_index];
const RecordedCommand *command = reinterpret_cast<RecordedCommand *>(&command_data[command_data_offset]);
#if PRINT_COMMAND_RECORDING
print_line(vformat("Grouping barriers for #%d", command_index));
#endif
// Merge command's stage bits with the barrier group.
barrier_group.src_stages = barrier_group.src_stages | command->src_stages;
barrier_group.dst_stages = barrier_group.dst_stages | command->dst_stages;
barrier_group.src_stages = barrier_group.src_stages | command->previous_stages;
barrier_group.dst_stages = barrier_group.dst_stages | command->next_stages;
// Merge command's memory barrier bits with the barrier group.
barrier_group.memory_barrier.src_access = barrier_group.memory_barrier.src_access | command->memory_barrier.src_access;
@ -925,11 +991,17 @@ void RenderingDeviceGraph::_group_barriers_for_render_commands(RDD::CommandBuffe
for (int32_t j = 0; j < command->normalization_barrier_count; j++) {
const RDD::TextureBarrier &recorded_barrier = command_normalization_barriers[command->normalization_barrier_index + j];
barrier_group.normalization_barriers.push_back(recorded_barrier);
#if PRINT_COMMAND_RECORDING
print_line(vformat("Normalization Barrier #%d", barrier_group.normalization_barriers.size() - 1));
#endif
}
for (int32_t j = 0; j < command->transition_barrier_count; j++) {
const RDD::TextureBarrier &recorded_barrier = command_transition_barriers[command->transition_barrier_index + j];
barrier_group.transition_barriers.push_back(recorded_barrier);
#if PRINT_COMMAND_RECORDING
print_line(vformat("Transition Barrier #%d", barrier_group.transition_barriers.size() - 1));
#endif
}
#if USE_BUFFER_BARRIERS
@ -1202,7 +1274,8 @@ void RenderingDeviceGraph::begin() {
command_label_colors.clear();
command_label_offsets.clear();
command_list_nodes.clear();
write_list_nodes.clear();
read_slice_list_nodes.clear();
write_slice_list_nodes.clear();
command_count = 0;
command_label_count = 0;
command_timestamp_index = -1;
@ -1225,7 +1298,7 @@ void RenderingDeviceGraph::add_buffer_clear(RDD::BufferID p_dst, ResourceTracker
int32_t command_index;
RecordedBufferClearCommand *command = static_cast<RecordedBufferClearCommand *>(_allocate_command(sizeof(RecordedBufferClearCommand), command_index));
command->type = RecordedCommand::TYPE_BUFFER_CLEAR;
command->dst_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
command->buffer = p_dst;
command->offset = p_offset;
command->size = p_size;
@ -1241,7 +1314,7 @@ void RenderingDeviceGraph::add_buffer_copy(RDD::BufferID p_src, ResourceTracker
int32_t command_index;
RecordedBufferCopyCommand *command = static_cast<RecordedBufferCopyCommand *>(_allocate_command(sizeof(RecordedBufferCopyCommand), command_index));
command->type = RecordedCommand::TYPE_BUFFER_COPY;
command->dst_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
command->source = p_src;
command->destination = p_dst;
command->region = p_region;
@ -1256,7 +1329,7 @@ void RenderingDeviceGraph::add_buffer_get_data(RDD::BufferID p_src, ResourceTrac
int32_t command_index;
RecordedBufferGetDataCommand *command = static_cast<RecordedBufferGetDataCommand *>(_allocate_command(sizeof(RecordedBufferGetDataCommand), command_index));
command->type = RecordedCommand::TYPE_BUFFER_GET_DATA;
command->dst_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
command->source = p_src;
command->destination = p_dst;
command->region = p_region;
@ -1277,7 +1350,7 @@ void RenderingDeviceGraph::add_buffer_update(RDD::BufferID p_dst, ResourceTracke
int32_t command_index;
RecordedBufferUpdateCommand *command = static_cast<RecordedBufferUpdateCommand *>(_allocate_command(command_size, command_index));
command->type = RecordedCommand::TYPE_BUFFER_UPDATE;
command->dst_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
command->destination = p_dst;
command->buffer_copies_count = p_buffer_copies.size();
@ -1369,7 +1442,7 @@ void RenderingDeviceGraph::add_compute_list_end() {
uint32_t command_size = sizeof(RecordedComputeListCommand) + instruction_data_size;
RecordedComputeListCommand *command = static_cast<RecordedComputeListCommand *>(_allocate_command(command_size, command_index));
command->type = RecordedCommand::TYPE_COMPUTE_LIST;
command->dst_stages = compute_instruction_list.stages;
command->self_stages = compute_instruction_list.stages;
command->instruction_data_size = instruction_data_size;
memcpy(command->instruction_data(), compute_instruction_list.data.ptr(), instruction_data_size);
_add_command_to_graph(compute_instruction_list.command_trackers.ptr(), compute_instruction_list.command_tracker_usages.ptr(), compute_instruction_list.command_trackers.size(), command_index, command);
@ -1579,7 +1652,7 @@ void RenderingDeviceGraph::add_draw_list_end() {
uint32_t command_size = sizeof(RecordedDrawListCommand) + clear_values_size + instruction_data_size;
RecordedDrawListCommand *command = static_cast<RecordedDrawListCommand *>(_allocate_command(command_size, command_index));
command->type = RecordedCommand::TYPE_DRAW_LIST;
command->dst_stages = draw_instruction_list.stages;
command->self_stages = draw_instruction_list.stages;
command->instruction_data_size = instruction_data_size;
command->render_pass = draw_instruction_list.render_pass;
command->framebuffer = draw_instruction_list.framebuffer;
@ -1602,7 +1675,7 @@ void RenderingDeviceGraph::add_texture_clear(RDD::TextureID p_dst, ResourceTrack
int32_t command_index;
RecordedTextureClearCommand *command = static_cast<RecordedTextureClearCommand *>(_allocate_command(sizeof(RecordedTextureClearCommand), command_index));
command->type = RecordedCommand::TYPE_TEXTURE_CLEAR;
command->dst_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
command->texture = p_dst;
command->color = p_color;
command->range = p_range;
@ -1618,7 +1691,7 @@ void RenderingDeviceGraph::add_texture_copy(RDD::TextureID p_src, ResourceTracke
int32_t command_index;
RecordedTextureCopyCommand *command = static_cast<RecordedTextureCopyCommand *>(_allocate_command(sizeof(RecordedTextureCopyCommand), command_index));
command->type = RecordedCommand::TYPE_TEXTURE_COPY;
command->dst_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
command->from_texture = p_src;
command->to_texture = p_dst;
command->region = p_region;
@ -1635,7 +1708,7 @@ void RenderingDeviceGraph::add_texture_get_data(RDD::TextureID p_src, ResourceTr
uint64_t command_size = sizeof(RecordedTextureGetDataCommand) + p_buffer_texture_copy_regions.size() * sizeof(RDD::BufferTextureCopyRegion);
RecordedTextureGetDataCommand *command = static_cast<RecordedTextureGetDataCommand *>(_allocate_command(command_size, command_index));
command->type = RecordedCommand::TYPE_TEXTURE_GET_DATA;
command->dst_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
command->from_texture = p_src;
command->to_buffer = p_dst;
command->buffer_texture_copy_regions_count = p_buffer_texture_copy_regions.size();
@ -1656,7 +1729,7 @@ void RenderingDeviceGraph::add_texture_resolve(RDD::TextureID p_src, ResourceTra
int32_t command_index;
RecordedTextureResolveCommand *command = static_cast<RecordedTextureResolveCommand *>(_allocate_command(sizeof(RecordedTextureResolveCommand), command_index));
command->type = RecordedCommand::TYPE_TEXTURE_RESOLVE;
command->dst_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
command->from_texture = p_src;
command->to_texture = p_dst;
command->src_layer = p_src_layer;
@ -1676,7 +1749,7 @@ void RenderingDeviceGraph::add_texture_update(RDD::TextureID p_dst, ResourceTrac
uint64_t command_size = sizeof(RecordedTextureUpdateCommand) + p_buffer_copies.size() * sizeof(RecordedBufferToTextureCopy);
RecordedTextureUpdateCommand *command = static_cast<RecordedTextureUpdateCommand *>(_allocate_command(command_size, command_index));
command->type = RecordedCommand::TYPE_TEXTURE_UPDATE;
command->dst_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
command->self_stages = RDD::PIPELINE_STAGE_TRANSFER_BIT;
command->to_texture = p_dst;
command->buffer_to_texture_copies_count = p_buffer_copies.size();
@ -1693,7 +1766,7 @@ void RenderingDeviceGraph::add_capture_timestamp(RDD::QueryPoolID p_query_pool,
int32_t command_index;
RecordedCaptureTimestampCommand *command = static_cast<RecordedCaptureTimestampCommand *>(_allocate_command(sizeof(RecordedCaptureTimestampCommand), command_index));
command->type = RecordedCommand::TYPE_CAPTURE_TIMESTAMP;
command->dst_stages = 0;
command->self_stages = 0;
command->pool = p_query_pool;
command->index = p_index;
_add_command_to_graph(nullptr, nullptr, 0, command_index, command);
@ -1852,6 +1925,10 @@ void RenderingDeviceGraph::end(RDD::CommandBufferID p_command_buffer, bool p_reo
_print_render_commands(commands_sorted.ptr(), command_count);
#endif
#if PRINT_COMMAND_RECORDING
print_line(vformat("Recording %d commands", command_count));
#endif
uint32_t boosted_priority = 0;
uint32_t current_level = commands_sorted[0].level;
uint32_t current_level_start = 0;
@ -1884,6 +1961,10 @@ void RenderingDeviceGraph::end(RDD::CommandBufferID p_command_buffer, bool p_reo
}
_run_label_command_change(p_command_buffer, -1, -1, true, false, nullptr, 0, current_label_index, current_label_level);
#if PRINT_COMMAND_RECORDING
print_line(vformat("Recorded %d commands", command_count));
#endif
}
// Advance the frame counter. It's not necessary to do this if no commands are recorded because that means no secondary command buffers were used.

View file

@ -112,8 +112,9 @@ public:
int32_t buffer_barrier_count = 0;
#endif
int32_t label_index = -1;
BitField<RDD::PipelineStageBits> src_stages;
BitField<RDD::PipelineStageBits> dst_stages;
BitField<RDD::PipelineStageBits> previous_stages;
BitField<RDD::PipelineStageBits> next_stages;
BitField<RDD::PipelineStageBits> self_stages;
};
struct RecordedBufferCopy {
@ -150,7 +151,8 @@ public:
struct ResourceTracker {
uint32_t reference_count = 0;
int64_t command_frame = -1;
int32_t read_command_list_index = -1;
int32_t read_full_command_list_index = -1;
int32_t read_slice_command_list_index = -1;
int32_t write_command_or_list_index = -1;
int32_t draw_list_index = -1;
int32_t compute_list_index = -1;
@ -171,7 +173,8 @@ public:
if (new_command_frame != command_frame) {
usage_access.clear();
command_frame = new_command_frame;
read_command_list_index = -1;
read_full_command_list_index = -1;
read_slice_command_list_index = -1;
write_command_or_list_index = -1;
draw_list_index = -1;
compute_list_index = -1;
@ -237,7 +240,7 @@ private:
int32_t next_list_index = -1;
};
struct RecordedWriteListNode {
struct RecordedSliceListNode {
int32_t command_index = -1;
int32_t next_list_index = -1;
Rect2i subresources;
@ -572,7 +575,8 @@ private:
uint32_t command_count = 0;
uint32_t command_label_count = 0;
LocalVector<RecordedCommandListNode> command_list_nodes;
LocalVector<RecordedWriteListNode> write_list_nodes;
LocalVector<RecordedSliceListNode> read_slice_list_nodes;
LocalVector<RecordedSliceListNode> write_slice_list_nodes;
int32_t command_timestamp_index = -1;
int32_t command_synchronization_index = -1;
bool command_synchronization_pending = false;
@ -590,7 +594,8 @@ private:
static RDD::BarrierAccessBits _usage_to_access_bits(ResourceUsage p_usage);
int32_t _add_to_command_list(int32_t p_command_index, int32_t p_list_index);
void _add_adjacent_command(int32_t p_previous_command_index, int32_t p_command_index, RecordedCommand *r_command);
int32_t _add_to_write_list(int32_t p_command_index, Rect2i suberesources, int32_t p_list_index);
int32_t _add_to_slice_read_list(int32_t p_command_index, Rect2i p_subresources, int32_t p_list_index);
int32_t _add_to_write_list(int32_t p_command_index, Rect2i p_subresources, int32_t p_list_index);
RecordedCommand *_allocate_command(uint32_t p_command_size, int32_t &r_command_index);
DrawListInstruction *_allocate_draw_list_instruction(uint32_t p_instruction_size);
ComputeListInstruction *_allocate_compute_list_instruction(uint32_t p_instruction_size);