vk: Move frame present synchronization to the driver

- Just use a semaphore and let the driver handle it instead of manual framepacing.
  We lose framepace control but drivers have matured in the past few years so it should work fine.
This commit is contained in:
kd-11 2019-06-09 12:48:19 +03:00 committed by kd-11
parent 527f7a437a
commit 57196f0504
4 changed files with 109 additions and 125 deletions

View file

@ -505,7 +505,8 @@ VKGSRender::VKGSRender() : GSRender()
for (auto &ctx : frame_context_storage)
{
vkCreateSemaphore((*m_device), &semaphore_info, nullptr, &ctx.present_semaphore);
vkCreateSemaphore((*m_device), &semaphore_info, nullptr, &ctx.present_wait_semaphore);
vkCreateSemaphore((*m_device), &semaphore_info, nullptr, &ctx.acquire_signal_semaphore);
ctx.descriptor_pool.create(*m_device, sizes.data(), static_cast<uint32_t>(sizes.size()), DESCRIPTOR_MAX_DRAW_CALLS, 1);
}
@ -616,7 +617,8 @@ VKGSRender::~VKGSRender()
//NOTE: aux_context uses descriptor pools borrowed from the main queues and any allocations will be automatically freed when pool is destroyed
for (auto &ctx : frame_context_storage)
{
vkDestroySemaphore((*m_device), ctx.present_semaphore, nullptr);
vkDestroySemaphore((*m_device), ctx.present_wait_semaphore, nullptr);
vkDestroySemaphore((*m_device), ctx.acquire_signal_semaphore, nullptr);
ctx.descriptor_pool.destroy();
ctx.buffer_views_to_clean.clear();
@ -826,15 +828,11 @@ void VKGSRender::check_heap_status(u32 flags)
std::chrono::time_point<steady_clock> submit_start = steady_clock::now();
frame_context_t *target_frame = nullptr;
u64 earliest_sync_time = UINT64_MAX;
for (s32 i = 0; i < VK_MAX_ASYNC_FRAMES; ++i)
if (!m_queued_frames.empty())
{
auto ctx = &frame_context_storage[i];
if (ctx->swap_command_buffer)
if (m_current_frame != &m_aux_frame_context)
{
if (ctx->last_frame_sync_time > m_last_heap_sync_time &&
ctx->last_frame_sync_time < earliest_sync_time)
target_frame = ctx;
target_frame = m_queued_frames.front();
}
}
@ -857,14 +855,8 @@ void VKGSRender::check_heap_status(u32 flags)
}
else
{
target_frame->swap_command_buffer->poke();
while (target_frame->swap_command_buffer->pending)
{
if (!target_frame->swap_command_buffer->poke())
std::this_thread::yield();
}
process_swap_request(target_frame, true);
// Flush the frame context
frame_context_cleanup(target_frame, true);
}
std::chrono::time_point<steady_clock> submit_end = steady_clock::now();
@ -874,7 +866,7 @@ void VKGSRender::check_heap_status(u32 flags)
void VKGSRender::check_present_status()
{
if (!m_queued_frames.empty())
while (!m_queued_frames.empty())
{
auto ctx = m_queued_frames.front();
if (ctx->swap_command_buffer->pending)
@ -885,7 +877,7 @@ void VKGSRender::check_present_status()
}
}
process_swap_request(ctx, true);
frame_context_cleanup(ctx, true);
}
}
@ -1061,10 +1053,6 @@ void VKGSRender::begin()
m_current_frame->flags &= ~frame_context_state::dirty;
}
else
{
check_present_status();
}
}
void VKGSRender::update_draw_state()
@ -2177,24 +2165,27 @@ void VKGSRender::clear_surface(u32 mask)
void VKGSRender::flush_command_queue(bool hard_sync)
{
close_and_submit_command_buffer({}, m_current_command_buffer->submit_fence);
close_and_submit_command_buffer(m_current_command_buffer->submit_fence);
if (hard_sync)
{
// swap handler checks the pending flag, so call it here
process_swap_request(m_current_frame);
//wait for the latest instruction to execute
// wait for the latest instruction to execute
m_current_command_buffer->pending = true;
m_current_command_buffer->reset();
//Clear all command buffer statuses
// Clear all command buffer statuses
for (auto &cb : m_primary_cb_list)
{
if (cb.pending)
cb.poke();
}
// Drain present queue
while (!m_queued_frames.empty())
{
check_present_status();
}
m_flush_requests.clear_pending_flag();
}
else
@ -2206,14 +2197,10 @@ void VKGSRender::flush_command_queue(bool hard_sync)
m_current_cb_index = (m_current_cb_index + 1) % VK_MAX_ASYNC_CB_COUNT;
m_current_command_buffer = &m_primary_cb_list[m_current_cb_index];
// Soft sync if a present has not yet occured before consuming the wait event
for (auto &ctx : frame_context_storage)
{
if (ctx.swap_command_buffer == m_current_command_buffer)
process_swap_request(&ctx, true);
}
m_current_command_buffer->reset();
// Just in case a queued frame holds a ref to this cb, drain the present queue
check_present_status();
}
open_command_buffer();
@ -2276,7 +2263,7 @@ void VKGSRender::present(frame_context_t *ctx)
if (!present_surface_dirty_flag)
{
switch (VkResult error = m_swapchain->present(ctx->present_image))
switch (VkResult error = m_swapchain->present(ctx->present_wait_semaphore, ctx->present_image))
{
case VK_SUCCESS:
break;
@ -2292,62 +2279,44 @@ void VKGSRender::present(frame_context_t *ctx)
// Presentation image released; reset value
ctx->present_image = UINT32_MAX;
// Remove from queued list
while (!m_queued_frames.empty())
{
auto frame = m_queued_frames.front();
m_queued_frames.pop_front();
if (frame == ctx)
{
break;
}
}
vk::advance_completed_frame_counter();
}
void VKGSRender::queue_swap_request()
{
// Buffer the swap request and return
if (m_current_frame->swap_command_buffer &&
m_current_frame->swap_command_buffer->pending)
{
// Its probable that no actual drawing took place
process_swap_request(m_current_frame);
}
verify(HERE), !m_current_frame->swap_command_buffer;
m_current_frame->swap_command_buffer = m_current_command_buffer;
if (m_swapchain->is_headless())
{
m_swapchain->end_frame(*m_current_command_buffer, m_current_frame->present_image);
close_and_submit_command_buffer({}, m_current_command_buffer->submit_fence);
close_and_submit_command_buffer(m_current_command_buffer->submit_fence);
}
else
{
close_and_submit_command_buffer({ m_current_frame->present_semaphore },
m_current_command_buffer->submit_fence,
close_and_submit_command_buffer(m_current_command_buffer->submit_fence,
m_current_frame->acquire_signal_semaphore,
m_current_frame->present_wait_semaphore,
VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT | VK_PIPELINE_STAGE_TRANSFER_BIT);
}
// Set up a present request for this frame as well
present(m_current_frame);
m_current_frame->swap_command_buffer->pending = true;
//Grab next cb in line and make it usable
// Grab next cb in line and make it usable
m_current_cb_index = (m_current_cb_index + 1) % VK_MAX_ASYNC_CB_COUNT;
m_current_command_buffer = &m_primary_cb_list[m_current_cb_index];
m_current_command_buffer->reset();
//Set up new pointers for the next frame
// Set up new pointers for the next frame
advance_queued_frames();
open_command_buffer();
}
void VKGSRender::process_swap_request(frame_context_t *ctx, bool free_resources)
void VKGSRender::frame_context_cleanup(frame_context_t *ctx, bool free_resources)
{
if (!ctx->swap_command_buffer)
return;
verify(HERE), ctx->swap_command_buffer;
if (ctx->swap_command_buffer->pending)
{
@ -2362,9 +2331,6 @@ void VKGSRender::process_swap_request(frame_context_t *ctx, bool free_resources)
free_resources = true;
}
//Always present
present(ctx);
if (free_resources)
{
if (g_cfg.video.overlay)
@ -2425,6 +2391,20 @@ void VKGSRender::process_swap_request(frame_context_t *ctx, bool free_resources)
}
ctx->swap_command_buffer = nullptr;
// Remove from queued list
while (!m_queued_frames.empty())
{
auto frame = m_queued_frames.front();
m_queued_frames.pop_front();
if (frame == ctx)
{
break;
}
}
vk::advance_completed_frame_counter();
}
void VKGSRender::do_local_task(rsx::FIFO_state state)
@ -2797,7 +2777,7 @@ void VKGSRender::write_buffers()
{
}
void VKGSRender::close_and_submit_command_buffer(const std::vector<VkSemaphore> &semaphores, VkFence fence, VkPipelineStageFlags pipeline_stage_flags)
void VKGSRender::close_and_submit_command_buffer(VkFence fence, VkSemaphore wait_semaphore, VkSemaphore signal_semaphore, VkPipelineStageFlags pipeline_stage_flags)
{
if (m_attrib_ring_info.dirty() ||
m_fragment_env_ring_info.dirty() ||
@ -2823,12 +2803,16 @@ void VKGSRender::close_and_submit_command_buffer(const std::vector<VkSemaphore>
m_texture_upload_buffer_ring_info.sync(m_secondary_command_buffer);
m_secondary_command_buffer.end();
m_secondary_command_buffer.submit(m_swapchain->get_graphics_queue(), {}, VK_NULL_HANDLE, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
m_secondary_command_buffer.submit(m_swapchain->get_graphics_queue(),
VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT);
}
m_current_command_buffer->end();
m_current_command_buffer->tag();
m_current_command_buffer->submit(m_swapchain->get_graphics_queue(), semaphores, fence, pipeline_stage_flags);
m_current_command_buffer->submit(m_swapchain->get_graphics_queue(),
wait_semaphore, signal_semaphore, fence, pipeline_stage_flags);
}
void VKGSRender::open_command_buffer()
@ -3065,19 +3049,14 @@ void VKGSRender::reinitialize_swapchain()
const auto new_width = m_frame->client_width();
const auto new_height = m_frame->client_height();
//Reject requests to acquire new swapchain if the window is minimized
//The NVIDIA driver will spam VK_ERROR_OUT_OF_DATE_KHR if you try to acquire an image from the swapchain and the window is minimized
//However, any attempt to actually renew the swapchain will crash the driver with VK_ERROR_DEVICE_LOST while the window is in this state
// Reject requests to acquire new swapchain if the window is minimized
// The NVIDIA driver will spam VK_ERROR_OUT_OF_DATE_KHR if you try to acquire an image from the swapchain and the window is minimized
// However, any attempt to actually renew the swapchain will crash the driver with VK_ERROR_DEVICE_LOST while the window is in this state
if (new_width == 0 || new_height == 0)
return;
/**
* Waiting for the commands to process does not work reliably as the fence can be signaled before swap images are released
* and there are no explicit methods to ensure that the presentation engine is not using the images at all.
*/
//NOTE: This operation will create a hard sync point
close_and_submit_command_buffer({}, m_current_command_buffer->submit_fence);
// NOTE: This operation will create a hard sync point
close_and_submit_command_buffer(m_current_command_buffer->submit_fence);
m_current_command_buffer->pending = true;
m_current_command_buffer->reset();
@ -3087,13 +3066,11 @@ void VKGSRender::reinitialize_swapchain()
continue;
// Release present image by presenting it
ctx.swap_command_buffer->wait(FRAME_PRESENT_TIMEOUT);
ctx.swap_command_buffer = nullptr;
present(&ctx);
frame_context_cleanup(&ctx, true);
}
// Remove any old refs to the old images as they are about to be destroyed
//m_framebuffers_to_clean;
// Drain all the queues
vkDeviceWaitIdle(*m_device);
// Rebuild swapchain. Old swapchain destruction is handled by the init_swapchain call
if (!m_swapchain->init(new_width, new_height))
@ -3108,7 +3085,7 @@ void VKGSRender::reinitialize_swapchain()
m_client_width = new_width;
m_client_height = new_height;
//Prepare new swapchain images for use
// Prepare new swapchain images for use
open_command_buffer();
for (u32 i = 0; i < m_swapchain->get_swap_image_count(); ++i)
@ -3131,7 +3108,7 @@ void VKGSRender::reinitialize_swapchain()
vkCreateFence((*m_device), &infos, nullptr, &resize_fence);
//Flush the command buffer
close_and_submit_command_buffer({}, resize_fence);
close_and_submit_command_buffer(resize_fence);
vk::wait_for_fence(resize_fence);
vkDestroyFence((*m_device), resize_fence, nullptr);
@ -3167,12 +3144,11 @@ void VKGSRender::flip(int buffer, bool emu_flip)
m_current_frame = &frame_context_storage[m_current_queue_index];
if (m_current_frame->swap_command_buffer)
{
// Always present if pending swap is present.
// Its possible this flip request is triggered by overlays and the flip queue is in undefined state
process_swap_request(m_current_frame, true);
frame_context_cleanup(m_current_frame, true);
}
//swap aux storage and current frame; aux storage should always be ready for use at all times
// Swap aux storage and current frame; aux storage should always be ready for use at all times
m_current_frame->swap_storage(m_aux_frame_context);
m_current_frame->grab_resources(m_aux_frame_context);
}
@ -3184,8 +3160,8 @@ void VKGSRender::flip(int buffer, bool emu_flip)
LOG_ERROR(RSX, "Possible data corruption on frame context storage detected");
}
//There were no draws and back-to-back flips happened
process_swap_request(m_current_frame, true);
// There were no draws and back-to-back flips happened
frame_context_cleanup(m_current_frame, true);
}
if (present_surface_dirty_flag)
@ -3248,7 +3224,7 @@ void VKGSRender::flip(int buffer, bool emu_flip)
verify(HERE), m_current_frame->swap_command_buffer == nullptr;
u64 timeout = m_swapchain->get_swap_image_count() <= VK_MAX_ASYNC_FRAMES? 0ull: 100000000ull;
while (VkResult status = m_swapchain->acquire_next_swapchain_image(m_current_frame->present_semaphore, timeout, &m_current_frame->present_image))
while (VkResult status = m_swapchain->acquire_next_swapchain_image(m_current_frame->acquire_signal_semaphore, timeout, &m_current_frame->present_image))
{
switch (status)
{
@ -3263,18 +3239,7 @@ void VKGSRender::flip(int buffer, bool emu_flip)
//Whatever returned from status, this is now a spin
timeout = 0ull;
for (auto &ctx : frame_context_storage)
{
if (ctx.swap_command_buffer)
{
ctx.swap_command_buffer->poke();
if (!ctx.swap_command_buffer->pending)
{
//Release in case there is competition for frame resources
process_swap_request(&ctx, true);
}
}
}
check_present_status();
continue;
}

View file

@ -165,7 +165,8 @@ enum frame_context_state : u32
struct frame_context_t
{
VkSemaphore present_semaphore = VK_NULL_HANDLE;
VkSemaphore acquire_signal_semaphore = VK_NULL_HANDLE;
VkSemaphore present_wait_semaphore = VK_NULL_HANDLE;
VkDescriptorSet descriptor_set = VK_NULL_HANDLE;
vk::descriptor_pool descriptor_pool;
@ -194,7 +195,8 @@ struct frame_context_t
//Copy shareable information
void grab_resources(frame_context_t &other)
{
present_semaphore = other.present_semaphore;
present_wait_semaphore = other.present_wait_semaphore;
acquire_signal_semaphore = other.acquire_signal_semaphore;
descriptor_set = other.descriptor_set;
descriptor_pool = other.descriptor_pool;
used_descriptors = other.used_descriptors;
@ -482,13 +484,18 @@ public:
private:
void clear_surface(u32 mask);
void close_and_submit_command_buffer(const std::vector<VkSemaphore> &semaphores, VkFence fence, VkPipelineStageFlags pipeline_stage_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
void open_command_buffer();
void prepare_rtts(rsx::framebuffer_creation_context context);
void open_command_buffer();
void close_and_submit_command_buffer(
VkFence fence = VK_NULL_HANDLE,
VkSemaphore wait_semaphore = VK_NULL_HANDLE,
VkSemaphore signal_semaphore = VK_NULL_HANDLE,
VkPipelineStageFlags pipeline_stage_flags = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
void flush_command_queue(bool hard_sync = false);
void queue_swap_request();
void process_swap_request(frame_context_t *ctx, bool free_resources = false);
void frame_context_cleanup(frame_context_t *ctx, bool free_resources = false);
void advance_queued_frames();
void present(frame_context_t *ctx);
void reinitialize_swapchain();

View file

@ -947,7 +947,7 @@ namespace vk
is_open = false;
}
void submit(VkQueue queue, const std::vector<VkSemaphore> &semaphores, VkFence fence, VkPipelineStageFlags pipeline_stage_flags)
void submit(VkQueue queue, VkSemaphore wait_semaphore, VkSemaphore signal_semaphore, VkFence fence, VkPipelineStageFlags pipeline_stage_flags)
{
if (is_open)
{
@ -955,19 +955,29 @@ namespace vk
return;
}
if (fence == VK_NULL_HANDLE)
if (!fence)
{
fence = m_submit_fence;
is_pending = (fence != VK_NULL_HANDLE);
}
VkSubmitInfo infos = {};
infos.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
infos.commandBufferCount = 1;
infos.pCommandBuffers = &commands;
infos.pWaitDstStageMask = &pipeline_stage_flags;
infos.pWaitSemaphores = semaphores.data();
infos.waitSemaphoreCount = static_cast<uint32_t>(semaphores.size());
infos.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
if (wait_semaphore)
{
infos.waitSemaphoreCount = 1;
infos.pWaitSemaphores = &wait_semaphore;
}
if (signal_semaphore)
{
infos.signalSemaphoreCount = 1;
infos.pSignalSemaphores = &signal_semaphore;
}
acquire_global_submit_lock();
CHECK_RESULT(vkQueueSubmit(queue, 1, &infos, fence));
@ -1637,7 +1647,7 @@ public:
virtual VkImage& get_image(u32 index) = 0;
virtual VkResult acquire_next_swapchain_image(VkSemaphore semaphore, u64 timeout, u32* result) = 0;
virtual void end_frame(command_buffer& cmd, u32 index) = 0;
virtual VkResult present(u32 index) = 0;
virtual VkResult present(VkSemaphore semaphore, u32 index) = 0;
virtual VkImageLayout get_optimal_present_layout() = 0;
virtual bool supports_automatic_wm_reports() const
@ -1769,7 +1779,7 @@ public:
dev.destroy();
}
VkResult present(u32 image) override
VkResult present(VkSemaphore /*semaphore*/, u32 image) override
{
auto& src = swapchain_images[image];
GdiFlush();
@ -1826,7 +1836,7 @@ public:
dev.destroy();
}
VkResult present(u32 index) override
VkResult present(VkSemaphore /*semaphore*/, u32 index) override
{
fmt::throw_exception("Native macOS swapchain is not implemented yet!");
}
@ -1914,7 +1924,7 @@ public:
dev.destroy();
}
VkResult present(u32 index) override
VkResult present(VkSemaphore /*semaphore*/, u32 index) override
{
auto& src = swapchain_images[index];
if (pixmap)
@ -2216,7 +2226,7 @@ public:
{
}
VkResult present(u32 image) override
VkResult present(VkSemaphore semaphore, u32 image) override
{
VkPresentInfoKHR present = {};
present.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
@ -2224,6 +2234,8 @@ public:
present.swapchainCount = 1;
present.pSwapchains = &m_vk_swapchain;
present.pImageIndices = &image;
present.waitSemaphoreCount = 1;
present.pWaitSemaphores = &semaphore;
return queuePresentKHR(vk_present_queue, &present);
}

View file

@ -1100,7 +1100,7 @@ namespace vk
info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
vkCreateFence(*m_device, &info, nullptr, &submit_fence);
cmd.submit(m_submit_queue, {}, submit_fence, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
cmd.submit(m_submit_queue, VK_NULL_HANDLE, VK_NULL_HANDLE, submit_fence, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
vk::wait_for_fence(submit_fence, GENERAL_WAIT_TIMEOUT);
vkDestroyFence(*m_device, submit_fence, nullptr);
@ -1111,7 +1111,7 @@ namespace vk
else
{
// Auxilliary command queue with auto-restart capability
cmd.submit(m_submit_queue, {}, VK_NULL_HANDLE, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
cmd.submit(m_submit_queue, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_NULL_HANDLE, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
}
verify(HERE), cmd.flags == 0;