rsx: Implement atomic FIFO fetching (stability improvement) (non-default setting) (#12107)

This commit is contained in:
Elad Ashkenazi 2022-06-04 15:35:06 +03:00 committed by GitHub
parent 286f97fad0
commit 9bb7e8d614
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 503 additions and 152 deletions

View file

@ -180,7 +180,7 @@ namespace cfg
// Value node with fixed set of possible values, each maps to an enum value of type T.
template <typename T>
class _enum final : public _base
class _enum : public _base
{
atomic_t<T> m_value;

View file

@ -6024,7 +6024,7 @@ public:
break;
}
if (u64 cmdh = ci->getZExtValue() & ~(MFC_BARRIER_MASK | MFC_FENCE_MASK | MFC_RESULT_MASK); !g_use_rtm)
if (u64 cmdh = ci->getZExtValue() & ~(MFC_BARRIER_MASK | MFC_FENCE_MASK | MFC_RESULT_MASK); g_cfg.core.rsx_fifo_accuracy || !g_use_rtm)
{
// TODO: don't require TSX (current implementation is TSX-only)
if (cmdh == MFC_PUT_CMD || cmdh == MFC_SNDSIG_CMD)

View file

@ -1875,6 +1875,8 @@ void spu_thread::do_dma_transfer(spu_thread* _this, const spu_mfc_cmd& args, u8*
src = zero_buf;
}
rsx::reservation_lock<false, 1> rsx_lock(eal, args.size, !is_get && g_cfg.core.rsx_fifo_accuracy && !g_cfg.core.spu_accurate_dma);
if ((!g_use_rtm && !is_get) || g_cfg.core.spu_accurate_dma) [[unlikely]]
{
perf_meter<"ADMA_GET"_u64> perf_get = perf_;

View file

@ -432,6 +432,7 @@ error_code sys_rsx_context_attribute(u32 context_id, u32 package_id, u64 a3, u64
const u64 get = static_cast<u32>(a3);
const u64 put = static_cast<u32>(a4);
vm::_ref<atomic_be_t<u64>>(render->dma_address + ::offset32(&RsxDmaControl::put)).release(put << 32 | get);
render->fifo_ctrl->set_get(static_cast<u32>(get));
render->last_known_code_start = get;
render->sync_point_request.release(true);
break;

View file

@ -4,10 +4,17 @@
#include "RSXThread.h"
#include "Capture/rsx_capture.h"
#include "Common/time.hpp"
#include "Emu/Memory/vm_reservation.h"
#include "Emu/Cell/lv2/sys_rsx.h"
#include "util/asm.hpp"
#include <bitset>
using spu_rdata_t = std::byte[128];
extern void mov_rdata(spu_rdata_t& _dst, const spu_rdata_t& _src);
extern bool cmp_rdata(const spu_rdata_t& _lhs, const spu_rdata_t& _rhs);
namespace rsx
{
namespace FIFO
@ -32,10 +39,11 @@ namespace rsx
// NOTE: Only supposed to be invoked to wait for a single arg on command[0] (4 bytes)
// Wait for PUT to advance so that execution can proceed
sync_get();
invalidate_cache();
while (read_put() == m_internal_get && !Emu.IsStopped())
{
std::this_thread::yield();
get_current_renderer()->cpu_wait({});
}
}
}
@ -58,16 +66,120 @@ namespace rsx
}
}
void FIFO_control::set_get(u32 get, bool check_spin)
// Reads one 32-bit command word at IO offset `addr`, going through the local cache (m_cache).
// If `addr` is outside the currently cached window, the window is refilled first, one 128-byte
// line at a time, retrying lines whose reservation changes while they are being copied.
//
// Returns:
//   {true, value}        - the word was read (decoded through be_t<u32>)
//   {false, FIFO_EMPTY}  - `addr` equals the current PUT, nothing to fetch
//   {false, FIFO_ERROR}  - the cache-line-aligned address is not mapped in the IO table
//   {false, 0}           - the renderer was stopped while waiting for a cache line
std::pair<bool, u32> FIFO_control::fetch_u32(u32 addr)
{
	// Unsigned wrap-around makes this a single "addr is outside [m_cache_addr, m_cache_addr + m_cache_size)" test
	if (addr - m_cache_addr >= m_cache_size)
	{
		const u32 put = read_put();

		if (put == addr)
		{
			return {false, FIFO_EMPTY};
		}

		// Align the cache window down to a 128-byte line
		m_cache_addr = addr & -128;

		const u32 addr1 = m_iotable->get_addr(m_cache_addr);

		if (addr1 == umax)
		{
			m_cache_size = 0;
			return {false, FIFO_ERROR};
		}

		// Cover up to the end of the line containing PUT, capped by the cache capacity
		m_cache_size = std::min<u32>((put | 0x7f) - m_cache_addr, u32{sizeof(m_cache)} - 1) + 1;

		if (0x100000 - (m_cache_addr & 0xfffff) < m_cache_size)
		{
			// Check if memory layout changes in the next 1MB page boundary
			if ((addr1 >> 20) + 1 != (m_iotable->get_addr(m_cache_addr + 0x100000) >> 20))
			{
				// Trim cache as needed if memory layout changes
				m_cache_size = 0x100000 - (m_cache_addr & 0xfffff);
			}
		}

		// Make mask of cache lines to fetch
		u8 to_fetch = static_cast<u8>((1u << (m_cache_size / 128)) - 1);

		if (addr < put && put < m_cache_addr + m_cache_size)
		{
			// Adjust to the bounds of the FIFO buffer known to be prepared
			m_cache_size = put - m_cache_addr;
		}

		rsx::reservation_lock<true, 1> rsx_lock(addr1, m_cache_size, true);
		const auto src = vm::_ptr<spu_rdata_t>(addr1);

		// Timestamp of the first failed attempt (0 = no failure yet).
		// Must be 64-bit: rsx::uclock() values are held in u64 elsewhere, and a truncated
		// timestamp would make the elapsed-time check below fire on (almost) every retry.
		u64 start_time = 0;

		// Find the next set bit after every iteration
		for (u32 i = 0;; i = (std::countr_zero<u32>(utils::rol8(to_fetch, 0 - i - 1)) + i + 1) % 8)
		{
			// If a reservation is being updated, try to load another
			const auto& res = vm::reservation_acquire(addr1 + i * 128);
			const u64 time0 = res;

			if (!(time0 & 127))
			{
				mov_rdata(m_cache[i], src[i]);

				// Accept the copy only if the reservation did not change and the data is still identical
				if (time0 == res && cmp_rdata(m_cache[i], src[i]))
				{
					// The fetch of the cache line content has been successful, unset its bit
					to_fetch &= ~(1u << i);

					if (!to_fetch)
					{
						break;
					}

					continue;
				}
			}

			if (!start_time)
			{
				start_time = rsx::uclock();
			}

			if (rsx::uclock() - start_time >= 50u)
			{
				const auto rsx = get_current_renderer();

				if (rsx->is_stopped())
				{
					return {};
				}

				rsx->cpu_wait({});

				// Add idle time in reverse: after the exchange start_time becomes uclock(),
				// so subtracting (old - new) adds the elapsed time to the counter
				const u64 _start = std::exchange(start_time, rsx::uclock());
				rsx->performance_counters.idle_time -= _start - start_time;
			}

			busy_wait(200);

			if (g_cfg.core.rsx_fifo_accuracy >= rsx_fifo_mode::atomic_ordered)
			{
				// Step back so that the loop increment retries this same line instead of moving on
				i = (i - 1) % 8;
			}
		}
	}

	be_t<u32> ret;
	std::memcpy(&ret, reinterpret_cast<const u8*>(&m_cache) + (addr - m_cache_addr), sizeof(u32));
	return {true, ret};
}
void FIFO_control::set_get(u32 get, u32 spin_cmd)
{
invalidate_cache();
if (spin_cmd && m_ctrl->get == get)
{
m_memwatch_addr = get;
m_memwatch_cmp = spin_cmd;
return;
}
@ -76,21 +188,64 @@ namespace rsx
m_remaining_commands = 0;
}
// Returns a view over the FIFO arguments starting at the current internal GET position.
// With FIFO accuracy enabled the view points into the local cache and is bounded by its size;
// otherwise it points at mapped memory with a fixed nominal length of 0x10000 words.
std::span<const u32> FIFO_control::get_current_arg_ptr() const
{
	if (!g_cfg.core.rsx_fifo_accuracy)
	{
		// Return a raw pointer with no limited access
		const auto raw_words = static_cast<const u32*>(vm::base(m_iotable->get_addr(m_internal_get)));
		return {raw_words, 0x10000};
	}

	// Return a pointer to the cache storage with confined access
	const u32 byte_offset = m_internal_get - m_cache_addr;
	const auto cache_words = reinterpret_cast<const u32*>(&m_cache);

	return {cache_words + byte_offset / 4, (m_cache_size - byte_offset) / 4};
}
bool FIFO_control::read_unsafe(register_pair& data)
{
// Fast read with no processing, only safe inside a PACKET_BEGIN+count block
if (m_remaining_commands &&
m_internal_get != read_put<false>())
if (m_remaining_commands)
{
m_command_reg += m_command_inc;
m_args_ptr += 4;
m_remaining_commands--;
bool ok{};
u32 arg = 0;
if (g_cfg.core.rsx_fifo_accuracy)
{
std::tie(ok, arg) = fetch_u32(m_internal_get + 4);
if (!ok)
{
if (arg == FIFO_ERROR)
{
get_current_renderer()->recover_fifo();
}
return false;
}
}
else
{
if (m_internal_get + 4 == read_put<false>())
{
return false;
}
m_args_ptr += 4;
arg = vm::read32(m_args_ptr);
}
m_internal_get += 4;
data.set(m_command_reg, vm::read32(m_args_ptr));
m_command_reg += m_command_inc;
--m_remaining_commands;
data.set(m_command_reg, arg);
return true;
}
m_internal_get += 4;
return false;
}
@ -101,10 +256,8 @@ namespace rsx
if (m_remaining_commands > count)
{
m_command_reg += m_command_inc * count;
m_args_ptr += 4 * count;
m_remaining_commands -= count;
m_internal_get += 4 * count;
return true;
}
@ -120,19 +273,10 @@ namespace rsx
void FIFO_control::read(register_pair& data)
{
const u32 put = read_put();
m_internal_get = m_ctrl->get;
if (put == m_internal_get)
{
// Nothing to do
data.reg = FIFO_EMPTY;
return;
}
if (m_remaining_commands && read_unsafe(data))
if (m_remaining_commands)
{
// Previous block aborted to wait for PUT pointer
read_unsafe(data);
return;
}
@ -155,15 +299,38 @@ namespace rsx
m_memwatch_cmp = 0;
}
if (const u32 addr = m_iotable->get_addr(m_internal_get); addr + 1)
if (!g_cfg.core.rsx_fifo_accuracy)
{
m_cmd = vm::read32(addr);
const u32 put = read_put();
if (put == m_internal_get)
{
// Nothing to do
data.reg = FIFO_EMPTY;
return;
}
if (const u32 addr = m_iotable->get_addr(m_internal_get); addr + 1)
{
m_cmd = vm::read32(addr);
}
else
{
data.reg = FIFO_ERROR;
return;
}
}
else
{
// TODO: Optional recovery
data.reg = FIFO_ERROR;
return;
if (auto [ok, arg] = fetch_u32(m_internal_get); ok)
{
m_cmd = arg;
}
else
{
data.reg = arg;
return;
}
}
if (m_cmd & RSX_METHOD_NON_METHOD_CMD_MASK) [[unlikely]]
@ -188,20 +355,11 @@ namespace rsx
if (!count)
{
m_ctrl->get.release(m_internal_get + 4);
m_ctrl->get.release(m_internal_get += 4);
data.reg = FIFO_NOP;
return;
}
// Validate the args ptr if the command attempts to read from it
m_args_ptr = m_iotable->get_addr(m_internal_get + 4);
if (m_args_ptr == umax) [[unlikely]]
{
// Optional recovery
data.reg = FIFO_ERROR;
return;
}
if (count > 1)
{
// Set up readback parameters
@ -210,8 +368,43 @@ namespace rsx
m_remaining_commands = count - 1;
}
if (g_cfg.core.rsx_fifo_accuracy)
{
m_internal_get += 4;
auto [ok, arg] = fetch_u32(m_internal_get);
if (!ok)
{
// Optional recovery
if (arg == FIFO_ERROR)
{
data.reg = FIFO_ERROR;
}
else
{
data.reg = FIFO_EMPTY;
m_command_reg = m_cmd & 0xfffc;
m_remaining_commands++;
}
return;
}
data.set(m_cmd & 0xfffc, arg);
return;
}
inc_get(true); // Wait for data block to become available
m_internal_get += 4;
// Validate the args ptr if the command attempts to read from it
m_args_ptr = m_iotable->get_addr(m_internal_get);
if (m_args_ptr == umax) [[unlikely]]
{
// Optional recovery
data.reg = FIFO_ERROR;
return;
}
data.set(m_cmd & 0xfffc, vm::read32(m_args_ptr));
}
@ -452,7 +645,7 @@ namespace rsx
}
//rsx_log.warning("rsx jump(0x%x) #addr=0x%x, cmd=0x%x, get=0x%x, put=0x%x", offs, m_ioAddress + get, cmd, get, put);
fifo_ctrl->set_get(offs);
fifo_ctrl->set_get(offs, cmd);
return;
}
if ((cmd & RSX_METHOD_CALL_CMD_MASK) == RSX_METHOD_CALL_CMD)
@ -552,9 +745,9 @@ namespace rsx
commands.back().rsx_command.first = (fifo_ctrl->last_cmd() & RSX_METHOD_NON_INCREMENT_CMD_MASK) | (reg << 2) | (remaining << 18);
for (u32 i = 1; i < remaining && fifo_ctrl->get_pos() + (i - 1) * 4 != (ctrl->put & ~3); i++)
for (u32 i = 1; i < remaining && fifo_ctrl->get_pos() + i * 4 != (ctrl->put & ~3); i++)
{
replay_cmd.rsx_command = std::make_pair(0, vm::read32(fifo_ctrl->get_current_arg_ptr() + (i * 4)));
replay_cmd.rsx_command = std::make_pair(0, vm::read32(iomap_table.get_addr(fifo_ctrl->get_pos()) + (i * 4)));
commands.push_back(replay_cmd);
}

View file

@ -3,6 +3,8 @@
#include "util/types.hpp"
#include "Emu/RSX/gcm_enums.h"
#include <span>
struct RsxDmaControl;
namespace rsx
@ -124,18 +126,24 @@ namespace rsx
u32 m_args_ptr = 0;
u32 m_cmd = ~0u;
u32 m_cache_addr = 0;
u32 m_cache_size = 0;
alignas(64) std::byte m_cache[8][128];
public:
FIFO_control(rsx::thread* pctrl);
~FIFO_control() = default;
std::pair<bool, u32> fetch_u32(u32 addr);
void invalidate_cache() { m_cache_size = 0; }
u32 get_pos() const { return m_internal_get; }
u32 last_cmd() const { return m_cmd; }
void sync_get() const;
u32 get_current_arg_ptr() const { return m_args_ptr; }
std::span<const u32> get_current_arg_ptr() const;
u32 get_remaining_args_count() const { return m_remaining_commands; }
void inc_get(bool wait);
void set_get(u32 get, bool check_spin = false);
void set_get(u32 get, u32 spin_cmd = 0);
void abort();
template <bool = true>

View file

@ -77,6 +77,12 @@ namespace rsx
{
std::function<bool(u32 addr, bool is_writing)> g_access_violation_handler;
// Constructs the IO mapping table with both address arrays (ea and io) initialized from fill_array(-1).
// NOTE(review): presumably this sets every entry to -1, the value get_addr documents as "failure" - confirm against fill_array.
rsx_iomap_table::rsx_iomap_table() noexcept
: ea(fill_array(-1))
, io(fill_array(-1))
{
}
u32 get_address(u32 offset, u32 location, u32 size_to_check, u32 line, u32 col, const char* file, const char* func)
{
const auto render = get_current_renderer();
@ -2649,6 +2655,7 @@ namespace rsx
{
// Make sure GET value is exposed before sync points
fifo_ctrl->sync_get();
fifo_ctrl->invalidate_cache();
}
std::pair<u32, u32> thread::try_get_pc_of_x_cmds_backwards(u32 count, u32 get) const
@ -2710,6 +2717,8 @@ namespace rsx
void thread::recover_fifo(u32 line, u32 col, const char* file, const char* func)
{
bool kill_itself = g_cfg.core.rsx_fifo_accuracy == rsx_fifo_mode::as_ps3;
const u64 current_time = rsx::uclock();
if (recovered_fifo_cmds_history.size() == 20u)
@ -2721,13 +2730,18 @@ namespace rsx
if (current_time - cmd_info.timestamp < 2'000'000u - std::min<u32>(g_cfg.video.driver_wakeup_delay * 700, 1'400'000))
{
// Probably hopeless
fmt::throw_exception("Dead FIFO commands queue state has been detected!\nTry increasing \"Driver Wake-Up Delay\" setting in Advanced settings. Called from %s", src_loc{line, col, file, func});
kill_itself = true;
}
// Erase the last command from history, keep the size of the queue the same
recovered_fifo_cmds_history.pop();
}
if (kill_itself)
{
fmt::throw_exception("Dead FIFO commands queue state has been detected!\nTry increasing \"Driver Wake-Up Delay\" setting in Advanced settings. Called from %s", src_loc{line, col, file, func});
}
// Error. Should reset the queue
fifo_ctrl->set_get(restore_point);
fifo_ret_addr = saved_fifo_ret;

View file

@ -44,13 +44,9 @@ namespace rsx
{
std::array<atomic_t<u32>, 4096> ea;
std::array<atomic_t<u32>, 4096> io;
std::array<shared_mutex, 4096> rs{};
std::array<shared_mutex, 0x8'0000> rs;
rsx_iomap_table() noexcept
: ea(fill_array(-1))
, io(fill_array(-1))
{
}
rsx_iomap_table() noexcept;
// Try to get the real address given a mapped address
// Returns -1 on failure
@ -59,39 +55,32 @@ namespace rsx
return this->ea[offs >> 20] | (offs & 0xFFFFF);
}
template<bool IsFullLock>
template <bool IsFullLock, uint Stride>
bool lock(u32 addr, u32 len, cpu_thread* self = nullptr) noexcept
{
if (len <= 1) return false;
const u32 end = addr + len - 1;
for (u32 block = (addr >> 20); block <= (end >> 20); ++block)
bool added_wait = false;
for (u32 block = addr / 8192; block <= (end / 8192); block += Stride)
{
auto& mutex_ = rs[block];
if constexpr (IsFullLock)
if (IsFullLock ? !mutex_.try_lock() : !mutex_.try_lock_shared()) [[ unlikely ]]
{
if (self) [[ likely ]]
if (self)
{
while (!mutex_.try_lock())
{
self->cpu_wait({});
}
added_wait |= !self->state.test_and_set(cpu_flag::wait);
}
if (!self || self->id_type() != 0x55u)
{
IsFullLock ? mutex_.lock() : mutex_.lock_shared();
}
else
{
mutex_.lock();
}
}
else
{
if (!self) [[ likely ]]
{
mutex_.lock_shared();
}
else
{
while (!mutex_.try_lock_shared())
while (IsFullLock ? !mutex_.try_lock() : !mutex_.try_lock_shared())
{
self->cpu_wait({});
}
@ -99,16 +88,21 @@ namespace rsx
}
}
if (added_wait)
{
self->check_state();
}
return true;
}
template<bool IsFullLock>
template <bool IsFullLock, uint Stride>
void unlock(u32 addr, u32 len) noexcept
{
ensure(len >= 1);
const u32 end = addr + len - 1;
for (u32 block = (addr >> 20); block <= (end >> 20); ++block)
for (u32 block = (addr / 8192); block <= (end / 8192); block += Stride)
{
if constexpr (IsFullLock)
{
@ -847,7 +841,7 @@ namespace rsx
return g_fxo->try_get<rsx::thread>();
}
template<bool IsFullLock = false>
template<bool IsFullLock = false, uint Stride = 128>
class reservation_lock
{
u32 addr = 0, length = 0;
@ -858,9 +852,7 @@ namespace rsx
this->addr = addr;
this->length = length;
auto renderer = get_current_renderer();
cpu_thread* lock_owner = renderer->is_current_thread() ? renderer : nullptr;
this->locked = renderer->iomap_table.lock<IsFullLock>(addr, length, lock_owner);
this->locked = get_current_renderer()->iomap_table.lock<IsFullLock, Stride>(addr, length, get_current_cpu_thread());
}
public:
@ -873,6 +865,14 @@ namespace rsx
}
}
// Conditional lock: the range [addr, addr + length) is locked only when `setting` is true
// and `addr` lies below constants::local_mem_base. Otherwise no lock is taken and
// this->addr / this->length keep their default value of 0.
reservation_lock(u32 addr, u32 length, bool setting)
{
	if (!setting || addr >= constants::local_mem_base)
	{
		// Locking disabled by the caller, or the address is at/above local_mem_base
		return;
	}

	lock_range(addr, length);
}
// Multi-range lock. If ranges overlap, the combined range will be acquired.
// If ranges do not overlap, the first range that is in main memory will be acquired.
reservation_lock(u32 dst_addr, u32 dst_length, u32 src_addr, u32 src_length)
@ -904,7 +904,7 @@ namespace rsx
{
if (locked)
{
get_current_renderer()->iomap_table.unlock<IsFullLock>(addr, length);
get_current_renderer()->iomap_table.unlock<IsFullLock, Stride>(addr, length);
}
}
};

View file

@ -21,7 +21,11 @@ namespace rsx
const u32 cmd = rsx->get_fifo_cmd();
rsx_log.error("Invalid RSX method 0x%x (arg=0x%x, start=0x%x, count=0x%x, non-inc=%s)", reg << 2, arg,
cmd & 0xfffc, (cmd >> 18) & 0x7ff, !!(cmd & RSX_METHOD_NON_INCREMENT_CMD));
rsx->recover_fifo();
if (g_cfg.core.rsx_fifo_accuracy != rsx_fifo_mode::as_ps3)
{
rsx->recover_fifo();
}
}
static void trace_method(thread* /*rsx*/, u32 reg, u32 arg)
@ -181,7 +185,9 @@ namespace rsx
// TODO: Check if possible to write on reservations
if (rsx->label_addr >> 28 != addr >> 28)
{
rsx_log.fatal("NV406E semaphore unexpected address. Please report to the developers. (offset=0x%x, addr=0x%x)", offset, addr);
rsx_log.error("NV406E semaphore unexpected address. Please report to the developers. (offset=0x%x, addr=0x%x)", offset, addr);
rsx->recover_fifo();
return;
}
write_gcm_label<false, true>(rsx, addr, arg);
@ -260,13 +266,20 @@ namespace rsx
return;
}
const u32 addr = get_address(offset, method_registers.semaphore_context_dma_4097());
if (rsx->label_addr >> 28 != addr >> 28)
{
rsx_log.error("NV4097 semaphore unexpected address. Please report to the developers. (offset=0x%x, addr=0x%x)", offset, addr);
}
if (g_cfg.video.strict_rendering_mode) [[ unlikely ]]
{
write_gcm_label<true, true>(rsx, get_address(offset, method_registers.semaphore_context_dma_4097()), arg);
write_gcm_label<true, true>(rsx, addr, arg);
}
else
{
write_gcm_label<true, false>(rsx, get_address(offset, method_registers.semaphore_context_dma_4097()), arg);
write_gcm_label<true, false>(rsx, addr, arg);
}
}
@ -283,8 +296,15 @@ namespace rsx
return;
}
const u32 addr = get_address(offset, method_registers.semaphore_context_dma_4097());
if (rsx->label_addr >> 28 != addr >> 28)
{
rsx_log.error("NV4097 semaphore unexpected address. Please report to the developers. (offset=0x%x, addr=0x%x)", offset, addr);
}
const u32 val = (arg & 0xff00ff00) | ((arg & 0xff) << 16) | ((arg >> 16) & 0xff);
write_gcm_label<true, true>(rsx, get_address(offset, method_registers.semaphore_context_dma_4097()), val);
write_gcm_label<true, true>(rsx, addr, val);
}
/**
@ -456,9 +476,16 @@ namespace rsx
const u32 reg = index / 4;
const u8 subreg = index % 4;
// Get real args count
const u32 count = std::min<u32>({rsx->fifo_ctrl->get_remaining_args_count() + 1,
static_cast<u32>(((rsx->ctrl->put & ~3ull) - (rsx->fifo_ctrl->get_pos() - 4)) / 4), 32 - index});
// FIFO args count including this one
const u32 fifo_args_cnt = rsx->fifo_ctrl->get_remaining_args_count() + 1;
// The range of methods this function is responsible for
const u32 method_range = 32 - index;
// Get limit imposed by FIFO PUT (if put is behind get it will result in a number ignored by min)
const u32 fifo_read_limit = static_cast<u32>(((rsx->ctrl->put & ~3ull) - (rsx->fifo_ctrl->get_pos())) / 4);
const u32 count = std::min<u32>({fifo_args_cnt, fifo_read_limit, method_range});
const u32 load = rsx::method_registers.transform_constant_load();
@ -476,21 +503,28 @@ namespace rsx
const auto values = &rsx::method_registers.transform_constants[load + reg][subreg];
const auto fifo_span = rsx->fifo_ctrl->get_current_arg_ptr();
if (fifo_span.size() < rcount)
{
rcount = fifo_span.size();
}
if (rsx->m_graphics_state & rsx::pipeline_state::transform_constants_dirty)
{
// Minor optimization: don't compare values if we already know we need invalidation
copy_data_swap_u32(values, static_cast<u32*>(vm::base(rsx->fifo_ctrl->get_current_arg_ptr())), rcount);
copy_data_swap_u32(values, fifo_span.data(), rcount);
}
else
{
if (copy_data_swap_u32_cmp(values, static_cast<u32*>(vm::base(rsx->fifo_ctrl->get_current_arg_ptr())), rcount))
if (copy_data_swap_u32_cmp(values, fifo_span.data(), rcount))
{
// Transform constants invalidation is expensive (~8k bytes per update)
rsx->m_graphics_state |= rsx::pipeline_state::transform_constants_dirty;
}
}
rsx->fifo_ctrl->skip_methods(count - 1);
rsx->fifo_ctrl->skip_methods(rcount - 1);
}
};
@ -500,9 +534,16 @@ namespace rsx
{
const u32 index = reg - NV4097_SET_TRANSFORM_PROGRAM;
// Get real args count
const u32 count = std::min<u32>({rsx->fifo_ctrl->get_remaining_args_count() + 1,
static_cast<u32>(((rsx->ctrl->put & ~3ull) - (rsx->fifo_ctrl->get_pos() - 4)) / 4), 32 - index});
// FIFO args count including this one
const u32 fifo_args_cnt = rsx->fifo_ctrl->get_remaining_args_count() + 1;
// The range of methods this function is responsible for
const u32 method_range = 32 - index;
// Get limit imposed by FIFO PUT (if put is behind get it will result in a number ignored by min)
const u32 fifo_read_limit = static_cast<u32>(((rsx->ctrl->put & ~3ull) - (rsx->fifo_ctrl->get_pos())) / 4);
const u32 count = std::min<u32>({fifo_args_cnt, fifo_read_limit, method_range});
const u32 load_pos = rsx::method_registers.transform_program_load();
@ -515,11 +556,18 @@ namespace rsx
rcount -= max - (max_vertex_program_instructions * 4);
}
copy_data_swap_u32(&rsx::method_registers.transform_program[load_pos * 4 + index % 4], static_cast<u32*>(vm::base(rsx->fifo_ctrl->get_current_arg_ptr())), rcount);
const auto fifo_span = rsx->fifo_ctrl->get_current_arg_ptr();
if (fifo_span.size() < rcount)
{
rcount = fifo_span.size();
}
copy_data_swap_u32(&rsx::method_registers.transform_program[load_pos * 4 + index % 4], fifo_span.data(), rcount);
rsx->m_graphics_state |= rsx::pipeline_state::vertex_program_ucode_dirty;
rsx::method_registers.transform_program_load_set(load_pos + ((rcount + index % 4) / 4));
rsx->fifo_ctrl->skip_methods(count - 1);
rsx->fifo_ctrl->skip_methods(rcount - 1);
}
};
@ -953,11 +1001,18 @@ namespace rsx
}
// Get position of the current command arg
const u32 src_offset = rsx->fifo_ctrl->get_pos() - 4;
const u32 src_offset = rsx->fifo_ctrl->get_pos();
// Get real args count (starting from NV3089_COLOR)
const u32 count = std::min<u32>({rsx->fifo_ctrl->get_remaining_args_count() + 1,
static_cast<u32>(((rsx->ctrl->put & ~3ull) - src_offset) / 4), 0x700 - index, out_x_max - index});
// FIFO args count including this one
const u32 fifo_args_cnt = rsx->fifo_ctrl->get_remaining_args_count() + 1;
// The range of methods this function is responsible for
const u32 method_range = std::min<u32>(0x700 - index, out_x_max - index);
// Get limit imposed by FIFO PUT (if put is behind get it will result in a number ignored by min)
const u32 fifo_read_limit = static_cast<u32>(((rsx->ctrl->put & ~3ull) - (rsx->fifo_ctrl->get_pos())) / 4);
u32 count = std::min<u32>({fifo_args_cnt, fifo_read_limit, method_range});
const u32 dst_dma = method_registers.blit_engine_output_location_nv3062();
const u32 dst_offset = method_registers.blit_engine_output_offset_nv3062();
@ -966,6 +1021,13 @@ namespace rsx
const u32 x = method_registers.nv308a_x() + index;
const u32 y = method_registers.nv308a_y();
const auto fifo_span = rsx->fifo_ctrl->get_current_arg_ptr();
if (fifo_span.size() < count)
{
count = fifo_span.size();
}
// Skip "handled methods"
rsx->fifo_ctrl->skip_methods(count - 1);
@ -986,12 +1048,10 @@ namespace rsx
return;
}
const auto src_address = get_address(src_offset, CELL_GCM_LOCATION_MAIN);
const auto dst = vm::_ptr<u8>(dst_address);
const auto src = vm::_ptr<const u8>(src_address);
const auto src = reinterpret_cast<const u8*>(fifo_span.data());
auto res = rsx::reservation_lock<true>(dst_address, data_length, src_address, data_length);
rsx::reservation_lock<true> rsx_lock(dst_address, data_length);
if (rsx->fifo_ctrl->last_cmd() & RSX_METHOD_NON_INCREMENT_CMD_MASK) [[unlikely]]
{
@ -1022,9 +1082,8 @@ namespace rsx
const auto data_length = count * 2;
const auto dst_address = get_address(dst_offset + (x * 2) + (y * out_pitch), dst_dma, data_length);
const auto src_address = get_address(src_offset, CELL_GCM_LOCATION_MAIN);
const auto dst = vm::_ptr<u16>(dst_address);
const auto src = vm::_ptr<const u32>(src_address);
const auto src = reinterpret_cast<const be_t<u32>*>(fifo_span.data());
if (!dst_address)
{
@ -1032,7 +1091,7 @@ namespace rsx
return;
}
auto res = rsx::reservation_lock<true>(dst_address, data_length, src_address, data_length);
rsx::reservation_lock<true> rsx_lock(dst_address, data_length);
auto convert = [](u32 input) -> u16
{

View file

@ -43,6 +43,19 @@ struct cfg_root : cfg::node
cfg::_bool spu_accurate_dma{ this, "Accurate SPU DMA", false };
cfg::_bool accurate_cache_line_stores{ this, "Accurate Cache Line Stores", false };
cfg::_bool rsx_accurate_res_access{this, "Accurate RSX reservation access", false, true};
// Config node for the RSX FIFO accuracy mode that can additionally be tested as a boolean:
// it evaluates to true for every mode except rsx_fifo_mode::fast.
struct fifo_setting : public cfg::_enum<rsx_fifo_mode>
{
	// Inherit the constructors of the underlying enum node
	using _enum = cfg::_enum<rsx_fifo_mode>;
	using _enum::_enum;

	// Explicit so that the setting is only usable as a condition, never silently converted to a number
	explicit operator bool() const
	{
		const auto current_mode = get();
		return current_mode != rsx_fifo_mode::fast;
	}
};
fifo_setting rsx_fifo_accuracy{this, "RSX FIFO Accuracy", rsx_fifo_mode::fast };
cfg::_bool spu_verification{ this, "SPU Verification", true }; // Should be enabled
cfg::_bool spu_cache{ this, "SPU Cache", true };
cfg::_bool spu_prof{ this, "SPU Profiler", false };

View file

@ -187,6 +187,23 @@ void fmt_class_string<tsx_usage>::format(std::string& out, u64 arg)
});
}
// Formats an rsx_fifo_mode value as its display name; values outside the enum map to `unknown`.
template <>
void fmt_class_string<rsx_fifo_mode>::format(std::string& out, u64 arg)
{
	// Maps each mode to the name used when the setting is printed
	const auto mode_name = [](rsx_fifo_mode mode)
	{
		switch (mode)
		{
		case rsx_fifo_mode::fast: return "Fast";
		case rsx_fifo_mode::atomic: return "Atomic";
		case rsx_fifo_mode::atomic_ordered: return "Ordered & Atomic";
		case rsx_fifo_mode::as_ps3: return "PS3";
		}

		return unknown;
	};

	format_enum(out, arg, mode_name);
}
template <>
void fmt_class_string<sleep_timers_accuracy_level>::format(std::string& out, u64 arg)
{

View file

@ -218,6 +218,14 @@ enum class screen_quadrant
bottom_right
};
// Accuracy mode of RSX FIFO command fetching.
// The enumerator order is significant: the FIFO code compares modes with >= and the settings
// dialog uses the numeric value as a combo box index.
enum class rsx_fifo_mode
{
// Default; the only mode for which the config setting evaluates to false
fast,
// FIFO words are fetched through the atomic cache-line fetch path
atomic,
// Like atomic, but a cache line that failed to fetch is retried before moving on to another one
atomic_ordered,
// Developers' setting (removed from the settings combo box); FIFO recovery raises an exception instead of resetting the queue
as_ps3,
};
enum class tsx_usage
{
disabled,

View file

@ -1097,6 +1097,15 @@ QString emu_settings::GetLocalizedSetting(const QString& original, emu_settings_
case sleep_timers_accuracy_level::_all_timers: return tr("All Timers", "Sleep timers accuracy");
}
break;
case emu_settings_type::FIFOAccuracy:
switch (static_cast<rsx_fifo_mode>(index))
{
case rsx_fifo_mode::fast: return tr("Fast", "RSX FIFO Accuracy");
case rsx_fifo_mode::atomic: return tr("Atomic", "RSX FIFO Accuracy");
case rsx_fifo_mode::atomic_ordered: return tr("Ordered & Atomic", "RSX FIFO Accuracy");
case rsx_fifo_mode::as_ps3: return tr("PS3", "RSX FIFO Accuracy");
}
break;
case emu_settings_type::PerfOverlayDetailLevel:
switch (static_cast<detail_level>(index))
{

View file

@ -24,6 +24,7 @@ enum class emu_settings_type
AccurateSpuDMA,
AccurateClineStores,
AccurateRSXAccess,
FIFOAccuracy,
AccurateXFloat,
ApproximateXFloat,
AccuratePPU128Loop,
@ -194,6 +195,7 @@ inline static const QMap<emu_settings_type, cfg_location> settings_location =
{ emu_settings_type::AccurateSpuDMA, { "Core", "Accurate SPU DMA"}},
{ emu_settings_type::AccurateClineStores, { "Core", "Accurate Cache Line Stores"}},
{ emu_settings_type::AccurateRSXAccess, { "Core", "Accurate RSX reservation access"}},
{ emu_settings_type::FIFOAccuracy, { "Core", "RSX FIFO Accuracy"}},
{ emu_settings_type::AccurateXFloat, { "Core", "Accurate xfloat"}},
{ emu_settings_type::ApproximateXFloat, { "Core", "Approximate xfloat"}},
{ emu_settings_type::MFCCommandsShuffling, { "Core", "MFC Commands Shuffling Limit"}},

View file

@ -1341,6 +1341,12 @@ settings_dialog::settings_dialog(std::shared_ptr<gui_settings> gui_settings, std
m_emu_settings->EnhanceComboBox(ui->sleepTimersAccuracy, emu_settings_type::SleepTimersAccuracy);
SubscribeTooltip(ui->gb_sleep_timers_accuracy, tooltips.settings.sleep_timers_accuracy);
m_emu_settings->EnhanceComboBox(ui->FIFOAccuracy, emu_settings_type::FIFOAccuracy);
SubscribeTooltip(ui->gb_rsx_fifo_accuracy, tooltips.settings.rsx_fifo_accuracy);
// Hide a developers' setting
ui->FIFOAccuracy->removeItem(static_cast<int>(rsx_fifo_mode::as_ps3));
m_emu_settings->EnhanceComboBox(ui->vulkansched, emu_settings_type::VulkanAsyncSchedulerDriver);
SubscribeTooltip(ui->gb_vulkansched, tooltips.settings.vulkan_async_scheduler);

View file

@ -2284,6 +2284,49 @@
</layout>
</widget>
</item>
<item>
<widget class="QGroupBox" name="gb_clockScale">
<property name="sizePolicy">
<sizepolicy hsizetype="Preferred" vsizetype="Minimum">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="title">
<string>Clocks Scale</string>
</property>
<layout class="QVBoxLayout" name="gb_clockScale_layout">
<item>
<widget class="QSlider" name="clockScale">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
</widget>
</item>
<item>
<layout class="QHBoxLayout" name="clockScaleLayout" stretch="1,0">
<item>
<widget class="QLabel" name="clockScaleText">
<property name="text">
<string>100%</string>
</property>
<property name="alignment">
<set>Qt::AlignCenter</set>
</property>
</widget>
</item>
<item>
<widget class="QPushButton" name="clockScaleReset">
<property name="text">
<string>Reset</string>
</property>
</widget>
</item>
</layout>
</item>
</layout>
</widget>
</item>
<item>
<spacer name="advancedTabSpacerLeft">
<property name="orientation">
@ -2459,6 +2502,24 @@
</layout>
</widget>
</item>
<item>
<widget class="QGroupBox" name="gb_rsx_fifo_accuracy">
<property name="sizePolicy">
<sizepolicy hsizetype="Preferred" vsizetype="Minimum">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="title">
<string>RSX FIFO Accuracy</string>
</property>
<layout class="QVBoxLayout" name="gb_rsx_fifo_accuracy_layout">
<item>
<widget class="QComboBox" name="FIFOAccuracy"/>
</item>
</layout>
</widget>
</item>
<item>
<widget class="QGroupBox" name="gb_wakeupDelay">
<property name="sizePolicy">
@ -2552,49 +2613,6 @@
</layout>
</widget>
</item>
<item>
<widget class="QGroupBox" name="gb_clockScale">
<property name="sizePolicy">
<sizepolicy hsizetype="Preferred" vsizetype="Minimum">
<horstretch>0</horstretch>
<verstretch>0</verstretch>
</sizepolicy>
</property>
<property name="title">
<string>Clocks Scale</string>
</property>
<layout class="QVBoxLayout" name="gb_clockScale_layout">
<item>
<widget class="QSlider" name="clockScale">
<property name="orientation">
<enum>Qt::Horizontal</enum>
</property>
</widget>
</item>
<item>
<layout class="QHBoxLayout" name="clockScaleLayout" stretch="1,0">
<item>
<widget class="QLabel" name="clockScaleText">
<property name="text">
<string>100%</string>
</property>
<property name="alignment">
<set>Qt::AlignCenter</set>
</property>
</widget>
</item>
<item>
<widget class="QPushButton" name="clockScaleReset">
<property name="text">
<string>Reset</string>
</property>
</widget>
</item>
</layout>
</item>
</layout>
</widget>
</item>
<item>
<spacer name="advancedTabSpacerRight">
<property name="orientation">

View file

@ -33,6 +33,7 @@ public:
const QString zcull_operation_mode = tr("Changes ZCULL report synchronization behaviour. Experiment to find the best option for your game. Approximate mode is recommended for most games.\n· Precise is the most accurate to PS3 behaviour. Required for accurate visuals in some titles such as Demon's Souls and The Darkness.\n· Approximate is a much faster way to generate occlusion data which may not always match what the PS3 would generate. Works well with most PS3 games.\n· Relaxed changes the synchronization method completely and can greatly improve performance in some games or completely break others.");
const QString max_spurs_threads = tr("Limits the maximum number of SPURS threads in each thread group.\nMay improve performance in some cases, especially on systems with limited number of hardware threads.\nLimiting the number of threads is likely to cause crashes; it's recommended to keep this at the default value.");
const QString sleep_timers_accuracy = tr("Changes the sleep period accuracy.\n'As Host' uses default accuracy of the underlying operating system, while 'All Timers' attempts to improve it.\n'Usleep Only' limits the adjustments to usleep syscall only.\nCan affect performance in unexpected ways.");
// Tooltip of the "RSX FIFO Accuracy" setting. Option names quoted here must match the combo box labels ("Fast", "Atomic", "Ordered & Atomic").
const QString rsx_fifo_accuracy = tr("\"Fast\" is the least accurate setting, RSX does not emulate atomic FIFO buffer.\n\"Ordered & Atomic\" is the most accurate but it is the slowest and without much stability benefit in games over \"Atomic\" which benefits stability greatly in many games with little performance penalty.");
const QString vblank_rate = tr("Adjusts the frequency of vertical blanking signals that the emulator sends.\nAffects timing of events which rely on these signals.");
const QString vblank_ntsc_fixup = tr("Multiplies the rate of VBLANK by 1000/1001 for values like 59.94Hz.\nKnown to fix the rhythm game Space Channel 5 Part 2");
const QString clocks_scale = tr("Changes the scale of emulated system time.\nAffects software which uses system time to calculate things such as dynamic timesteps.");