SPU LLVM: Add relaxed xfloat option

- This new setting is on by default
- It only takes effect when Approximate xfloat (itself on by default) is disabled
- Approximate xfloat is now exposed in the GUI
This commit is contained in:
Malcolm Jestadt 2022-01-29 13:22:09 -05:00 committed by Ivan
parent 86919ec0e1
commit 91673f8fdc
6 changed files with 17 additions and 3 deletions

View file

@ -8020,7 +8020,7 @@ public:
return eval(sext<s32[4]>(bitcast<s32[4]>(a) > bitcast<s32[4]>(b)));
}
if (g_cfg.core.spu_approx_xfloat)
if (g_cfg.core.spu_approx_xfloat || g_cfg.core.spu_relaxed_xfloat)
{
const auto ai = eval(bitcast<s32[4]>(a));
const auto bi = eval(bitcast<s32[4]>(b));
@ -8480,7 +8480,7 @@ public:
const auto b = value<f32[4]>(ci->getOperand(1));
const auto c = value<f32[4]>(ci->getOperand(2));
if (g_cfg.core.spu_approx_xfloat)
if (g_cfg.core.spu_approx_xfloat || g_cfg.core.spu_relaxed_xfloat)
{
return fma32x4(eval(-clamp_smax(a)), clamp_smax(b), c);
}

View file

@ -52,6 +52,7 @@ struct cfg_root : cfg::node
cfg::_enum<tsx_usage> enable_TSX{ this, "Enable TSX", enable_tsx_by_default() ? tsx_usage::enabled : tsx_usage::disabled }; // Enable TSX. Forcing this on Haswell/Broadwell CPUs should be used carefully
cfg::_bool spu_accurate_xfloat{ this, "Accurate xfloat", false };
cfg::_bool spu_approx_xfloat{ this, "Approximate xfloat", true };
cfg::_bool spu_relaxed_xfloat{ this, "Relaxed xfloat", true }; // Approximate accuracy for only the "FCGT" and "FNMS" instructions
cfg::_int<-1, 14> ppu_128_reservations_loop_max_length{ this, "Accurate PPU 128-byte Reservation Op Max Length", 0, true }; // -1: Always accurate, 0: Never accurate, 1-14: max accurate loop length
cfg::_int<-64, 64> stub_ppu_traps{ this, "Stub PPU Traps", 0, true }; // Hack, skip PPU traps for rare cases where the trap is continueable (specify relative instructions to skip)
cfg::_bool full_width_avx512{ this, "Full Width AVX-512", false };

View file

@ -25,6 +25,7 @@ enum class emu_settings_type
AccurateClineStores,
AccurateRSXAccess,
AccurateXFloat,
ApproximateXFloat,
AccuratePPU128Loop,
MFCCommandsShuffling,
NumPPUThreads,
@ -184,6 +185,7 @@ inline static const QMap<emu_settings_type, cfg_location> settings_location =
{ emu_settings_type::AccurateClineStores, { "Core", "Accurate Cache Line Stores"}},
{ emu_settings_type::AccurateRSXAccess, { "Core", "Accurate RSX reservation access"}},
{ emu_settings_type::AccurateXFloat, { "Core", "Accurate xfloat"}},
{ emu_settings_type::ApproximateXFloat, { "Core", "Approximate xfloat"}},
{ emu_settings_type::MFCCommandsShuffling, { "Core", "MFC Commands Shuffling Limit"}},
{ emu_settings_type::SetDAZandFTZ, { "Core", "Set DAZ and FTZ"}},
{ emu_settings_type::SPUBlockSize, { "Core", "SPU Block Size"}},

View file

@ -209,6 +209,9 @@ settings_dialog::settings_dialog(std::shared_ptr<gui_settings> gui_settings, std
m_emu_settings->EnhanceCheckBox(ui->accurateXFloat, emu_settings_type::AccurateXFloat);
SubscribeTooltip(ui->accurateXFloat, tooltips.settings.accurate_xfloat);
m_emu_settings->EnhanceCheckBox(ui->approximateXFloat, emu_settings_type::ApproximateXFloat);
SubscribeTooltip(ui->approximateXFloat, tooltips.settings.approximate_xfloat);
m_emu_settings->EnhanceCheckBox(ui->fullWidthAVX512, emu_settings_type::FullWidthAVX512);
SubscribeTooltip(ui->fullWidthAVX512, tooltips.settings.full_width_avx512);
ui->fullWidthAVX512->setEnabled(utils::has_avx512());

View file

@ -157,6 +157,13 @@
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="approximateXFloat">
<property name="text">
<string>Approximate xfloat</string>
</property>
</widget>
</item>
<item>
<widget class="QCheckBox" name="fullWidthAVX512">
<property name="text">

View file

@ -63,7 +63,8 @@ public:
const QString spu_dynamic = tr("Alternative interpreter (slow). May be faster than static interpreter. Try this if SPU Recompiler (LLVM) doesn't work.");
const QString spu_asmjit = tr("Recompiles the game's SPU code using the ASMJIT Recompiler.\nThis is the fast option with very good compatibility.\nIf unsure, use this option.");
const QString spu_llvm = tr("Recompiles and caches the game's SPU code using the LLVM Recompiler before running which adds extra start-up time.\nThis is the fastest option with very good compatibility.\nIf you experience issues, use the ASMJIT Recompiler.");
const QString accurate_xfloat = tr("Adds extra accuracy to SPU float vectors processing.\nFixes bugs in various games at the cost of performance.\nThis setting is only applied when SPU Decoder is set to Fast or LLVM.");
const QString accurate_xfloat = tr("Adds extra accuracy to SPU float vectors processing.\nFixes bugs in various games at the cost of performance.\nThis setting is only applied when SPU Decoder is set to Dynamic or LLVM.");
const QString approximate_xfloat = tr("Default accuracy for SPU float vectors processing.\nFixes bugs in various games at the cost of performance.\nThis setting is only applied when SPU Decoder is set to Dynamic or LLVM.");
const QString enable_thread_scheduler = tr("Control how RPCS3 utilizes the threads of your system.\nEach option heavily depends on the game and on your CPU. It's recommended to try each option to find out which performs the best.\nChanging the thread scheduler is not supported on CPUs with less than 12 threads.");
const QString spu_loop_detection = tr("Try to detect loop conditions in SPU kernels and use them as scheduling hints.\nImproves performance and reduces CPU usage.\nMay cause severe audio stuttering in rare cases.");
const QString enable_tsx = tr("Enable usage of TSX instructions.\nNeeds to be forced on some Haswell or Broadwell CPUs or CPUs with the TSX-FA instruction set.\nForcing TSX in these cases may lead to system and performance instability, use it with caution.");