vk: Recognise the NVK driver and add vendor-family helper predicates.

Summary of what this patch does:
  * Adds driver_vendor::NVK, reported for VK_DRIVER_ID_MESA_NVK or, when no driver ID
    is available, for NVIDIA-named devices whose name also contains "NVK".
  * Adds is_NVIDIA/is_AMD/is_INTEL overloads taking a driver_vendor, and bounds the
    chip_class overloads with per-vendor sentinel enumerators (the previous is_NVIDIA
    upper bound stopped at NV_ampere and therefore missed NV_lovelace).
  * Converts direct comparisons against driver_vendor::NVIDIA to vk::is_NVIDIA(), keeping
    the polarity of every original comparison ("!=" becomes "!is_NVIDIA").

diff --git a/rpcs3/Emu/RSX/VK/VKCompute.cpp b/rpcs3/Emu/RSX/VK/VKCompute.cpp
index 5a88537ce1..69f85e7c38 100644
--- a/rpcs3/Emu/RSX/VK/VKCompute.cpp
+++ b/rpcs3/Emu/RSX/VK/VKCompute.cpp
@@ -81,6 +81,7 @@ namespace vk
 			case vk::driver_vendor::DOZEN:
 				// Actual optimal size depends on the D3D device. Use 32 since it should work well on both AMD and NVIDIA
 			case vk::driver_vendor::NVIDIA:
+			case vk::driver_vendor::NVK:
 				// Warps are multiples of 32. Increasing kernel depth seems to hurt performance (Nier, Big Duck sample)
 				unroll_loops = true;
 				optimal_kernel_size = 1;
diff --git a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp
index da77ce54aa..aab1406ac5 100644
--- a/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp
+++ b/rpcs3/Emu/RSX/VK/VKFragmentProgram.cpp
@@ -247,7 +247,7 @@ void VKFragmentDecompilerThread::insertGlobalFunctions(std::stringstream &OS)
 	m_shader_props.emulate_coverage_tests = g_cfg.video.antialiasing_level == msaa_level::none;
 	m_shader_props.emulate_shadow_compare = device_props.emulate_depth_compare;
 	m_shader_props.low_precision_tests = device_props.has_low_precision_rounding && !(m_prog.ctrl & RSX_SHADER_CONTROL_ATTRIBUTE_INTERPOLATION);
-	m_shader_props.disable_early_discard = vk::get_driver_vendor() != vk::driver_vendor::NVIDIA;
+	m_shader_props.disable_early_discard = !vk::is_NVIDIA(vk::get_driver_vendor()); // Early discard stays enabled only on NVIDIA-family drivers
 	m_shader_props.supports_native_fp16 = device_props.has_native_half_support;
 	m_shader_props.ROP_output_rounding = g_cfg.video.shader_precision != gpu_preset_level::low;
 	m_shader_props.require_tex1D_ops = properties.has_tex1D;
@@ -402,7 +402,7 @@ void VKFragmentProgram::Decompile(const RSXFragmentProgram& prog)
 	}
 
 	decompiler.device_props.emulate_depth_compare = !pdev->get_formats_support().d24_unorm_s8;
-	decompiler.device_props.has_low_precision_rounding = vk::get_driver_vendor() == vk::driver_vendor::NVIDIA;
+	decompiler.device_props.has_low_precision_rounding = vk::is_NVIDIA(vk::get_driver_vendor());
 	decompiler.Task();
 
 	shader.create(::glsl::program_domain::glsl_fragment_program, source);
diff --git a/rpcs3/Emu/RSX/VK/VKGSRender.cpp b/rpcs3/Emu/RSX/VK/VKGSRender.cpp
index 4968b832e8..8c765abd5c 100644
--- a/rpcs3/Emu/RSX/VK/VKGSRender.cpp
+++ b/rpcs3/Emu/RSX/VK/VKGSRender.cpp
@@ -743,7 +743,7 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar)
 
 	// NVIDIA has broken attribute interpolation
 	backend_config.supports_normalized_barycentrics = (
-		vk::get_driver_vendor() != vk::driver_vendor::NVIDIA ||
+		!vk::is_NVIDIA(vk::get_driver_vendor()) ||
 		!m_device->get_barycoords_support() ||
 		g_cfg.video.shader_precision == gpu_preset_level::low);
 
@@ -758,7 +758,7 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar)
 
 	// NOTE: On NVIDIA cards going back decades (including the PS3) there is a slight normalization inaccuracy in compressed formats.
 	// Confirmed in BLES01916 (The Evil Within) which uses RGB565 for some virtual texturing data.
-	backend_config.supports_hw_renormalization = (vk::get_driver_vendor() == vk::driver_vendor::NVIDIA);
+	backend_config.supports_hw_renormalization = vk::is_NVIDIA(vk::get_driver_vendor());
 
 	// Conditional rendering support
 	// Do not use on MVK due to a speedhack we rely on (streaming results without stopping the current renderpass)
@@ -800,6 +800,10 @@ VKGSRender::VKGSRender(utils::serial* ar) noexcept : GSRender(ar)
 			g_cfg.video.vk.asynchronous_scheduler.set(vk_gpu_scheduler_mode::safe);
 		}
 		break;
+	case vk::driver_vendor::NVK:
+		// TODO: Verify if this driver crashes or not
+		rsx_log.warning("NVK behavior with passthrough DMA is unknown. Proceed with caution.");
+		break;
 #if !defined(_WIN32)
 		// Anything running on AMDGPU kernel driver will not work due to the check for fd-backed memory allocations
 	case vk::driver_vendor::RADV:
@@ -2407,7 +2411,8 @@ void VKGSRender::patch_transform_constants(rsx::context* ctx, u32 index, u32 cou
 			vk::driver_vendor::AMD,
 			vk::driver_vendor::RADV,
 			vk::driver_vendor::LAVAPIPE,
-			vk::driver_vendor::NVIDIA
+			vk::driver_vendor::NVIDIA,
+			vk::driver_vendor::NVK
 		};
 
 		const auto driver_vendor = vk::get_driver_vendor();
diff --git a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp
index 2faba9d014..390ca0bb39 100644
--- a/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp
+++ b/rpcs3/Emu/RSX/VK/VKVertexProgram.cpp
@@ -207,8 +207,8 @@ void VKVertexDecompilerThread::insertMainStart(std::stringstream & OS)
 	properties2.require_lit_emulation = properties.has_lit_op;
 	properties2.emulate_zclip_transform = true;
 	properties2.emulate_depth_clip_only = vk::g_render_device->get_shader_types_support().allow_float64;
-	properties2.low_precision_tests = vk::get_driver_vendor() == vk::driver_vendor::NVIDIA;
-	properties2.require_explicit_invariance = (vk::get_driver_vendor() == vk::driver_vendor::NVIDIA && g_cfg.video.shader_precision != gpu_preset_level::low);
+	properties2.low_precision_tests = vk::is_NVIDIA(vk::get_driver_vendor());
+	properties2.require_explicit_invariance = (vk::is_NVIDIA(vk::get_driver_vendor()) && g_cfg.video.shader_precision != gpu_preset_level::low);
 
 	glsl::insert_glsl_legacy_function(OS, properties2);
 	glsl::insert_vertex_input_fetch(OS, glsl::glsl_rules_vulkan);
diff --git a/rpcs3/Emu/RSX/VK/vkutils/chip_class.h b/rpcs3/Emu/RSX/VK/vkutils/chip_class.h
index 68c78e4eb8..7c33fb911b 100644
--- a/rpcs3/Emu/RSX/VK/vkutils/chip_class.h
+++ b/rpcs3/Emu/RSX/VK/vkutils/chip_class.h
@@ -9,12 +9,17 @@ namespace vk
 	enum class chip_class
 	{
 		unknown,
+
+		// AMD
 		AMD_gcn_generic,
 		AMD_polaris,
 		AMD_vega,
 		AMD_navi1x,
 		AMD_navi2x,
 		AMD_navi3x,
+		_AMD_ENUM_MAX_, // Do not insert AMD enums beyond this point
+
+		// NVIDIA
 		NV_generic,
 		NV_kepler,
 		NV_maxwell,
@@ -23,9 +28,15 @@ namespace vk
 		NV_turing,
 		NV_ampere,
 		NV_lovelace,
+		_NV_ENUM_MAX_, // Do not insert NV enums beyond this point
+
+		// APPLE
 		MVK_apple,
+
+		// INTEL
 		INTEL_generic,
-		INTEL_alchemist
+		INTEL_alchemist,
+		_INTEL_ENUM_MAX_, // Do not insert INTEL enums beyond this point
 	};
 
 	enum class driver_vendor
@@ -38,7 +49,8 @@ namespace vk
 		ANV,
 		MVK,
 		DOZEN,
-		LAVAPIPE
+		LAVAPIPE,
+		NVK
 	};
 
 	driver_vendor get_driver_vendor();
@@ -57,5 +69,13 @@ namespace vk
 	chip_class get_chip_family();
 	chip_class get_chip_family(u32 vendor_id, u32 device_id);
 
-	static inline bool is_NVIDIA(chip_class chip) { return chip >= chip_class::NV_generic && chip <= chip_class::NV_ampere; }
+	// Chip-family checks: each range ends at the vendor's sentinel enumerator, so chips added before the sentinel are covered.
+	static inline bool is_NVIDIA(chip_class chip) { return chip >= chip_class::NV_generic && chip < chip_class::_NV_ENUM_MAX_; }
+	static inline bool is_AMD(chip_class chip) { return chip >= chip_class::AMD_gcn_generic && chip < chip_class::_AMD_ENUM_MAX_; }
+	static inline bool is_INTEL(chip_class chip) { return chip >= chip_class::INTEL_generic && chip < chip_class::_INTEL_ENUM_MAX_; }
+
+	// Driver-vendor checks: group the driver_vendor values that belong to the same hardware vendor (e.g. NVIDIA and NVK).
+	static inline bool is_NVIDIA(driver_vendor vendor) { return vendor == driver_vendor::NVIDIA || vendor == driver_vendor::NVK; }
+	static inline bool is_AMD(driver_vendor vendor) { return vendor == driver_vendor::AMD || vendor == driver_vendor::RADV; }
+	static inline bool is_INTEL(driver_vendor vendor) { return vendor == driver_vendor::INTEL || vendor == driver_vendor::ANV; }
 }
diff --git a/rpcs3/Emu/RSX/VK/vkutils/device.cpp b/rpcs3/Emu/RSX/VK/vkutils/device.cpp
index 2cc5f25006..339b21015d 100644
--- a/rpcs3/Emu/RSX/VK/vkutils/device.cpp
+++ b/rpcs3/Emu/RSX/VK/vkutils/device.cpp
@@ -243,6 +243,11 @@ namespace vk
 
 			if (gpu_name.find("NVIDIA") != umax || gpu_name.find("GeForce") != umax || gpu_name.find("Quadro") != umax)
 			{
+				if (gpu_name.find("NVK") != umax)
+				{
+					return driver_vendor::NVK;
+				}
+
 				return driver_vendor::NVIDIA;
 			}
 
@@ -281,6 +286,8 @@ namespace vk
 			return driver_vendor::DOZEN;
 		case VK_DRIVER_ID_MESA_LLVMPIPE:
 			return driver_vendor::LAVAPIPE;
+		case VK_DRIVER_ID_MESA_NVK:
+			return driver_vendor::NVK;
 		default:
 			// Mobile?
 			return driver_vendor::unknown;