From fb88e1c1c933a792f96600206ed03a07cd7c3661 Mon Sep 17 00:00:00 2001 From: Ivan Chikish Date: Sat, 11 Mar 2023 22:08:27 +0300 Subject: [PATCH] Update to LLVM 16.0.0, switch to upstream LLVM --- .ci/build-freebsd.sh | 6 +- .ci/build-linux.sh | 9 +- .ci/build-mac.sh | 6 +- .ci/deploy-mac.sh | 2 +- .ci/get_keys-windows.sh | 2 +- .ci/install-freebsd.sh | 2 +- .ci/setup-windows.sh | 2 +- .cirrus.yml | 2 +- 3rdparty/llvm.cmake | 34 ++--- BUILDING.md | 2 +- CMakeLists.txt | 2 +- Utilities/JIT.cpp | 58 +++++--- buildfiles/msvc/rpcs3_debug.props | 137 ++++++++++++------- buildfiles/msvc/rpcs3_release.props | 137 ++++++++++++------- llvm | 2 +- rpcs3/Emu/CPU/CPUTranslator.cpp | 14 +- rpcs3/Emu/CPU/CPUTranslator.h | 179 +++++++++++++++++++++++- rpcs3/Emu/Cell/PPUThread.cpp | 24 ++-- rpcs3/Emu/Cell/PPUTranslator.cpp | 204 ++++++++++++++++++++++++++-- rpcs3/Emu/Cell/PPUTranslator.h | 2 + rpcs3/Emu/Cell/SPURecompiler.cpp | 135 ++++++++++++++---- rpcs3/Loader/PSF.h | 2 +- rpcs3/util/sysinfo.hpp | 12 -- rpcs3/util/types.hpp | 2 +- 24 files changed, 746 insertions(+), 231 deletions(-) diff --git a/.ci/build-freebsd.sh b/.ci/build-freebsd.sh index 24038f3ba6..1ee97a4f61 100755 --- a/.ci/build-freebsd.sh +++ b/.ci/build-freebsd.sh @@ -7,9 +7,9 @@ git submodule -q update --init --depth 1 $(awk '/path/ && !/llvm/ { print $3 }' # Prefer newer Clang than in base system (see also .ci/install-freebsd.sh) # libc++ isn't in llvm* packages, so download manually -fetch https://github.com/llvm/llvm-project/releases/download/llvmorg-15.0.6/llvm-project-15.0.6.src.tar.xz +fetch https://github.com/llvm/llvm-project/releases/download/llvmorg-16.0.0/llvm-project-16.0.0.src.tar.xz tar xf llvm*.tar.xz -export CC=clang15 CXX=clang++15 +export CC=clang16 CXX=clang++16 cmake -B libcxx_build -G Ninja -S llvm*/libcxx \ -DLLVM_CCACHE_BUILD=ON \ -DLIBCXX_INCLUDE_BENCHMARKS=OFF \ @@ -20,7 +20,7 @@ export CXXFLAGS="$CXXFLAGS -nostdinc++ -isystem$PWD/libcxx_prefix/include/c++/v1 export LDFLAGS="$LDFLAGS -nostdlib++ -L$PWD/libcxx_prefix/lib -l:libc++.a -lcxxrt" CONFIGURE_ARGS=" - -DWITH_LLVM=OFF + -DWITH_LLVM=ON -DUSE_SDL=OFF -DUSE_PRECOMPILED_HEADERS=OFF -DUSE_NATIVE_INSTRUCTIONS=OFF diff --git a/.ci/build-linux.sh b/.ci/build-linux.sh index 2ea86dbff3..099fcabaf6 100755 --- a/.ci/build-linux.sh +++ b/.ci/build-linux.sh @@ -9,16 +9,11 @@ if [ -z "$CIRRUS_CI" ]; then cd rpcs3 || exit 1 fi -# Pull all the submodules except llvm, since it is built separately and we just download that build +# Pull all the submodules except llvm # Note: Tried to use git submodule status, but it takes over 20 seconds # shellcheck disable=SC2046 git submodule -q update --init $(awk '/path/ && !/llvm/ { print $3 }' .gitmodules) -# Download pre-compiled llvm libs -curl -sLO https://github.com/RPCS3/llvm-mirror/releases/download/custom-build/llvmlibs-linux.tar.gz -mkdir llvmlibs -tar -xzf ./llvmlibs-linux.tar.gz -C llvmlibs - mkdir build && cd build || exit 1 if [ "$COMPILER" = "gcc" ]; then @@ -42,8 +37,6 @@ export CFLAGS="$CFLAGS -fuse-ld=${LINKER}" cmake .. \ -DCMAKE_INSTALL_PREFIX=/usr \ - -DBUILD_LLVM_SUBMODULE=OFF \ - -DLLVM_DIR=llvmlibs/lib/cmake/llvm/ \ -DUSE_NATIVE_INSTRUCTIONS=OFF \ -DUSE_PRECOMPILED_HEADERS=OFF \ -DCMAKE_C_FLAGS="$CFLAGS" \ diff --git a/.ci/build-mac.sh b/.ci/build-mac.sh index 2a33e99d64..c087380f23 100755 --- a/.ci/build-mac.sh +++ b/.ci/build-mac.sh @@ -1,10 +1,10 @@ #!/bin/sh -ex -brew install -f --overwrite llvm@14 nasm ninja git p7zip create-dmg ccache +brew install -f --overwrite llvm@16 nasm ninja git p7zip create-dmg ccache #/usr/sbin/softwareupdate --install-rosetta --agree-to-license arch -x86_64 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" -arch -x86_64 /usr/local/homebrew/bin/brew install -f --overwrite llvm@14 sdl2 glew cmake +arch -x86_64 /usr/local/homebrew/bin/brew install -f --overwrite llvm@16 sdl2 glew cmake #export MACOSX_DEPLOYMENT_TARGET=12.0 export CXX=clang++ @@ -33,7 +33,7 @@ cd .. export Qt5_DIR="$WORKDIR/qt-downloader/5.15.2/clang_64/lib/cmake/Qt5" export SDL2_DIR="$BREW_X64_PATH/opt/sdl2/lib/cmake/SDL2" -export PATH="$BREW_PATH/opt/llvm@14/bin:$WORKDIR/qt-downloader/5.15.2/clang_64/bin:$BREW_BIN:$BREW_SBIN:/usr/bin:/bin:/usr/sbin:/sbin:/opt/X11/bin:/Library/Apple/usr/bin:$PATH" +export PATH="$BREW_PATH/opt/llvm@16/bin:$WORKDIR/qt-downloader/5.15.2/clang_64/bin:$BREW_BIN:$BREW_SBIN:/usr/bin:/bin:/usr/sbin:/sbin:/opt/X11/bin:/Library/Apple/usr/bin:$PATH" export LDFLAGS="-L$BREW_X64_PATH/lib -Wl,-rpath,$BREW_X64_PATH/lib" export CPPFLAGS="-I$BREW_X64_PATH/include -msse -msse2 -mcx16 -no-pie" export LIBRARY_PATH="$BREW_X64_PATH/lib" diff --git a/.ci/deploy-mac.sh b/.ci/deploy-mac.sh index bec1b765c0..c312d2de06 100755 --- a/.ci/deploy-mac.sh +++ b/.ci/deploy-mac.sh @@ -15,7 +15,7 @@ echo "AVVER=$AVVER" >> ../.ci/ci-vars.env cd bin mkdir "rpcs3.app/Contents/lib/" -cp "/usr/local/Homebrew/opt/llvm@14/lib/c++/libc++abi.1.0.dylib" "rpcs3.app/Contents/lib/libc++abi.1.dylib" +cp "/usr/local/Homebrew/opt/llvm@16/lib/c++/libc++abi.1.0.dylib" "rpcs3.app/Contents/lib/libc++abi.1.dylib" rm -rf "rpcs3.app/Contents/Frameworks/QtPdf.framework" \ "rpcs3.app/Contents/Frameworks/QtQml.framework" \ diff --git a/.ci/get_keys-windows.sh b/.ci/get_keys-windows.sh index 1f6efc379c..bdc0bad98a 100644 --- a/.ci/get_keys-windows.sh +++ b/.ci/get_keys-windows.sh @@ -1,4 +1,4 @@ #!/bin/sh -ex -curl -L -o "./llvm.lock" "https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win/llvmlibs_mt.7z.sha256" +curl -L -o "./llvm.lock" "https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win-16.0.0/llvmlibs_mt.7z.sha256" curl -L -o "./glslang.lock" "https://github.com/RPCS3/glslang/releases/download/custom-build-win/glslanglibs_mt.7z.sha256" diff --git a/.ci/install-freebsd.sh b/.ci/install-freebsd.sh index 2f67d16106..2a63e5ddfc 100755 --- a/.ci/install-freebsd.sh +++ b/.ci/install-freebsd.sh @@ -9,7 +9,7 @@ export ASSUME_ALWAYS_YES=true pkg info # debug # Prefer newer Clang than in base system (see also .ci/build-freebsd.sh) -pkg install llvm15 +pkg install llvm16 # Mandatory dependencies (qt5-dbus and qt5-gui are pulled via qt5-widgets) pkg install git ccache cmake ninja qt5-qmake qt5-buildtools qt5-widgets qt5-concurrent qt5-multimedia qt5-svg glew openal-soft ffmpeg diff --git a/.ci/setup-windows.sh b/.ci/setup-windows.sh index ee6cd096d0..b127f0d30a 100755 --- a/.ci/setup-windows.sh +++ b/.ci/setup-windows.sh @@ -19,7 +19,7 @@ QT_DECL_URL="${QT_HOST}${QT_PREFIX}qtdeclarative${QT_SUFFIX}" QT_TOOL_URL="${QT_HOST}${QT_PREFIX}qttools${QT_SUFFIX}" QT_MM_URL="${QT_HOST}${QT_PREFIX}qtmultimedia${QT_SUFFIX}" QT_SVG_URL="${QT_HOST}${QT_PREFIX}qtsvg${QT_SUFFIX}" -LLVMLIBS_URL='https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win/llvmlibs_mt.7z' +LLVMLIBS_URL='https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win-16.0.0/llvmlibs_mt.7z' GLSLANG_URL='https://github.com/RPCS3/glslang/releases/download/custom-build-win/glslanglibs_mt.7z' VULKAN_SDK_URL="https://www.dropbox.com/s/cs77c3iv5mbo0bt/VulkanSDK-${VULKAN_VER}-Installer.exe" diff --git a/.cirrus.yml b/.cirrus.yml index 596db3f445..0fbfebf180 100644 --- a/.cirrus.yml +++ b/.cirrus.yml @@ -61,7 +61,7 @@ windows_task: linux_task: container: - image: rpcs3/rpcs3-ci-bionic:1.6 + image: rpcs3/rpcs3-ci-bionic:1.7 cpu: 4 memory: 16G env: diff --git a/3rdparty/llvm.cmake b/3rdparty/llvm.cmake index 56f9c50d2e..9947dab1ef 100644 --- a/3rdparty/llvm.cmake +++ b/3rdparty/llvm.cmake @@ -2,7 +2,7 @@ if(WITH_LLVM) CHECK_CXX_COMPILER_FLAG("-msse -msse2 -mcx16" COMPILER_X86) CHECK_CXX_COMPILER_FLAG("-march=armv8-a+lse" COMPILER_ARM) - if(BUILD_LLVM_SUBMODULE) + if(BUILD_LLVM) message(STATUS "LLVM will be built from the submodule.") set(LLVM_TARGETS_TO_BUILD "AArch64;X86") @@ -38,49 +38,33 @@ if(WITH_LLVM) set(CMAKE_CXX_FLAGS ${CXX_FLAGS_OLD}) # now tries to find LLVM again - find_package(LLVM 13.0 CONFIG) + find_package(LLVM 16.0 CONFIG) if(NOT LLVM_FOUND) message(FATAL_ERROR "Couldn't build LLVM from the submodule. You might need to run `git submodule update --init`") endif() else() - message(STATUS "Using prebuilt LLVM") + message(STATUS "Using prebuilt or system LLVM") if (LLVM_DIR AND NOT IS_ABSOLUTE "${LLVM_DIR}") # change relative LLVM_DIR to be relative to the source dir set(LLVM_DIR ${CMAKE_SOURCE_DIR}/${LLVM_DIR}) endif() - find_package(LLVM 13.0 CONFIG) + find_package(LLVM 16.0 CONFIG) if (NOT LLVM_FOUND) - if (LLVM_VERSION AND LLVM_VERSION_MAJOR LESS 11) - message(FATAL_ERROR "Found LLVM version ${LLVM_VERSION}. Required version 11.0. \ - Enable BUILD_LLVM_SUBMODULE option to build LLVM from included as a git submodule.") + if (LLVM_VERSION AND LLVM_VERSION_MAJOR LESS 16) + message(FATAL_ERROR "Found LLVM version ${LLVM_VERSION}. Required version 16. \ + Enable BUILD_LLVM option to build LLVM from included as a git submodule.") endif() message(FATAL_ERROR "Can't find LLVM libraries from the CMAKE_PREFIX_PATH path or LLVM_DIR. \ - Enable BUILD_LLVM_SUBMODULE option to build LLVM from included as a git submodule.") + Enable BUILD_LLVM option to build LLVM from included as a git submodule.") endif() endif() - set(LLVM_LIBS LLVMMCJIT) - - if(COMPILER_X86) - set(LLVM_LIBS ${LLVM_LIBS} LLVMX86CodeGen LLVMX86AsmParser) - endif() - - if(COMPILER_ARM) - set(LLVM_LIBS ${LLVM_LIBS} LLVMX86CodeGen LLVMX86AsmParser LLVMAArch64CodeGen LLVMAArch64AsmParser) - endif() - - if(WIN32 OR CMAKE_SYSTEM MATCHES "Linux") - set(LLVM_LIBS ${LLVM_LIBS} LLVMIntelJITEvents) - endif() - - if(CMAKE_SYSTEM MATCHES "Linux") - set(LLVM_LIBS ${LLVM_LIBS} LLVMPerfJITEvents) - endif() + set(LLVM_LIBS LLVM) add_library(3rdparty_llvm INTERFACE) target_link_libraries(3rdparty_llvm INTERFACE ${LLVM_LIBS}) diff --git a/BUILDING.md b/BUILDING.md index e33f68ac8c..02da997d01 100644 --- a/BUILDING.md +++ b/BUILDING.md @@ -111,7 +111,7 @@ git submodule update --init Open `rpcs3.sln`. The recommended build configuration is `Release`. (On older revisions: `Release - LLVM`) -You may want to download the precompiled [LLVM libs](https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win/llvmlibs_mt.7z) and extract them to the root rpcs3 folder (which contains `rpcs3.sln`), as well as download and extract the [additional libs](https://github.com/RPCS3/glslang/releases/download/custom-build-win/glslanglibs_mt.7z) to `lib\%CONFIGURATION%-x64\` to speed up compilation time (unoptimised/debug libs are currently not available precompiled). +You may want to download the precompiled [LLVM libs](https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win-16.0.0/llvmlibs_mt.7z) and extract them to the root rpcs3 folder (which contains `rpcs3.sln`), as well as download and extract the [additional libs](https://github.com/RPCS3/glslang/releases/download/custom-build-win/glslanglibs_mt.7z) to `lib\%CONFIGURATION%-x64\` to speed up compilation time (unoptimised/debug libs are currently not available precompiled). If you're not using the precompiled libs, build the following projects in *__BUILD_BEFORE* folder by right-clicking on a project > *Build*.: * glslang diff --git a/CMakeLists.txt b/CMakeLists.txt index ca8cc08c1f..f9f20d2b4d 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -14,7 +14,7 @@ endif() option(USE_NATIVE_INSTRUCTIONS "USE_NATIVE_INSTRUCTIONS makes rpcs3 compile with -march=native, which is useful for local builds, but not good for packages." ON) option(WITH_LLVM "Enable usage of LLVM library" ON) -option(BUILD_LLVM_SUBMODULE "Build LLVM from git submodule" ON) +option(BUILD_LLVM "Build LLVM from git submodule" OFF) option(USE_FAUDIO "FAudio audio backend" ON) option(USE_LIBEVDEV "libevdev-based joystick support" ON) option(USE_DISCORD_RPC "Discord rich presence integration" OFF) diff --git a/Utilities/JIT.cpp b/Utilities/JIT.cpp index d4e34d1a7f..2c203813a6 100644 --- a/Utilities/JIT.cpp +++ b/Utilities/JIT.cpp @@ -198,6 +198,9 @@ static u8* add_jit_memory(usz size, uint align) }); } + ensure(pointer + pos >= get_jit_memory() + Off); + ensure(pointer + pos < get_jit_memory() + Off + 0x40000000); + return pointer + pos; } @@ -1319,7 +1322,10 @@ std::string jit_compiler::cpu(const std::string& _cpu) m_cpu == "icelake-client" || m_cpu == "icelake-server" || m_cpu == "tigerlake" || - m_cpu == "rocketlake") + m_cpu == "rocketlake" || + m_cpu == "alderlake" || + m_cpu == "raptorlake" || + m_cpu == "meteorlake") { // Downgrade if AVX is not supported by some chips if (!utils::has_avx()) @@ -1350,6 +1356,18 @@ std::string jit_compiler::cpu(const std::string& _cpu) // Upgrade m_cpu = "znver2"; } + + if ((m_cpu == "znver3" || m_cpu == "goldmont" || m_cpu == "alderlake" || m_cpu == "raptorlake" || m_cpu == "meteorlake") && utils::has_avx512_icl()) + { + // Upgrade + m_cpu = "icelake-client"; + } + + if (m_cpu == "goldmont" && utils::has_avx2()) + { + // Upgrade + m_cpu = "alderlake"; + } } return m_cpu; @@ -1362,15 +1380,13 @@ jit_compiler::jit_compiler(const std::unordered_map& _link, co std::string result; auto null_mod = std::make_unique ("null_", *m_context); -#if defined(__APPLE__) && defined(ARCH_ARM64) - // Force override triple on Apple arm64 or we'll get linking errors. - null_mod->setTargetTriple(llvm::Triple::normalize(utils::c_llvm_default_triple)); -#endif + null_mod->setTargetTriple(llvm::Triple::normalize(llvm::sys::getProcessTriple())); + + std::unique_ptr mem; if (_link.empty()) { - std::unique_ptr mem; - + // Auxiliary JIT (does not use custom memory manager, only writes the objects) if (flags & 0x1) { mem = std::make_unique(); @@ -1378,31 +1394,33 @@ jit_compiler::jit_compiler(const std::unordered_map& _link, co else { mem = std::make_unique(); - null_mod->setTargetTriple(llvm::Triple::normalize(utils::c_llvm_default_triple)); +#if defined(_WIN32) && defined(ARCH_X64) + null_mod->setTargetTriple(llvm::Triple::normalize("x86_64-unknown-linux-gnu")); +#endif } + } + else + { + mem = std::make_unique(); + } - // Auxiliary JIT (does not use custom memory manager, only writes the objects) + { m_engine.reset(llvm::EngineBuilder(std::move(null_mod)) .setErrorStr(&result) .setEngineKind(llvm::EngineKind::JIT) .setMCJITMemoryManager(std::move(mem)) .setOptLevel(llvm::CodeGenOpt::Aggressive) .setCodeModel(flags & 0x2 ? llvm::CodeModel::Large : llvm::CodeModel::Small) +#ifdef __APPLE__ + .setCodeModel(llvm::CodeModel::Large) +#endif + .setRelocationModel(llvm::Reloc::Model::PIC_) .setMCPU(m_cpu) .create()); } - else - { - // Primary JIT - m_engine.reset(llvm::EngineBuilder(std::move(null_mod)) - .setErrorStr(&result) - .setEngineKind(llvm::EngineKind::JIT) - .setMCJITMemoryManager(std::make_unique()) - .setOptLevel(llvm::CodeGenOpt::Aggressive) - .setCodeModel(flags & 0x2 ? llvm::CodeModel::Large : llvm::CodeModel::Small) - .setMCPU(m_cpu) - .create()); + if (!_link.empty()) + { for (auto&& [name, addr] : _link) { m_engine->updateGlobalMapping(name, addr); diff --git a/buildfiles/msvc/rpcs3_debug.props b/buildfiles/msvc/rpcs3_debug.props index d35ce58c59..fe719908e2 100644 --- a/buildfiles/msvc/rpcs3_debug.props +++ b/buildfiles/msvc/rpcs3_debug.props @@ -16,54 +16,97 @@ %(AdditionalLibraryDirectories);..\llvm_build\Debug\lib %(AdditionalDependencies); - LLVMProfileData.lib; - LLVMDebugInfoCodeView.lib; - LLVMDebugInfoMSF.lib; - LLVMInstrumentation.lib; - LLVMMCJIT.lib; - LLVMRuntimeDyld.lib; - LLVMVectorize.lib; - LLVMX86CodeGen.lib; - LLVMGlobalISel.lib; - LLVMX86Disassembler.lib; - LLVMExecutionEngine.lib; - LLVMAsmPrinter.lib; - LLVMSelectionDAG.lib; - LLVMCodeGen.lib; - LLVMScalarOpts.lib; - LLVMInstCombine.lib; - LLVMTransformUtils.lib; - LLVMAnalysis.lib; - LLVMTarget.lib; - LLVMX86Desc.lib; - LLVMObject.lib; - LLVMMCParser.lib; - LLVMBitReader.lib; - LLVMCore.lib; - LLVMMC.lib; - LLVMX86Info.lib; - LLVMSupport.lib; - LLVMMCDisassembler.lib; - LLVMipo.lib; - LLVMBinaryFormat.lib; - LLVMPasses.lib; - LLVMIRReader.lib; - LLVMLinker.lib; - LLVMAsmParser.lib; - LLVMX86AsmParser.lib; - LLVMDemangle.lib; - LLVMDebugInfoDWARF.lib; - LLVMRemarks.lib; - LLVMBitstreamReader.lib; - LLVMTextAPI.lib; - LLVMCFGuard.lib; - LLVMAggressiveInstCombine.lib; - LLVMBitWriter.lib; - LLVMCoroutines.lib; - LLVMObjCARCOpts.lib; - LLVMIntelJITEvents.lib; + LLVMAggressiveInstCombine.lib; + LLVMAnalysis.lib; + LLVMAsmParser.lib; + LLVMAsmPrinter.lib; + LLVMBinaryFormat.lib; + LLVMBitReader.lib; + LLVMBitstreamReader.lib; + LLVMBitWriter.lib; + LLVMCFGuard.lib; + LLVMCFIVerify.lib; + LLVMCodeGen.lib; + LLVMCore.lib; + LLVMCoroutines.lib; + LLVMCoverage.lib; + LLVMDebugInfoCodeView.lib; + LLVMDebuginfod.lib; + LLVMDebugInfoDWARF.lib; + LLVMDebugInfoGSYM.lib; + LLVMDebugInfoLogicalView.lib; + LLVMDebugInfoMSF.lib; + LLVMDebugInfoPDB.lib; + LLVMDemangle.lib; + LLVMDiff.lib; + LLVMDlltoolDriver.lib; + LLVMDWARFLinker.lib; + LLVMDWARFLinkerParallel.lib; + LLVMDWP.lib; + LLVMExecutionEngine.lib; + LLVMExegesis.lib; + LLVMExegesisX86.lib; + LLVMExtensions.lib; + LLVMFileCheck.lib; + LLVMFrontendHLSL.lib; + LLVMFrontendOpenACC.lib; + LLVMFrontendOpenMP.lib; + LLVMFuzzerCLI.lib; + LLVMFuzzMutate.lib; + LLVMGlobalISel.lib; + LLVMInstCombine.lib; + LLVMInstrumentation.lib; + LLVMIntelJITEvents.lib; + LLVMInterfaceStub.lib; + LLVMInterpreter.lib; + LLVMipo.lib; + LLVMIRPrinter.lib; + LLVMIRReader.lib; + LLVMJITLink.lib; + LLVMLibDriver.lib; + LLVMLineEditor.lib; + LLVMLinker.lib; + LLVMLTO.lib; + LLVMMCA.lib; + LLVMMCDisassembler.lib; + LLVMMCJIT.lib; + LLVMMC.lib; + LLVMMCParser.lib; + LLVMMIRParser.lib; + LLVMObjCARCOpts.lib; + LLVMObjCopy.lib; + LLVMObject.lib; + LLVMObjectYAML.lib; + LLVMOption.lib; + LLVMOrcJIT.lib; + LLVMOrcShared.lib; + LLVMOrcTargetProcess.lib; + LLVMPasses.lib; + LLVMProfileData.lib; + LLVMRemarks.lib; + LLVMRuntimeDyld.lib; + LLVMScalarOpts.lib; + LLVMSelectionDAG.lib; + LLVMSupport.lib; + LLVMSymbolize.lib; + LLVMTableGenGlobalISel.lib; + LLVMTableGen.lib; + LLVMTarget.lib; + LLVMTargetParser.lib; + LLVMTextAPI.lib; + LLVMTransformUtils.lib; + LLVMVectorize.lib; + LLVMWindowsDriver.lib; + LLVMWindowsManifest.lib; + LLVMX86AsmParser.lib; + LLVMX86CodeGen.lib; + LLVMX86Desc.lib; + LLVMX86Disassembler.lib; + LLVMX86Info.lib; + LLVMX86TargetMCA.lib; + LLVMXRay.lib; - \ No newline at end of file + diff --git a/buildfiles/msvc/rpcs3_release.props b/buildfiles/msvc/rpcs3_release.props index 038eb81d82..e7c907c075 100644 --- a/buildfiles/msvc/rpcs3_release.props +++ b/buildfiles/msvc/rpcs3_release.props @@ -17,54 +17,97 @@ true %(AdditionalLibraryDirectories);..\llvm_build\Release\lib %(AdditionalDependencies); - LLVMProfileData.lib; - LLVMDebugInfoCodeView.lib; - LLVMDebugInfoMSF.lib; - LLVMInstrumentation.lib; - LLVMMCJIT.lib; - LLVMRuntimeDyld.lib; - LLVMVectorize.lib; - LLVMX86CodeGen.lib; - LLVMGlobalISel.lib; - LLVMX86Disassembler.lib; - LLVMExecutionEngine.lib; - LLVMAsmPrinter.lib; - LLVMSelectionDAG.lib; - LLVMCodeGen.lib; - LLVMScalarOpts.lib; - LLVMInstCombine.lib; - LLVMTransformUtils.lib; - LLVMAnalysis.lib; - LLVMTarget.lib; - LLVMX86Desc.lib; - LLVMObject.lib; - LLVMMCParser.lib; - LLVMBitReader.lib; - LLVMCore.lib; - LLVMMC.lib; - LLVMX86Info.lib; - LLVMSupport.lib; - LLVMMCDisassembler.lib; - LLVMipo.lib; - LLVMBinaryFormat.lib; - LLVMPasses.lib; - LLVMIRReader.lib; - LLVMLinker.lib; - LLVMAsmParser.lib; - LLVMX86AsmParser.lib; - LLVMDemangle.lib; - LLVMDebugInfoDWARF.lib; - LLVMRemarks.lib; - LLVMBitstreamReader.lib; - LLVMTextAPI.lib; - LLVMCFGuard.lib; - LLVMAggressiveInstCombine.lib; - LLVMBitWriter.lib; - LLVMCoroutines.lib; - LLVMObjCARCOpts.lib; - LLVMIntelJITEvents.lib; + LLVMAggressiveInstCombine.lib; + LLVMAnalysis.lib; + LLVMAsmParser.lib; + LLVMAsmPrinter.lib; + LLVMBinaryFormat.lib; + LLVMBitReader.lib; + LLVMBitstreamReader.lib; + LLVMBitWriter.lib; + LLVMCFGuard.lib; + LLVMCFIVerify.lib; + LLVMCodeGen.lib; + LLVMCore.lib; + LLVMCoroutines.lib; + LLVMCoverage.lib; + LLVMDebugInfoCodeView.lib; + LLVMDebuginfod.lib; + LLVMDebugInfoDWARF.lib; + LLVMDebugInfoGSYM.lib; + LLVMDebugInfoLogicalView.lib; + LLVMDebugInfoMSF.lib; + LLVMDebugInfoPDB.lib; + LLVMDemangle.lib; + LLVMDiff.lib; + LLVMDlltoolDriver.lib; + LLVMDWARFLinker.lib; + LLVMDWARFLinkerParallel.lib; + LLVMDWP.lib; + LLVMExecutionEngine.lib; + LLVMExegesis.lib; + LLVMExegesisX86.lib; + LLVMExtensions.lib; + LLVMFileCheck.lib; + LLVMFrontendHLSL.lib; + LLVMFrontendOpenACC.lib; + LLVMFrontendOpenMP.lib; + LLVMFuzzerCLI.lib; + LLVMFuzzMutate.lib; + LLVMGlobalISel.lib; + LLVMInstCombine.lib; + LLVMInstrumentation.lib; + LLVMIntelJITEvents.lib; + LLVMInterfaceStub.lib; + LLVMInterpreter.lib; + LLVMipo.lib; + LLVMIRPrinter.lib; + LLVMIRReader.lib; + LLVMJITLink.lib; + LLVMLibDriver.lib; + LLVMLineEditor.lib; + LLVMLinker.lib; + LLVMLTO.lib; + LLVMMCA.lib; + LLVMMCDisassembler.lib; + LLVMMCJIT.lib; + LLVMMC.lib; + LLVMMCParser.lib; + LLVMMIRParser.lib; + LLVMObjCARCOpts.lib; + LLVMObjCopy.lib; + LLVMObject.lib; + LLVMObjectYAML.lib; + LLVMOption.lib; + LLVMOrcJIT.lib; + LLVMOrcShared.lib; + LLVMOrcTargetProcess.lib; + LLVMPasses.lib; + LLVMProfileData.lib; + LLVMRemarks.lib; + LLVMRuntimeDyld.lib; + LLVMScalarOpts.lib; + LLVMSelectionDAG.lib; + LLVMSupport.lib; + LLVMSymbolize.lib; + LLVMTableGenGlobalISel.lib; + LLVMTableGen.lib; + LLVMTarget.lib; + LLVMTargetParser.lib; + LLVMTextAPI.lib; + LLVMTransformUtils.lib; + LLVMVectorize.lib; + LLVMWindowsDriver.lib; + LLVMWindowsManifest.lib; + LLVMX86AsmParser.lib; + LLVMX86CodeGen.lib; + LLVMX86Desc.lib; + LLVMX86Disassembler.lib; + LLVMX86Info.lib; + LLVMX86TargetMCA.lib; + LLVMXRay.lib; - \ No newline at end of file + diff --git a/llvm b/llvm index 9b52b6c39a..89d5468e95 160000 --- a/llvm +++ b/llvm @@ -1 +1 @@ -Subproject commit 9b52b6c39ae9f0759fbce7dd0db4b3290d6ebc56 +Subproject commit 89d5468e9505ddb04754eadbfed526f5b6ad4cbd diff --git a/rpcs3/Emu/CPU/CPUTranslator.cpp b/rpcs3/Emu/CPU/CPUTranslator.cpp index 05fcbe9769..28969827b5 100644 --- a/rpcs3/Emu/CPU/CPUTranslator.cpp +++ b/rpcs3/Emu/CPU/CPUTranslator.cpp @@ -75,6 +75,14 @@ cpu_translator::cpu_translator(llvm::Module* _module, bool is_be) return result; } }); + + register_intrinsic("any_select_by_bit4", [&](llvm::CallInst* ci) -> llvm::Value* + { + const auto s = bitcast(m_ir->CreateShl(bitcast(ci->getOperand(0)), 3));; + const auto a = bitcast(ci->getOperand(1)); + const auto b = bitcast(ci->getOperand(2)); + return m_ir->CreateSelect(m_ir->CreateICmpSLT(s, llvm::ConstantAggregateZero::get(get_type())), b, a); + }); } void cpu_translator::initialize(llvm::LLVMContext& context, llvm::ExecutionEngine& engine) @@ -112,6 +120,8 @@ void cpu_translator::initialize(llvm::LLVMContext& context, llvm::ExecutionEngin cpu == "broadwell" || cpu == "skylake" || cpu == "alderlake" || + cpu == "raptorlake" || + cpu == "meteorlake" || cpu == "bdver2" || cpu == "bdver3" || cpu == "bdver4" || @@ -135,7 +145,9 @@ void cpu_translator::initialize(llvm::LLVMContext& context, llvm::ExecutionEngin // Test VNNI feature (TODO) if (cpu == "cascadelake" || cpu == "cooperlake" || - cpu == "alderlake") + cpu == "alderlake" || + cpu == "raptorlake" || + cpu == "meteorlake") { m_use_vnni = true; } diff --git a/rpcs3/Emu/CPU/CPUTranslator.h b/rpcs3/Emu/CPU/CPUTranslator.h index a6a93cd7bf..72ce4ce01f 100644 --- a/rpcs3/Emu/CPU/CPUTranslator.h +++ b/rpcs3/Emu/CPU/CPUTranslator.h @@ -19,7 +19,9 @@ #include "llvm/IR/IRBuilder.h" #include "llvm/IR/Module.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Support/KnownBits.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/IntrinsicsAArch64.h" @@ -59,6 +61,62 @@ concept DSLValue = requires (T& v) { v.eval(std::declval*>()) } -> LLVMValue; }; +template +struct get_int_bits +{ +}; + +template <> +struct get_int_bits<1> +{ + using utype = bool; +}; + +template <> +struct get_int_bits<2> +{ + using utype = i2; +}; + +template <> +struct get_int_bits<4> +{ + using utype = i4; +}; + +template <> +struct get_int_bits<8> +{ + using utype = u8; +}; + +template <> +struct get_int_bits<16> +{ + using utype = u16; +}; + +template <> +struct get_int_bits<32> +{ + using utype = u32; +}; + +template <> +struct get_int_bits<64> +{ + using utype = u64; +}; + +template <> +struct get_int_bits<128> +{ + using utype = u128; +}; + +template +using get_int_vt = typename get_int_bits::utype; + template struct llvm_value_t { @@ -3292,10 +3350,41 @@ public: // Infinite-precision shift left template > - static auto inf_shl(T&& a, U&& b) + auto inf_shl(T&& a, U&& b) { static constexpr u32 esz = llvm_value_t::esize; + if constexpr (esz == 32) + { +#if defined(ARCH_X64) + if (m_use_fma && !llvm::isa(b.eval(m_ir))) + return eval(llvm_calli{"llvm.x86.avx2.psllv.d", {std::forward(a), std::forward(b)}}); +#endif + } + + if constexpr (esz == 16) + { +#if defined(ARCH_X64) + if (m_use_avx512 && !llvm::isa(b.eval(m_ir))) + return eval(llvm_calli{"llvm.x86.avx512.psllv.w.128", {std::forward(a), std::forward(b)}}); + + if (m_use_fma && !llvm::isa(b.eval(m_ir))) + { + using t32 = value_t; + auto a32 = eval(bitcast(std::forward(a))); + auto b32 = eval(bitcast(std::forward(b))); + auto sizeL = eval(b32 & 0xffff); + auto sizeH = eval(b32 >> 16); + auto dataL = eval(llvm_calli{"llvm.x86.avx2.psllv.d", {a32, sizeL}}); + auto dataH = eval(llvm_calli{"llvm.x86.avx2.psllv.d", {eval(a32 & 0xffff0000), sizeH}}); + return eval(bitcast((dataL & 0xffff) | dataH)); + } +#endif + } + + return eval(select(b < esz, a << b, splat(0))); + + /* return expr(select(b < esz, a << b, splat(0)), [](llvm::Value*& value, llvm::Module* _m) -> llvm_match_tuple { static const auto M = match(); @@ -3314,14 +3403,46 @@ public: value = nullptr; return {}; }); + */ } // Infinite-precision logical shift right (unsigned) template > - static auto inf_lshr(T&& a, U&& b) + auto inf_lshr(T&& a, U&& b) { static constexpr u32 esz = llvm_value_t::esize; + if constexpr (esz == 32) + { +#if defined(ARCH_X64) + if (m_use_fma && !llvm::isa(b.eval(m_ir))) + return eval(llvm_calli{"llvm.x86.avx2.psrlv.d", {std::forward(a), std::forward(b)}}); +#endif + } + + if constexpr (esz == 16) + { +#if defined(ARCH_X64) + if (m_use_avx512 && !llvm::isa(b.eval(m_ir))) + return eval(llvm_calli{"llvm.x86.avx512.psrlv.w.128", {std::forward(a), std::forward(b)}}); + + if (m_use_fma && !llvm::isa(b.eval(m_ir))) + { + using t32 = value_t; + auto a32 = eval(bitcast(std::forward(a))); + auto b32 = eval(bitcast(std::forward(b))); + auto sizeL = eval(b32 & 0xffff); + auto sizeH = eval(b32 >> 16); + auto dataL = eval(llvm_calli{"llvm.x86.avx2.psrlv.d", {eval(a32 & 0xffff), sizeL}}); + auto dataH = eval(llvm_calli{"llvm.x86.avx2.psrlv.d", {a32, sizeH}}); + return eval(bitcast(dataL | (dataH & 0xffff0000))); + } +#endif + } + + return eval(select(b < esz, a >> b, splat(0))); + + /* return expr(select(b < esz, a >> b, splat(0)), [](llvm::Value*& value, llvm::Module* _m) -> llvm_match_tuple { static const auto M = match(); @@ -3340,14 +3461,46 @@ public: value = nullptr; return {}; }); + */ } // Infinite-precision arithmetic shift right (signed) template > - static auto inf_ashr(T&& a, U&& b) + auto inf_ashr(T&& a, U&& b) { static constexpr u32 esz = llvm_value_t::esize; + if constexpr (esz == 32) + { +#if defined(ARCH_X64) + if (m_use_fma && !llvm::isa(b.eval(m_ir))) + return eval(llvm_calli{"llvm.x86.avx2.psrav.d", {std::forward(a), std::forward(b)}}); +#endif + } + + if constexpr (esz == 16) + { +#if defined(ARCH_X64) + if (m_use_avx512 && !llvm::isa(b.eval(m_ir))) + return eval(llvm_calli{"llvm.x86.avx512.psrav.w.128", {std::forward(a), std::forward(b)}}); + + if (m_use_fma && !llvm::isa(b.eval(m_ir))) + { + using t32 = value_t; + auto a32 = eval(bitcast(std::forward(a))); + auto b32 = eval(bitcast(std::forward(b))); + auto sizeL = eval(b32 & 0xffff); + auto sizeH = eval(b32 >> 16); + auto dataL = eval(llvm_calli{"llvm.x86.avx2.psrav.d", {eval(a32 << 16), sizeL}}); + auto dataH = eval(llvm_calli{"llvm.x86.avx2.psrav.d", {a32, sizeH}}); + return eval(bitcast((dataL >> 16) | (dataH & 0xffff0000))); + } +#endif + } + + return eval(a >> select(b > (esz - 1), splat(esz - 1), b)); + + /* return expr(a >> select(b > (esz - 1), splat(esz - 1), b), [](llvm::Value*& value, llvm::Module* _m) -> llvm_match_tuple { static const auto M = match(); @@ -3366,6 +3519,7 @@ public: value = nullptr; return {}; }); + */ } template @@ -3567,6 +3721,18 @@ public: template llvm::Constant* make_const_vector(T, llvm::Type*, u32 = __builtin_LINE()); + template + llvm::KnownBits get_known_bits(T a) + { + return llvm::computeKnownBits(a.eval(m_ir), m_module->getDataLayout()); + } + + template + llvm::KnownBits kbc(T value) + { + return llvm::KnownBits::makeConstant(llvm::APInt(sizeof(T) * 8, u64(value))); + } + private: // Custom intrinsic table std::unordered_map> m_intrinsics; @@ -3647,6 +3813,13 @@ public: }); } + // (m << 3) >= 0 ? a : b + template + static auto select_by_bit4(T&& m, U&& a, V&& b) + { + return llvm_calli{"any_select_by_bit4", {std::forward(m), std::forward(a), std::forward(b)}}; + } + template , f32[4]>>> static auto fre(T&& a) { diff --git a/rpcs3/Emu/Cell/PPUThread.cpp b/rpcs3/Emu/Cell/PPUThread.cpp index 869ea0b430..c3141d6963 100644 --- a/rpcs3/Emu/Cell/PPUThread.cpp +++ b/rpcs3/Emu/Cell/PPUThread.cpp @@ -2130,10 +2130,10 @@ static void ppu_check(ppu_thread& ppu, u64 addr) { ppu.cia = ::narrow(addr); + // ppu_check() shall not return directly if (ppu.test_stopped()) - { - return; - } + ; + ppu_escape(&ppu); } static void ppu_trace(u64 addr) @@ -3368,13 +3368,6 @@ bool ppu_initialize(const ppu_module& info, bool check_only) { std::unordered_map link_table { - { "sys_game_watchdog_start", reinterpret_cast(ppu_execute_syscall) }, - { "sys_game_watchdog_stop", reinterpret_cast(ppu_execute_syscall) }, - { "sys_game_watchdog_clear", reinterpret_cast(ppu_execute_syscall) }, - { "sys_game_get_system_sw_version", reinterpret_cast(ppu_execute_syscall) }, - { "sys_game_board_storage_read", reinterpret_cast(ppu_execute_syscall) }, - { "sys_game_board_storage_write", reinterpret_cast(ppu_execute_syscall) }, - { "sys_game_get_rtc_status", reinterpret_cast(ppu_execute_syscall) }, { "__trap", reinterpret_cast(&ppu_trap) }, { "__error", reinterpret_cast(&ppu_error) }, { "__check", reinterpret_cast(&ppu_check) }, @@ -3388,6 +3381,7 @@ bool ppu_initialize(const ppu_module& info, bool check_only) { "__dcbz", reinterpret_cast(+[](u32 addr){ alignas(64) static constexpr u8 z[128]{}; do_cell_atomic_128_store(addr, z); }) }, { "__resupdate", reinterpret_cast(vm::reservation_update) }, { "__resinterp", reinterpret_cast(ppu_reservation_fallback) }, + { "__escape", reinterpret_cast(+ppu_escape) }, }; for (u64 index = 0; index < 1024; index++) @@ -3943,12 +3937,7 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, co std::unique_ptr _module = std::make_unique(obj_name, jit.get_context()); // Initialize target -#if defined(__APPLE__) && defined(ARCH_ARM64) - // Force target linux on macOS arm64 to bypass some 64-bit address space linking issues - _module->setTargetTriple(Triple::normalize(utils::c_llvm_default_triple)); -#else _module->setTargetTriple(Triple::normalize(sys::getProcessTriple())); -#endif _module->setDataLayout(jit.get_engine().getTargetMachine()->createDataLayout()); // Initialize translator @@ -3978,6 +3967,11 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, co } { + if (g_cfg.core.ppu_debug) + { + translator.build_interpreter(); + } + legacy::FunctionPassManager pm(_module.get()); // Basic optimizations diff --git a/rpcs3/Emu/Cell/PPUTranslator.cpp b/rpcs3/Emu/Cell/PPUTranslator.cpp index 06d76614e3..bba48ba2c4 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.cpp +++ b/rpcs3/Emu/Cell/PPUTranslator.cpp @@ -200,7 +200,7 @@ Function* PPUTranslator::Translate(const ppu_function& info) // Create tail call to the check function m_ir->SetInsertPoint(vcheck); - Call(GetType(), "__check", m_thread, GetAddr()); + Call(GetType(), "__check", m_thread, GetAddr())->setTailCall(); m_ir->CreateRetVoid(); } else @@ -604,12 +604,12 @@ Value* PPUTranslator::ReadMemory(Value* addr, Type* type, bool is_be, u32 align) { // Read, byteswap, bitcast const auto int_type = m_ir->getIntNTy(size); - const auto value = m_ir->CreateAlignedLoad(int_type, GetMemory(addr, int_type), llvm::MaybeAlign{align}, true); + const auto value = m_ir->CreateAlignedLoad(int_type, GetMemory(addr, int_type), llvm::MaybeAlign{align}); return bitcast(Call(int_type, fmt::format("llvm.bswap.i%u", size), value), type); } // Read normally - return m_ir->CreateAlignedLoad(type, GetMemory(addr, type), llvm::MaybeAlign{align}, true); + return m_ir->CreateAlignedLoad(type, GetMemory(addr, type), llvm::MaybeAlign{align}); } void PPUTranslator::WriteMemory(Value* addr, Value* value, bool is_be, u32 align) @@ -625,7 +625,7 @@ void PPUTranslator::WriteMemory(Value* addr, Value* value, bool is_be, u32 align } // Write - m_ir->CreateAlignedStore(value, GetMemory(addr, value->getType()), llvm::MaybeAlign{align}, true); + m_ir->CreateAlignedStore(value, GetMemory(addr, value->getType()), llvm::MaybeAlign{align}); } void PPUTranslator::CompilationError(const std::string& error) @@ -1945,12 +1945,14 @@ void PPUTranslator::SC(ppu_opcode_t op) if (index < 1024) { Call(GetType(), fmt::format("%s", ppu_syscall_code(index)), m_thread); + //Call(GetType(), "__escape", m_thread)->setTailCall(); m_ir->CreateRetVoid(); return; } } Call(GetType(), op.lev ? "__lv1call" : "__syscall", m_thread, num); + //Call(GetType(), "__escape", m_thread)->setTailCall(); m_ir->CreateRetVoid(); } @@ -2507,6 +2509,7 @@ void PPUTranslator::LWARX(ppu_opcode_t op) RegStore(Trunc(GetAddr()), m_cia); FlushRegisters(); Call(GetType(), "__resinterp", m_thread); + //Call(GetType(), "__escape", m_thread)->setTailCall(); m_ir->CreateRetVoid(); return; } @@ -2649,6 +2652,7 @@ void PPUTranslator::LDARX(ppu_opcode_t op) RegStore(Trunc(GetAddr()), m_cia); FlushRegisters(); Call(GetType(), "__resinterp", m_thread); + //Call(GetType(), "__escape", m_thread)->setTailCall(); m_ir->CreateRetVoid(); return; } @@ -2786,11 +2790,7 @@ void PPUTranslator::MTOCRF(ppu_opcode_t op) const auto index = m_ir->CreateAnd(m_ir->CreateLShr(value, 28 - i * 4), 15); const auto src = m_ir->CreateGEP(dyn_cast(m_mtocr_table)->getValueType(), m_mtocr_table, {m_ir->getInt32(0), m_ir->CreateShl(index, 2)}); const auto dst = bitcast(m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast(m_cr - m_locals) + i * 4), GetType()); -#if LLVM_VERSION_MAJOR < 15 - Call(GetType(), "llvm.memcpy.p0i8.p0i8.i32", dst, src, m_ir->getInt32(4), m_ir->getFalse()); -#else Call(GetType(), "llvm.memcpy.p0.p0.i32", dst, src, m_ir->getInt32(4), m_ir->getFalse()); -#endif } } } @@ -3313,7 +3313,7 @@ void PPUTranslator::STVLX(ppu_opcode_t op) const auto mask = bitcast(splat(0xffff) << trunc(value(addr) & 0xf)); const auto ptr = value(GetMemory(m_ir->CreateAnd(addr, ~0xfull), GetType())); const auto align = splat(16); - eval(llvm_calli{"llvm.masked.store.v16i8.p0v16i8", {data, ptr, align, mask}}); + eval(llvm_calli{"llvm.masked.store.v16i8.p0", {data, ptr, align, mask}}); } void PPUTranslator::STDBRX(ppu_opcode_t op) @@ -3343,7 +3343,7 @@ void PPUTranslator::STVRX(ppu_opcode_t op) const auto mask = bitcast(trunc(splat(0xffff) << (value(addr) & 0xf) >> 16)); const auto ptr = value(GetMemory(m_ir->CreateAnd(addr, ~0xfull), GetType())); const auto align = splat(16); - eval(llvm_calli{"llvm.masked.store.v16i8.p0v16i8", {data, ptr, align, mask}}); + eval(llvm_calli{"llvm.masked.store.v16i8.p0", {data, ptr, align, mask}}); } void PPUTranslator::STFSUX(ppu_opcode_t op) @@ -3524,7 +3524,7 @@ void PPUTranslator::DCBZ(ppu_opcode_t op) } else { - Call(GetType(), "llvm.memset.p0i8.i32", GetMemory(addr, GetType()), m_ir->getInt8(0), m_ir->getInt32(128), m_ir->getTrue()); + Call(GetType(), "llvm.memset.p0.i32", GetMemory(addr, GetType()), m_ir->getInt8(0), m_ir->getInt32(128), m_ir->getFalse()); } } @@ -4601,6 +4601,7 @@ void PPUTranslator::UNK(ppu_opcode_t op) { FlushRegisters(); Call(GetType(), "__error", m_thread, GetAddr(), m_ir->getInt32(op.opcode)); + //Call(GetType(), "__escape", m_thread)->setTailCall(); m_ir->CreateRetVoid(); } @@ -4862,6 +4863,7 @@ Value* PPUTranslator::CheckTrapCondition(u32 to, Value* left, Value* right) void PPUTranslator::Trap() { Call(GetType(), "__trap", m_thread, GetAddr()); + //Call(GetType(), "__escape", m_thread)->setTailCall(); m_ir->CreateRetVoid(); } @@ -4909,4 +4911,184 @@ MDNode* PPUTranslator::CheckBranchProbability(u32 bo) return nullptr; } +void PPUTranslator::build_interpreter() +{ +#define BUILD_VEC_INST(i) { \ + m_function = llvm::cast(m_module->getOrInsertFunction("op_" #i, get_type(), m_thread_type->getPointerTo()).getCallee()); \ + std::fill(std::begin(m_globals), std::end(m_globals), nullptr); \ + std::fill(std::begin(m_locals), std::end(m_locals), nullptr); \ + IRBuilder<> irb(BasicBlock::Create(m_context, "__entry", m_function)); \ + m_ir = &irb; \ + m_thread = m_function->getArg(0); \ + ppu_opcode_t op{}; \ + op.vd = 0; \ + op.va = 1; \ + op.vb = 2; \ + op.vc = 3; \ + this->i(op); \ + FlushRegisters(); \ + m_ir->CreateRetVoid(); \ + replace_intrinsics(*m_function); \ + } + + BUILD_VEC_INST(VADDCUW); + BUILD_VEC_INST(VADDFP); + BUILD_VEC_INST(VADDSBS); + BUILD_VEC_INST(VADDSHS); + BUILD_VEC_INST(VADDSWS); + BUILD_VEC_INST(VADDUBM); + BUILD_VEC_INST(VADDUBS); + BUILD_VEC_INST(VADDUHM); + BUILD_VEC_INST(VADDUHS); + BUILD_VEC_INST(VADDUWM); + BUILD_VEC_INST(VADDUWS); + BUILD_VEC_INST(VAND); + BUILD_VEC_INST(VANDC); + BUILD_VEC_INST(VAVGSB); + BUILD_VEC_INST(VAVGSH); + BUILD_VEC_INST(VAVGSW); + BUILD_VEC_INST(VAVGUB); + BUILD_VEC_INST(VAVGUH); + BUILD_VEC_INST(VAVGUW); + BUILD_VEC_INST(VCFSX); + BUILD_VEC_INST(VCFUX); + BUILD_VEC_INST(VCMPBFP); + BUILD_VEC_INST(VCMPBFP_); + BUILD_VEC_INST(VCMPEQFP); + BUILD_VEC_INST(VCMPEQFP_); + BUILD_VEC_INST(VCMPEQUB); + BUILD_VEC_INST(VCMPEQUB_); + BUILD_VEC_INST(VCMPEQUH); + BUILD_VEC_INST(VCMPEQUH_); + BUILD_VEC_INST(VCMPEQUW); + BUILD_VEC_INST(VCMPEQUW_); + BUILD_VEC_INST(VCMPGEFP); + BUILD_VEC_INST(VCMPGEFP_); + BUILD_VEC_INST(VCMPGTFP); + BUILD_VEC_INST(VCMPGTFP_); + BUILD_VEC_INST(VCMPGTSB); + BUILD_VEC_INST(VCMPGTSB_); + BUILD_VEC_INST(VCMPGTSH); + BUILD_VEC_INST(VCMPGTSH_); + BUILD_VEC_INST(VCMPGTSW); + BUILD_VEC_INST(VCMPGTSW_); + BUILD_VEC_INST(VCMPGTUB); + BUILD_VEC_INST(VCMPGTUB_); + BUILD_VEC_INST(VCMPGTUH); + BUILD_VEC_INST(VCMPGTUH_); + BUILD_VEC_INST(VCMPGTUW); + BUILD_VEC_INST(VCMPGTUW_); + BUILD_VEC_INST(VCTSXS); + BUILD_VEC_INST(VCTUXS); + BUILD_VEC_INST(VEXPTEFP); + BUILD_VEC_INST(VLOGEFP); + BUILD_VEC_INST(VMADDFP); + BUILD_VEC_INST(VMAXFP); + BUILD_VEC_INST(VMAXSB); + BUILD_VEC_INST(VMAXSH); + BUILD_VEC_INST(VMAXSW); + BUILD_VEC_INST(VMAXUB); + BUILD_VEC_INST(VMAXUH); + BUILD_VEC_INST(VMAXUW); + BUILD_VEC_INST(VMHADDSHS); + BUILD_VEC_INST(VMHRADDSHS); + BUILD_VEC_INST(VMINFP); + BUILD_VEC_INST(VMINSB); + BUILD_VEC_INST(VMINSH); + BUILD_VEC_INST(VMINSW); + BUILD_VEC_INST(VMINUB); + BUILD_VEC_INST(VMINUH); + BUILD_VEC_INST(VMINUW); + BUILD_VEC_INST(VMLADDUHM); + BUILD_VEC_INST(VMRGHB); + BUILD_VEC_INST(VMRGHH); + BUILD_VEC_INST(VMRGHW); + BUILD_VEC_INST(VMRGLB); + BUILD_VEC_INST(VMRGLH); + BUILD_VEC_INST(VMRGLW); + BUILD_VEC_INST(VMSUMMBM); + BUILD_VEC_INST(VMSUMSHM); + BUILD_VEC_INST(VMSUMSHS); + BUILD_VEC_INST(VMSUMUBM); + BUILD_VEC_INST(VMSUMUHM); + BUILD_VEC_INST(VMSUMUHS); + BUILD_VEC_INST(VMULESB); + BUILD_VEC_INST(VMULESH); + BUILD_VEC_INST(VMULEUB); + BUILD_VEC_INST(VMULEUH); + BUILD_VEC_INST(VMULOSB); + BUILD_VEC_INST(VMULOSH); + BUILD_VEC_INST(VMULOUB); + BUILD_VEC_INST(VMULOUH); + BUILD_VEC_INST(VNMSUBFP); + BUILD_VEC_INST(VNOR); + BUILD_VEC_INST(VOR); + BUILD_VEC_INST(VPERM); + BUILD_VEC_INST(VPKPX); + BUILD_VEC_INST(VPKSHSS); + BUILD_VEC_INST(VPKSHUS); + BUILD_VEC_INST(VPKSWSS); + BUILD_VEC_INST(VPKSWUS); + BUILD_VEC_INST(VPKUHUM); + BUILD_VEC_INST(VPKUHUS); + BUILD_VEC_INST(VPKUWUM); + BUILD_VEC_INST(VPKUWUS); + BUILD_VEC_INST(VREFP); + BUILD_VEC_INST(VRFIM); + BUILD_VEC_INST(VRFIN); + BUILD_VEC_INST(VRFIP); + BUILD_VEC_INST(VRFIZ); + BUILD_VEC_INST(VRLB); + BUILD_VEC_INST(VRLH); + BUILD_VEC_INST(VRLW); + BUILD_VEC_INST(VRSQRTEFP); + BUILD_VEC_INST(VSEL); + BUILD_VEC_INST(VSL); + BUILD_VEC_INST(VSLB); + BUILD_VEC_INST(VSLDOI); + BUILD_VEC_INST(VSLH); + BUILD_VEC_INST(VSLO); + BUILD_VEC_INST(VSLW); + BUILD_VEC_INST(VSPLTB); + BUILD_VEC_INST(VSPLTH); + BUILD_VEC_INST(VSPLTISB); + BUILD_VEC_INST(VSPLTISH); + BUILD_VEC_INST(VSPLTISW); + BUILD_VEC_INST(VSPLTW); + BUILD_VEC_INST(VSR); + BUILD_VEC_INST(VSRAB); + BUILD_VEC_INST(VSRAH); + BUILD_VEC_INST(VSRAW); + BUILD_VEC_INST(VSRB); + BUILD_VEC_INST(VSRH); + BUILD_VEC_INST(VSRO); + BUILD_VEC_INST(VSRW); + BUILD_VEC_INST(VSUBCUW); + BUILD_VEC_INST(VSUBFP); + BUILD_VEC_INST(VSUBSBS); + BUILD_VEC_INST(VSUBSHS); + BUILD_VEC_INST(VSUBSWS); + BUILD_VEC_INST(VSUBUBM); + BUILD_VEC_INST(VSUBUBS); + BUILD_VEC_INST(VSUBUHM); + BUILD_VEC_INST(VSUBUHS); + BUILD_VEC_INST(VSUBUWM); + BUILD_VEC_INST(VSUBUWS); + BUILD_VEC_INST(VSUMSWS); + BUILD_VEC_INST(VSUM2SWS); + BUILD_VEC_INST(VSUM4SBS); + BUILD_VEC_INST(VSUM4SHS); + BUILD_VEC_INST(VSUM4UBS); + BUILD_VEC_INST(VUPKHPX); + BUILD_VEC_INST(VUPKHSB); + BUILD_VEC_INST(VUPKHSH); + BUILD_VEC_INST(VUPKLPX); + BUILD_VEC_INST(VUPKLSB); + BUILD_VEC_INST(VUPKLSH); + BUILD_VEC_INST(VXOR); +#undef BUILD_VEC_INST + + +} + #endif diff --git a/rpcs3/Emu/Cell/PPUTranslator.h b/rpcs3/Emu/Cell/PPUTranslator.h index 41e8745355..84732d6c0c 100644 --- a/rpcs3/Emu/Cell/PPUTranslator.h +++ b/rpcs3/Emu/Cell/PPUTranslator.h @@ -856,6 +856,8 @@ public: void FCTID_(ppu_opcode_t op) { return FCTID(op); } void FCTIDZ_(ppu_opcode_t op) { return FCTIDZ(op); } void FCFID_(ppu_opcode_t op) { return FCFID(op); } + + void build_interpreter(); }; #endif diff --git a/rpcs3/Emu/Cell/SPURecompiler.cpp b/rpcs3/Emu/Cell/SPURecompiler.cpp index 070a7ecaab..975ed2e8da 100644 --- a/rpcs3/Emu/Cell/SPURecompiler.cpp +++ b/rpcs3/Emu/Cell/SPURecompiler.cpp @@ -3914,6 +3914,7 @@ void spu_recompiler_base::dump(const spu_program& result, std::string& out) #if LLVM_VERSION_MAJOR < 17 #include "llvm/ADT/Triple.h" #endif +#include "llvm/Support/Host.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/Verifier.h" #include "llvm/IR/InlineAsm.h" @@ -5006,7 +5007,11 @@ public: // Create LLVM module std::unique_ptr _module = std::make_unique(m_hash + ".obj", m_context); - _module->setTargetTriple(Triple::normalize(utils::c_llvm_default_triple)); +#if defined(_WIN32) && defined(ARCH_X64) + _module->setTargetTriple(Triple::normalize("x86_64-unknown-linux-gnu")); +#else + _module->setTargetTriple(Triple::normalize(sys::getProcessTriple())); +#endif _module->setDataLayout(m_jit.get_engine().getTargetMachine()->createDataLayout()); m_module = _module.get(); @@ -5227,6 +5232,7 @@ public: m_ir->CreateRetVoid(); m_ir->SetInsertPoint(label_stop); + call("spu_escape", spu_runtime::g_escape, m_thread)->setTailCall(); m_ir->CreateRetVoid(); m_ir->SetInsertPoint(label_diff); @@ -5681,7 +5687,11 @@ public: // Create LLVM module std::unique_ptr _module = std::make_unique("spu_interpreter.obj", m_context); - _module->setTargetTriple(Triple::normalize(utils::c_llvm_default_triple)); +#if defined(_WIN32) && defined(ARCH_X64) + _module->setTargetTriple(Triple::normalize("x86_64-unknown-linux-gnu")); +#else + _module->setTargetTriple(Triple::normalize(sys::getProcessTriple())); +#endif _module->setDataLayout(m_jit.get_engine().getTargetMachine()->createDataLayout()); m_module = _module.get(); @@ -5982,7 +5992,8 @@ public: ncall->setTailCall(); m_ir->CreateRetVoid(); m_ir->SetInsertPoint(_stop); - m_ir->CreateStore(m_interp_pc, spu_ptr(&spu_thread::pc)); + m_ir->CreateStore(m_interp_pc, spu_ptr(&spu_thread::pc), true); + call("spu_escape", spu_runtime::g_escape, m_thread)->setTailCall(); m_ir->CreateRetVoid(); } } @@ -6355,7 +6366,7 @@ public: llvm::Value* get_rchcnt(u32 off, u64 inv = 0) { - const auto val = m_ir->CreateLoad(get_type(), _ptr(m_thread, off), true); + const auto val = m_ir->CreateLoad(get_type(), _ptr(m_thread, off)); const auto shv = m_ir->CreateLShr(val, spu_channel::off_count); return m_ir->CreateTrunc(m_ir->CreateXor(shv, u64{inv}), get_type()); } @@ -6415,20 +6426,20 @@ public: } case MFC_Cmd: { - res.value = m_ir->CreateLoad(get_type(), spu_ptr(&spu_thread::mfc_size), true); + res.value = m_ir->CreateLoad(get_type(), spu_ptr(&spu_thread::mfc_size)); res.value = m_ir->CreateSub(m_ir->getInt32(16), res.value); break; } case SPU_RdInMbox: { - res.value = m_ir->CreateLoad(get_type(), spu_ptr(&spu_thread::ch_in_mbox), true); + res.value = m_ir->CreateLoad(get_type(), spu_ptr(&spu_thread::ch_in_mbox)); res.value = m_ir->CreateLShr(res.value, 8); res.value = m_ir->CreateAnd(res.value, 7); break; } case SPU_RdEventStat: { - const auto mask = m_ir->CreateTrunc(m_ir->CreateLShr(m_ir->CreateLoad(get_type(), spu_ptr(&spu_thread::ch_events), true), 32), get_type()); + const auto mask = m_ir->CreateTrunc(m_ir->CreateLShr(m_ir->CreateLoad(get_type(), spu_ptr(&spu_thread::ch_events)), 32), get_type()); res.value = call("spu_get_events", &exec_get_events, m_thread, mask); break; } @@ -6815,7 +6826,7 @@ public: if (csize > 0 && csize <= 16) { // Generate single copy operation - m_ir->CreateStore(m_ir->CreateLoad(vtype, m_ir->CreateBitCast(src, vtype->getPointerTo()), true), m_ir->CreateBitCast(dst, vtype->getPointerTo()), true); + m_ir->CreateStore(m_ir->CreateLoad(vtype, m_ir->CreateBitCast(src, vtype->getPointerTo())), m_ir->CreateBitCast(dst, vtype->getPointerTo())); } else if (csize <= stride * 16 && !(csize % 32)) { @@ -6826,7 +6837,7 @@ public: const auto _dst = m_ir->CreateGEP(get_type(), dst, m_ir->getInt32(i)); if (csize - i < stride) { - m_ir->CreateStore(m_ir->CreateLoad(get_type(), m_ir->CreateBitCast(_src, get_type()), true), m_ir->CreateBitCast(_dst, get_type()), true); + m_ir->CreateStore(m_ir->CreateLoad(get_type(), m_ir->CreateBitCast(_src, get_type())), m_ir->CreateBitCast(_dst, get_type())); } else { @@ -7086,18 +7097,51 @@ public: void ROTM(spu_opcode_t op) { const auto [a, b] = get_vrs(op.ra, op.rb); - set_vr(op.rt, inf_lshr(a, -b & 63)); + + auto minusb = eval(-b); + if (auto [ok, x] = match_expr(b, -match()); ok) + { + minusb = eval(x); + } + + if (auto k = get_known_bits(minusb); (k & kbc(32)).isZero()) + { + set_vr(op.rt, a >> (minusb & 31)); + return; + } + + set_vr(op.rt, inf_lshr(a, minusb & 63)); } void ROTMA(spu_opcode_t op) { const auto [a, b] = get_vrs(op.ra, op.rb); - set_vr(op.rt, inf_ashr(a, -b & 63)); + + auto minusb = eval(-b); + if (auto [ok, x] = match_expr(b, -match()); ok) + { + minusb = eval(x); + } + + if (auto k = get_known_bits(minusb); (k & kbc(32)).isZero()) + { + set_vr(op.rt, a >> (minusb & 31)); + return; + } + + set_vr(op.rt, inf_ashr(a, minusb & 63)); } void SHL(spu_opcode_t op) { const auto [a, b] = get_vrs(op.ra, op.rb); + + if (auto k = get_known_bits(b); (k & kbc(32)).isZero()) + { + set_vr(op.rt, a << (b & 31)); + return; + } + set_vr(op.rt, inf_shl(a, b & 63)); } @@ -7110,18 +7154,51 @@ public: void ROTHM(spu_opcode_t op) { const auto [a, b] = get_vrs(op.ra, op.rb); - set_vr(op.rt, inf_lshr(a, -b & 31)); + + auto minusb = eval(-b); + if (auto [ok, x] = match_expr(b, -match()); ok) + { + minusb = eval(x); + } + + if (auto k = get_known_bits(minusb); (k & kbc(16)).isZero()) + { + set_vr(op.rt, a >> (minusb & 15)); + return; + } + + set_vr(op.rt, inf_lshr(a, minusb & 31)); } void ROTMAH(spu_opcode_t op) { const auto [a, b] = get_vrs(op.ra, op.rb); - set_vr(op.rt, inf_ashr(a, -b & 31)); + + auto minusb = eval(-b); + if (auto [ok, x] = match_expr(b, -match()); ok) + { + minusb = eval(x); + } + + if (auto k = get_known_bits(minusb); (k & kbc(16)).isZero()) + { + set_vr(op.rt, a >> (minusb & 15)); + return; + } + + set_vr(op.rt, inf_ashr(a, minusb & 31)); } void SHLH(spu_opcode_t op) { const auto [a, b] = get_vrs(op.ra, op.rb); + + if (auto k = get_known_bits(b); (k & kbc(16)).isZero()) + { + set_vr(op.rt, a << (b & 15)); + return; + } + set_vr(op.rt, inf_shl(a, b & 31)); } @@ -8093,6 +8170,12 @@ public: } } + if (auto [ok, y] = match_expr(x, bitcast]>(match>>())); ok) + { + // Don't ruin FSMB/FSM/FSMH instructions + return false; + } + set_vr(op.rt4, select(x, get_vr(op.rb), get_vr(op.ra))); return true; } @@ -8337,9 +8420,9 @@ public: const auto bx = pshufb(bs, c); if (perm_only) - set_vr(op.rt4, select(noncast(c << 3) >= 0, ax, bx)); + set_vr(op.rt4, select_by_bit4(c, ax, bx)); else - set_vr(op.rt4, select(noncast(c << 3) >= 0, ax, bx) | x); + set_vr(op.rt4, select_by_bit4(c, ax, bx) | x); return; } @@ -8352,9 +8435,9 @@ public: const auto ax = pshufb(as, c); if (perm_only) - set_vr(op.rt4, select(noncast(c << 3) >= 0, ax, b)); + set_vr(op.rt4, select_by_bit4(c, ax, b)); else - set_vr(op.rt4, select(noncast(c << 3) >= 0, ax, b) | x); + set_vr(op.rt4, select_by_bit4(c, ax, b) | x); return; } } @@ -8371,9 +8454,9 @@ public: const auto bx = pshufb(bs, c); if (perm_only) - set_vr(op.rt4, select(noncast(c << 3) >= 0, a, bx)); + set_vr(op.rt4, select_by_bit4(c, a, bx)); else - set_vr(op.rt4, select(noncast(c << 3) >= 0, a, bx) | x); + set_vr(op.rt4, select_by_bit4(c, a, bx) | x); return; } } @@ -8401,9 +8484,9 @@ public: const auto bx = pshufb(b, cr); if (perm_only) - set_vr(op.rt4, select(noncast(cr << 3) >= 0, ax, bx)); + set_vr(op.rt4, select_by_bit4(cr, ax, bx)); else - set_vr(op.rt4, select(noncast(cr << 3) >= 0, ax, bx) | x); + set_vr(op.rt4, select_by_bit4(cr, ax, bx) | x); } void MPYA(spu_opcode_t op) @@ -9611,13 +9694,13 @@ public: void make_store_ls(value_t addr, value_t data) { const auto bswapped = byteswap(data); - m_ir->CreateStore(bswapped.eval(m_ir), m_ir->CreateBitCast(m_ir->CreateGEP(get_type(), m_lsptr, addr.value), get_type()), true); + m_ir->CreateStore(bswapped.eval(m_ir), m_ir->CreateBitCast(m_ir->CreateGEP(get_type(), m_lsptr, addr.value), get_type())); } auto make_load_ls(value_t addr) { value_t data; - data.value = m_ir->CreateLoad(get_type(), m_ir->CreateBitCast(m_ir->CreateGEP(get_type(), m_lsptr, addr.value), get_type()), true); + data.value = m_ir->CreateLoad(get_type(), m_ir->CreateBitCast(m_ir->CreateGEP(get_type(), m_lsptr, addr.value), get_type())); return byteswap(data); } @@ -9839,7 +9922,7 @@ public: target->addIncoming(e_addr, e_exec); m_ir->CreateCondBr(get_imm(op.d).value, d_exec, d_done, m_md_unlikely); m_ir->SetInsertPoint(d_exec); - m_ir->CreateStore(m_ir->getFalse(), spu_ptr(&spu_thread::interrupts_enabled))->setVolatile(true); + m_ir->CreateStore(m_ir->getFalse(), spu_ptr(&spu_thread::interrupts_enabled)); m_ir->CreateBr(d_done); m_ir->SetInsertPoint(d_done); m_ir->CreateBr(m_interp_bblock); @@ -9890,7 +9973,7 @@ public: if (op.d) { - m_ir->CreateStore(m_ir->getFalse(), spu_ptr(&spu_thread::interrupts_enabled))->setVolatile(true); + m_ir->CreateStore(m_ir->getFalse(), spu_ptr(&spu_thread::interrupts_enabled)); } m_ir->CreateStore(addr.value, spu_ptr(&spu_thread::pc)); @@ -10211,7 +10294,7 @@ public: // Exit function on unexpected target m_ir->SetInsertPoint(sw->getDefaultDest()); - m_ir->CreateStore(addr.value, spu_ptr(&spu_thread::pc), true); + m_ir->CreateStore(addr.value, spu_ptr(&spu_thread::pc)); if (m_finfo && m_finfo->fn) { diff --git a/rpcs3/Loader/PSF.h b/rpcs3/Loader/PSF.h index eb5c79b1ea..98be890b75 100644 --- a/rpcs3/Loader/PSF.h +++ b/rpcs3/Loader/PSF.h @@ -136,7 +136,7 @@ namespace psf { std::string_view value{value_array, CharN}; value = value.substr(0, std::min(value.find_first_of('\0'), value.size())); - return string(CharN, value, allow_truncate); + return string(max_size, value, allow_truncate); } // Make array entry diff --git a/rpcs3/util/sysinfo.hpp b/rpcs3/util/sysinfo.hpp index 23ebefd8e7..951c961bb6 100755 --- a/rpcs3/util/sysinfo.hpp +++ b/rpcs3/util/sysinfo.hpp @@ -71,16 +71,4 @@ namespace utils u64 _get_main_tid(); inline const u64 main_tid = _get_main_tid(); - -#ifdef LLVM_AVAILABLE - -#if defined(ARCH_X64) - const std::string c_llvm_default_triple = "x86_64-unknown-linux-gnu"; -#elif defined(ARCH_ARM64) - const std::string c_llvm_default_triple = "arm64-unknown-linux-gnu"; -#else - const std::string c_llvm_default_triple = "Unimplemented!" -#endif - -#endif } diff --git a/rpcs3/util/types.hpp b/rpcs3/util/types.hpp index 8b0c12a285..fb44d45bdb 100644 --- a/rpcs3/util/types.hpp +++ b/rpcs3/util/types.hpp @@ -115,7 +115,7 @@ namespace std } #endif -#if defined(__INTELLISENSE__) +#if defined(__INTELLISENSE__) || (defined (__clang__) && (__clang_major__ <= 16)) #define consteval constexpr #define constinit #endif