Update to LLVM 16.0.0, switch to upstream LLVM

This commit is contained in:
Ivan Chikish 2023-03-11 22:08:27 +03:00 committed by Ivan
parent 7081b89e97
commit fb88e1c1c9
24 changed files with 746 additions and 231 deletions

View file

@ -7,9 +7,9 @@ git submodule -q update --init --depth 1 $(awk '/path/ && !/llvm/ { print $3 }'
# Prefer newer Clang than in base system (see also .ci/install-freebsd.sh) # Prefer newer Clang than in base system (see also .ci/install-freebsd.sh)
# libc++ isn't in llvm* packages, so download manually # libc++ isn't in llvm* packages, so download manually
fetch https://github.com/llvm/llvm-project/releases/download/llvmorg-15.0.6/llvm-project-15.0.6.src.tar.xz fetch https://github.com/llvm/llvm-project/releases/download/llvmorg-16.0.0/llvm-project-16.0.0.src.tar.xz
tar xf llvm*.tar.xz tar xf llvm*.tar.xz
export CC=clang15 CXX=clang++15 export CC=clang16 CXX=clang++16
cmake -B libcxx_build -G Ninja -S llvm*/libcxx \ cmake -B libcxx_build -G Ninja -S llvm*/libcxx \
-DLLVM_CCACHE_BUILD=ON \ -DLLVM_CCACHE_BUILD=ON \
-DLIBCXX_INCLUDE_BENCHMARKS=OFF \ -DLIBCXX_INCLUDE_BENCHMARKS=OFF \
@ -20,7 +20,7 @@ export CXXFLAGS="$CXXFLAGS -nostdinc++ -isystem$PWD/libcxx_prefix/include/c++/v1
export LDFLAGS="$LDFLAGS -nostdlib++ -L$PWD/libcxx_prefix/lib -l:libc++.a -lcxxrt" export LDFLAGS="$LDFLAGS -nostdlib++ -L$PWD/libcxx_prefix/lib -l:libc++.a -lcxxrt"
CONFIGURE_ARGS=" CONFIGURE_ARGS="
-DWITH_LLVM=OFF -DWITH_LLVM=ON
-DUSE_SDL=OFF -DUSE_SDL=OFF
-DUSE_PRECOMPILED_HEADERS=OFF -DUSE_PRECOMPILED_HEADERS=OFF
-DUSE_NATIVE_INSTRUCTIONS=OFF -DUSE_NATIVE_INSTRUCTIONS=OFF

View file

@ -9,16 +9,11 @@ if [ -z "$CIRRUS_CI" ]; then
cd rpcs3 || exit 1 cd rpcs3 || exit 1
fi fi
# Pull all the submodules except llvm, since it is built separately and we just download that build # Pull all the submodules except llvm
# Note: Tried to use git submodule status, but it takes over 20 seconds # Note: Tried to use git submodule status, but it takes over 20 seconds
# shellcheck disable=SC2046 # shellcheck disable=SC2046
git submodule -q update --init $(awk '/path/ && !/llvm/ { print $3 }' .gitmodules) git submodule -q update --init $(awk '/path/ && !/llvm/ { print $3 }' .gitmodules)
# Download pre-compiled llvm libs
curl -sLO https://github.com/RPCS3/llvm-mirror/releases/download/custom-build/llvmlibs-linux.tar.gz
mkdir llvmlibs
tar -xzf ./llvmlibs-linux.tar.gz -C llvmlibs
mkdir build && cd build || exit 1 mkdir build && cd build || exit 1
if [ "$COMPILER" = "gcc" ]; then if [ "$COMPILER" = "gcc" ]; then
@ -42,8 +37,6 @@ export CFLAGS="$CFLAGS -fuse-ld=${LINKER}"
cmake .. \ cmake .. \
-DCMAKE_INSTALL_PREFIX=/usr \ -DCMAKE_INSTALL_PREFIX=/usr \
-DBUILD_LLVM_SUBMODULE=OFF \
-DLLVM_DIR=llvmlibs/lib/cmake/llvm/ \
-DUSE_NATIVE_INSTRUCTIONS=OFF \ -DUSE_NATIVE_INSTRUCTIONS=OFF \
-DUSE_PRECOMPILED_HEADERS=OFF \ -DUSE_PRECOMPILED_HEADERS=OFF \
-DCMAKE_C_FLAGS="$CFLAGS" \ -DCMAKE_C_FLAGS="$CFLAGS" \

View file

@ -1,10 +1,10 @@
#!/bin/sh -ex #!/bin/sh -ex
brew install -f --overwrite llvm@14 nasm ninja git p7zip create-dmg ccache brew install -f --overwrite llvm@16 nasm ninja git p7zip create-dmg ccache
#/usr/sbin/softwareupdate --install-rosetta --agree-to-license #/usr/sbin/softwareupdate --install-rosetta --agree-to-license
arch -x86_64 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)" arch -x86_64 /bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
arch -x86_64 /usr/local/homebrew/bin/brew install -f --overwrite llvm@14 sdl2 glew cmake arch -x86_64 /usr/local/homebrew/bin/brew install -f --overwrite llvm@16 sdl2 glew cmake
#export MACOSX_DEPLOYMENT_TARGET=12.0 #export MACOSX_DEPLOYMENT_TARGET=12.0
export CXX=clang++ export CXX=clang++
@ -33,7 +33,7 @@ cd ..
export Qt5_DIR="$WORKDIR/qt-downloader/5.15.2/clang_64/lib/cmake/Qt5" export Qt5_DIR="$WORKDIR/qt-downloader/5.15.2/clang_64/lib/cmake/Qt5"
export SDL2_DIR="$BREW_X64_PATH/opt/sdl2/lib/cmake/SDL2" export SDL2_DIR="$BREW_X64_PATH/opt/sdl2/lib/cmake/SDL2"
export PATH="$BREW_PATH/opt/llvm@14/bin:$WORKDIR/qt-downloader/5.15.2/clang_64/bin:$BREW_BIN:$BREW_SBIN:/usr/bin:/bin:/usr/sbin:/sbin:/opt/X11/bin:/Library/Apple/usr/bin:$PATH" export PATH="$BREW_PATH/opt/llvm@16/bin:$WORKDIR/qt-downloader/5.15.2/clang_64/bin:$BREW_BIN:$BREW_SBIN:/usr/bin:/bin:/usr/sbin:/sbin:/opt/X11/bin:/Library/Apple/usr/bin:$PATH"
export LDFLAGS="-L$BREW_X64_PATH/lib -Wl,-rpath,$BREW_X64_PATH/lib" export LDFLAGS="-L$BREW_X64_PATH/lib -Wl,-rpath,$BREW_X64_PATH/lib"
export CPPFLAGS="-I$BREW_X64_PATH/include -msse -msse2 -mcx16 -no-pie" export CPPFLAGS="-I$BREW_X64_PATH/include -msse -msse2 -mcx16 -no-pie"
export LIBRARY_PATH="$BREW_X64_PATH/lib" export LIBRARY_PATH="$BREW_X64_PATH/lib"

View file

@ -15,7 +15,7 @@ echo "AVVER=$AVVER" >> ../.ci/ci-vars.env
cd bin cd bin
mkdir "rpcs3.app/Contents/lib/" mkdir "rpcs3.app/Contents/lib/"
cp "/usr/local/Homebrew/opt/llvm@14/lib/c++/libc++abi.1.0.dylib" "rpcs3.app/Contents/lib/libc++abi.1.dylib" cp "/usr/local/Homebrew/opt/llvm@16/lib/c++/libc++abi.1.0.dylib" "rpcs3.app/Contents/lib/libc++abi.1.dylib"
rm -rf "rpcs3.app/Contents/Frameworks/QtPdf.framework" \ rm -rf "rpcs3.app/Contents/Frameworks/QtPdf.framework" \
"rpcs3.app/Contents/Frameworks/QtQml.framework" \ "rpcs3.app/Contents/Frameworks/QtQml.framework" \

View file

@ -1,4 +1,4 @@
#!/bin/sh -ex #!/bin/sh -ex
curl -L -o "./llvm.lock" "https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win/llvmlibs_mt.7z.sha256" curl -L -o "./llvm.lock" "https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win-16.0.0/llvmlibs_mt.7z.sha256"
curl -L -o "./glslang.lock" "https://github.com/RPCS3/glslang/releases/download/custom-build-win/glslanglibs_mt.7z.sha256" curl -L -o "./glslang.lock" "https://github.com/RPCS3/glslang/releases/download/custom-build-win/glslanglibs_mt.7z.sha256"

View file

@ -9,7 +9,7 @@ export ASSUME_ALWAYS_YES=true
pkg info # debug pkg info # debug
# Prefer newer Clang than in base system (see also .ci/build-freebsd.sh) # Prefer newer Clang than in base system (see also .ci/build-freebsd.sh)
pkg install llvm15 pkg install llvm16
# Mandatory dependencies (qt5-dbus and qt5-gui are pulled via qt5-widgets) # Mandatory dependencies (qt5-dbus and qt5-gui are pulled via qt5-widgets)
pkg install git ccache cmake ninja qt5-qmake qt5-buildtools qt5-widgets qt5-concurrent qt5-multimedia qt5-svg glew openal-soft ffmpeg pkg install git ccache cmake ninja qt5-qmake qt5-buildtools qt5-widgets qt5-concurrent qt5-multimedia qt5-svg glew openal-soft ffmpeg

View file

@ -19,7 +19,7 @@ QT_DECL_URL="${QT_HOST}${QT_PREFIX}qtdeclarative${QT_SUFFIX}"
QT_TOOL_URL="${QT_HOST}${QT_PREFIX}qttools${QT_SUFFIX}" QT_TOOL_URL="${QT_HOST}${QT_PREFIX}qttools${QT_SUFFIX}"
QT_MM_URL="${QT_HOST}${QT_PREFIX}qtmultimedia${QT_SUFFIX}" QT_MM_URL="${QT_HOST}${QT_PREFIX}qtmultimedia${QT_SUFFIX}"
QT_SVG_URL="${QT_HOST}${QT_PREFIX}qtsvg${QT_SUFFIX}" QT_SVG_URL="${QT_HOST}${QT_PREFIX}qtsvg${QT_SUFFIX}"
LLVMLIBS_URL='https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win/llvmlibs_mt.7z' LLVMLIBS_URL='https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win-16.0.0/llvmlibs_mt.7z'
GLSLANG_URL='https://github.com/RPCS3/glslang/releases/download/custom-build-win/glslanglibs_mt.7z' GLSLANG_URL='https://github.com/RPCS3/glslang/releases/download/custom-build-win/glslanglibs_mt.7z'
VULKAN_SDK_URL="https://www.dropbox.com/s/cs77c3iv5mbo0bt/VulkanSDK-${VULKAN_VER}-Installer.exe" VULKAN_SDK_URL="https://www.dropbox.com/s/cs77c3iv5mbo0bt/VulkanSDK-${VULKAN_VER}-Installer.exe"

View file

@ -61,7 +61,7 @@ windows_task:
linux_task: linux_task:
container: container:
image: rpcs3/rpcs3-ci-bionic:1.6 image: rpcs3/rpcs3-ci-bionic:1.7
cpu: 4 cpu: 4
memory: 16G memory: 16G
env: env:

34
3rdparty/llvm.cmake vendored
View file

@ -2,7 +2,7 @@ if(WITH_LLVM)
CHECK_CXX_COMPILER_FLAG("-msse -msse2 -mcx16" COMPILER_X86) CHECK_CXX_COMPILER_FLAG("-msse -msse2 -mcx16" COMPILER_X86)
CHECK_CXX_COMPILER_FLAG("-march=armv8-a+lse" COMPILER_ARM) CHECK_CXX_COMPILER_FLAG("-march=armv8-a+lse" COMPILER_ARM)
if(BUILD_LLVM_SUBMODULE) if(BUILD_LLVM)
message(STATUS "LLVM will be built from the submodule.") message(STATUS "LLVM will be built from the submodule.")
set(LLVM_TARGETS_TO_BUILD "AArch64;X86") set(LLVM_TARGETS_TO_BUILD "AArch64;X86")
@ -38,49 +38,33 @@ if(WITH_LLVM)
set(CMAKE_CXX_FLAGS ${CXX_FLAGS_OLD}) set(CMAKE_CXX_FLAGS ${CXX_FLAGS_OLD})
# now tries to find LLVM again # now tries to find LLVM again
find_package(LLVM 13.0 CONFIG) find_package(LLVM 16.0 CONFIG)
if(NOT LLVM_FOUND) if(NOT LLVM_FOUND)
message(FATAL_ERROR "Couldn't build LLVM from the submodule. You might need to run `git submodule update --init`") message(FATAL_ERROR "Couldn't build LLVM from the submodule. You might need to run `git submodule update --init`")
endif() endif()
else() else()
message(STATUS "Using prebuilt LLVM") message(STATUS "Using prebuilt or system LLVM")
if (LLVM_DIR AND NOT IS_ABSOLUTE "${LLVM_DIR}") if (LLVM_DIR AND NOT IS_ABSOLUTE "${LLVM_DIR}")
# change relative LLVM_DIR to be relative to the source dir # change relative LLVM_DIR to be relative to the source dir
set(LLVM_DIR ${CMAKE_SOURCE_DIR}/${LLVM_DIR}) set(LLVM_DIR ${CMAKE_SOURCE_DIR}/${LLVM_DIR})
endif() endif()
find_package(LLVM 13.0 CONFIG) find_package(LLVM 16.0 CONFIG)
if (NOT LLVM_FOUND) if (NOT LLVM_FOUND)
if (LLVM_VERSION AND LLVM_VERSION_MAJOR LESS 11) if (LLVM_VERSION AND LLVM_VERSION_MAJOR LESS 16)
message(FATAL_ERROR "Found LLVM version ${LLVM_VERSION}. Required version 11.0. \ message(FATAL_ERROR "Found LLVM version ${LLVM_VERSION}. Required version 16. \
Enable BUILD_LLVM_SUBMODULE option to build LLVM from included as a git submodule.") Enable BUILD_LLVM option to build LLVM from included as a git submodule.")
endif() endif()
message(FATAL_ERROR "Can't find LLVM libraries from the CMAKE_PREFIX_PATH path or LLVM_DIR. \ message(FATAL_ERROR "Can't find LLVM libraries from the CMAKE_PREFIX_PATH path or LLVM_DIR. \
Enable BUILD_LLVM_SUBMODULE option to build LLVM from included as a git submodule.") Enable BUILD_LLVM option to build LLVM from included as a git submodule.")
endif() endif()
endif() endif()
set(LLVM_LIBS LLVMMCJIT) set(LLVM_LIBS LLVM)
if(COMPILER_X86)
set(LLVM_LIBS ${LLVM_LIBS} LLVMX86CodeGen LLVMX86AsmParser)
endif()
if(COMPILER_ARM)
set(LLVM_LIBS ${LLVM_LIBS} LLVMX86CodeGen LLVMX86AsmParser LLVMAArch64CodeGen LLVMAArch64AsmParser)
endif()
if(WIN32 OR CMAKE_SYSTEM MATCHES "Linux")
set(LLVM_LIBS ${LLVM_LIBS} LLVMIntelJITEvents)
endif()
if(CMAKE_SYSTEM MATCHES "Linux")
set(LLVM_LIBS ${LLVM_LIBS} LLVMPerfJITEvents)
endif()
add_library(3rdparty_llvm INTERFACE) add_library(3rdparty_llvm INTERFACE)
target_link_libraries(3rdparty_llvm INTERFACE ${LLVM_LIBS}) target_link_libraries(3rdparty_llvm INTERFACE ${LLVM_LIBS})

View file

@ -111,7 +111,7 @@ git submodule update --init
Open `rpcs3.sln`. The recommended build configuration is `Release`. (On older revisions: `Release - LLVM`) Open `rpcs3.sln`. The recommended build configuration is `Release`. (On older revisions: `Release - LLVM`)
You may want to download the precompiled [LLVM libs](https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win/llvmlibs_mt.7z) and extract them to the root rpcs3 folder (which contains `rpcs3.sln`), as well as download and extract the [additional libs](https://github.com/RPCS3/glslang/releases/download/custom-build-win/glslanglibs_mt.7z) to `lib\%CONFIGURATION%-x64\` to speed up compilation time (unoptimised/debug libs are currently not available precompiled). You may want to download the precompiled [LLVM libs](https://github.com/RPCS3/llvm-mirror/releases/download/custom-build-win-16.0.0/llvmlibs_mt.7z) and extract them to the root rpcs3 folder (which contains `rpcs3.sln`), as well as download and extract the [additional libs](https://github.com/RPCS3/glslang/releases/download/custom-build-win/glslanglibs_mt.7z) to `lib\%CONFIGURATION%-x64\` to speed up compilation time (unoptimised/debug libs are currently not available precompiled).
If you're not using the precompiled libs, build the following projects in *__BUILD_BEFORE* folder by right-clicking on a project > *Build*.: If you're not using the precompiled libs, build the following projects in *__BUILD_BEFORE* folder by right-clicking on a project > *Build*.:
* glslang * glslang

View file

@ -14,7 +14,7 @@ endif()
option(USE_NATIVE_INSTRUCTIONS "USE_NATIVE_INSTRUCTIONS makes rpcs3 compile with -march=native, which is useful for local builds, but not good for packages." ON) option(USE_NATIVE_INSTRUCTIONS "USE_NATIVE_INSTRUCTIONS makes rpcs3 compile with -march=native, which is useful for local builds, but not good for packages." ON)
option(WITH_LLVM "Enable usage of LLVM library" ON) option(WITH_LLVM "Enable usage of LLVM library" ON)
option(BUILD_LLVM_SUBMODULE "Build LLVM from git submodule" ON) option(BUILD_LLVM "Build LLVM from git submodule" OFF)
option(USE_FAUDIO "FAudio audio backend" ON) option(USE_FAUDIO "FAudio audio backend" ON)
option(USE_LIBEVDEV "libevdev-based joystick support" ON) option(USE_LIBEVDEV "libevdev-based joystick support" ON)
option(USE_DISCORD_RPC "Discord rich presence integration" OFF) option(USE_DISCORD_RPC "Discord rich presence integration" OFF)

View file

@ -198,6 +198,9 @@ static u8* add_jit_memory(usz size, uint align)
}); });
} }
ensure(pointer + pos >= get_jit_memory() + Off);
ensure(pointer + pos < get_jit_memory() + Off + 0x40000000);
return pointer + pos; return pointer + pos;
} }
@ -1319,7 +1322,10 @@ std::string jit_compiler::cpu(const std::string& _cpu)
m_cpu == "icelake-client" || m_cpu == "icelake-client" ||
m_cpu == "icelake-server" || m_cpu == "icelake-server" ||
m_cpu == "tigerlake" || m_cpu == "tigerlake" ||
m_cpu == "rocketlake") m_cpu == "rocketlake" ||
m_cpu == "alderlake" ||
m_cpu == "raptorlake" ||
m_cpu == "meteorlake")
{ {
// Downgrade if AVX is not supported by some chips // Downgrade if AVX is not supported by some chips
if (!utils::has_avx()) if (!utils::has_avx())
@ -1350,6 +1356,18 @@ std::string jit_compiler::cpu(const std::string& _cpu)
// Upgrade // Upgrade
m_cpu = "znver2"; m_cpu = "znver2";
} }
if ((m_cpu == "znver3" || m_cpu == "goldmont" || m_cpu == "alderlake" || m_cpu == "raptorlake" || m_cpu == "meteorlake") && utils::has_avx512_icl())
{
// Upgrade
m_cpu = "icelake-client";
}
if (m_cpu == "goldmont" && utils::has_avx2())
{
// Upgrade
m_cpu = "alderlake";
}
} }
return m_cpu; return m_cpu;
@ -1362,15 +1380,13 @@ jit_compiler::jit_compiler(const std::unordered_map<std::string, u64>& _link, co
std::string result; std::string result;
auto null_mod = std::make_unique<llvm::Module> ("null_", *m_context); auto null_mod = std::make_unique<llvm::Module> ("null_", *m_context);
#if defined(__APPLE__) && defined(ARCH_ARM64) null_mod->setTargetTriple(llvm::Triple::normalize(llvm::sys::getProcessTriple()));
// Force override triple on Apple arm64 or we'll get linking errors.
null_mod->setTargetTriple(llvm::Triple::normalize(utils::c_llvm_default_triple)); std::unique_ptr<llvm::RTDyldMemoryManager> mem;
#endif
if (_link.empty()) if (_link.empty())
{ {
std::unique_ptr<llvm::RTDyldMemoryManager> mem; // Auxiliary JIT (does not use custom memory manager, only writes the objects)
if (flags & 0x1) if (flags & 0x1)
{ {
mem = std::make_unique<MemoryManager1>(); mem = std::make_unique<MemoryManager1>();
@ -1378,31 +1394,33 @@ jit_compiler::jit_compiler(const std::unordered_map<std::string, u64>& _link, co
else else
{ {
mem = std::make_unique<MemoryManager2>(); mem = std::make_unique<MemoryManager2>();
null_mod->setTargetTriple(llvm::Triple::normalize(utils::c_llvm_default_triple)); #if defined(_WIN32) && defined(ARCH_X64)
null_mod->setTargetTriple(llvm::Triple::normalize("x86_64-unknown-linux-gnu"));
#endif
} }
}
else
{
mem = std::make_unique<MemoryManager1>();
}
// Auxiliary JIT (does not use custom memory manager, only writes the objects) {
m_engine.reset(llvm::EngineBuilder(std::move(null_mod)) m_engine.reset(llvm::EngineBuilder(std::move(null_mod))
.setErrorStr(&result) .setErrorStr(&result)
.setEngineKind(llvm::EngineKind::JIT) .setEngineKind(llvm::EngineKind::JIT)
.setMCJITMemoryManager(std::move(mem)) .setMCJITMemoryManager(std::move(mem))
.setOptLevel(llvm::CodeGenOpt::Aggressive) .setOptLevel(llvm::CodeGenOpt::Aggressive)
.setCodeModel(flags & 0x2 ? llvm::CodeModel::Large : llvm::CodeModel::Small) .setCodeModel(flags & 0x2 ? llvm::CodeModel::Large : llvm::CodeModel::Small)
#ifdef __APPLE__
.setCodeModel(llvm::CodeModel::Large)
#endif
.setRelocationModel(llvm::Reloc::Model::PIC_)
.setMCPU(m_cpu) .setMCPU(m_cpu)
.create()); .create());
} }
else
{
// Primary JIT
m_engine.reset(llvm::EngineBuilder(std::move(null_mod))
.setErrorStr(&result)
.setEngineKind(llvm::EngineKind::JIT)
.setMCJITMemoryManager(std::make_unique<MemoryManager1>())
.setOptLevel(llvm::CodeGenOpt::Aggressive)
.setCodeModel(flags & 0x2 ? llvm::CodeModel::Large : llvm::CodeModel::Small)
.setMCPU(m_cpu)
.create());
if (!_link.empty())
{
for (auto&& [name, addr] : _link) for (auto&& [name, addr] : _link)
{ {
m_engine->updateGlobalMapping(name, addr); m_engine->updateGlobalMapping(name, addr);

View file

@ -16,54 +16,97 @@
<Link> <Link>
<AdditionalLibraryDirectories Condition="'$(Configuration)'=='Debug'">%(AdditionalLibraryDirectories);..\llvm_build\Debug\lib</AdditionalLibraryDirectories> <AdditionalLibraryDirectories Condition="'$(Configuration)'=='Debug'">%(AdditionalLibraryDirectories);..\llvm_build\Debug\lib</AdditionalLibraryDirectories>
<AdditionalDependencies>%(AdditionalDependencies); <AdditionalDependencies>%(AdditionalDependencies);
LLVMProfileData.lib; LLVMAggressiveInstCombine.lib;
LLVMDebugInfoCodeView.lib; LLVMAnalysis.lib;
LLVMDebugInfoMSF.lib; LLVMAsmParser.lib;
LLVMInstrumentation.lib; LLVMAsmPrinter.lib;
LLVMMCJIT.lib; LLVMBinaryFormat.lib;
LLVMRuntimeDyld.lib; LLVMBitReader.lib;
LLVMVectorize.lib; LLVMBitstreamReader.lib;
LLVMX86CodeGen.lib; LLVMBitWriter.lib;
LLVMGlobalISel.lib; LLVMCFGuard.lib;
LLVMX86Disassembler.lib; LLVMCFIVerify.lib;
LLVMExecutionEngine.lib; LLVMCodeGen.lib;
LLVMAsmPrinter.lib; LLVMCore.lib;
LLVMSelectionDAG.lib; LLVMCoroutines.lib;
LLVMCodeGen.lib; LLVMCoverage.lib;
LLVMScalarOpts.lib; LLVMDebugInfoCodeView.lib;
LLVMInstCombine.lib; LLVMDebuginfod.lib;
LLVMTransformUtils.lib; LLVMDebugInfoDWARF.lib;
LLVMAnalysis.lib; LLVMDebugInfoGSYM.lib;
LLVMTarget.lib; LLVMDebugInfoLogicalView.lib;
LLVMX86Desc.lib; LLVMDebugInfoMSF.lib;
LLVMObject.lib; LLVMDebugInfoPDB.lib;
LLVMMCParser.lib; LLVMDemangle.lib;
LLVMBitReader.lib; LLVMDiff.lib;
LLVMCore.lib; LLVMDlltoolDriver.lib;
LLVMMC.lib; LLVMDWARFLinker.lib;
LLVMX86Info.lib; LLVMDWARFLinkerParallel.lib;
LLVMSupport.lib; LLVMDWP.lib;
LLVMMCDisassembler.lib; LLVMExecutionEngine.lib;
LLVMipo.lib; LLVMExegesis.lib;
LLVMBinaryFormat.lib; LLVMExegesisX86.lib;
LLVMPasses.lib; LLVMExtensions.lib;
LLVMIRReader.lib; LLVMFileCheck.lib;
LLVMLinker.lib; LLVMFrontendHLSL.lib;
LLVMAsmParser.lib; LLVMFrontendOpenACC.lib;
LLVMX86AsmParser.lib; LLVMFrontendOpenMP.lib;
LLVMDemangle.lib; LLVMFuzzerCLI.lib;
LLVMDebugInfoDWARF.lib; LLVMFuzzMutate.lib;
LLVMRemarks.lib; LLVMGlobalISel.lib;
LLVMBitstreamReader.lib; LLVMInstCombine.lib;
LLVMTextAPI.lib; LLVMInstrumentation.lib;
LLVMCFGuard.lib; LLVMIntelJITEvents.lib;
LLVMAggressiveInstCombine.lib; LLVMInterfaceStub.lib;
LLVMBitWriter.lib; LLVMInterpreter.lib;
LLVMCoroutines.lib; LLVMipo.lib;
LLVMObjCARCOpts.lib; LLVMIRPrinter.lib;
LLVMIntelJITEvents.lib; LLVMIRReader.lib;
LLVMJITLink.lib;
LLVMLibDriver.lib;
LLVMLineEditor.lib;
LLVMLinker.lib;
LLVMLTO.lib;
LLVMMCA.lib;
LLVMMCDisassembler.lib;
LLVMMCJIT.lib;
LLVMMC.lib;
LLVMMCParser.lib;
LLVMMIRParser.lib;
LLVMObjCARCOpts.lib;
LLVMObjCopy.lib;
LLVMObject.lib;
LLVMObjectYAML.lib;
LLVMOption.lib;
LLVMOrcJIT.lib;
LLVMOrcShared.lib;
LLVMOrcTargetProcess.lib;
LLVMPasses.lib;
LLVMProfileData.lib;
LLVMRemarks.lib;
LLVMRuntimeDyld.lib;
LLVMScalarOpts.lib;
LLVMSelectionDAG.lib;
LLVMSupport.lib;
LLVMSymbolize.lib;
LLVMTableGenGlobalISel.lib;
LLVMTableGen.lib;
LLVMTarget.lib;
LLVMTargetParser.lib;
LLVMTextAPI.lib;
LLVMTransformUtils.lib;
LLVMVectorize.lib;
LLVMWindowsDriver.lib;
LLVMWindowsManifest.lib;
LLVMX86AsmParser.lib;
LLVMX86CodeGen.lib;
LLVMX86Desc.lib;
LLVMX86Disassembler.lib;
LLVMX86Info.lib;
LLVMX86TargetMCA.lib;
LLVMXRay.lib;
</AdditionalDependencies> </AdditionalDependencies>
</Link> </Link>
</ItemDefinitionGroup> </ItemDefinitionGroup>
<ItemGroup /> <ItemGroup />
</Project> </Project>

View file

@ -17,54 +17,97 @@
<OptimizeReferences>true</OptimizeReferences> <OptimizeReferences>true</OptimizeReferences>
<AdditionalLibraryDirectories Condition="'$(Configuration)'=='Release'">%(AdditionalLibraryDirectories);..\llvm_build\Release\lib</AdditionalLibraryDirectories> <AdditionalLibraryDirectories Condition="'$(Configuration)'=='Release'">%(AdditionalLibraryDirectories);..\llvm_build\Release\lib</AdditionalLibraryDirectories>
<AdditionalDependencies>%(AdditionalDependencies); <AdditionalDependencies>%(AdditionalDependencies);
LLVMProfileData.lib; LLVMAggressiveInstCombine.lib;
LLVMDebugInfoCodeView.lib; LLVMAnalysis.lib;
LLVMDebugInfoMSF.lib; LLVMAsmParser.lib;
LLVMInstrumentation.lib; LLVMAsmPrinter.lib;
LLVMMCJIT.lib; LLVMBinaryFormat.lib;
LLVMRuntimeDyld.lib; LLVMBitReader.lib;
LLVMVectorize.lib; LLVMBitstreamReader.lib;
LLVMX86CodeGen.lib; LLVMBitWriter.lib;
LLVMGlobalISel.lib; LLVMCFGuard.lib;
LLVMX86Disassembler.lib; LLVMCFIVerify.lib;
LLVMExecutionEngine.lib; LLVMCodeGen.lib;
LLVMAsmPrinter.lib; LLVMCore.lib;
LLVMSelectionDAG.lib; LLVMCoroutines.lib;
LLVMCodeGen.lib; LLVMCoverage.lib;
LLVMScalarOpts.lib; LLVMDebugInfoCodeView.lib;
LLVMInstCombine.lib; LLVMDebuginfod.lib;
LLVMTransformUtils.lib; LLVMDebugInfoDWARF.lib;
LLVMAnalysis.lib; LLVMDebugInfoGSYM.lib;
LLVMTarget.lib; LLVMDebugInfoLogicalView.lib;
LLVMX86Desc.lib; LLVMDebugInfoMSF.lib;
LLVMObject.lib; LLVMDebugInfoPDB.lib;
LLVMMCParser.lib; LLVMDemangle.lib;
LLVMBitReader.lib; LLVMDiff.lib;
LLVMCore.lib; LLVMDlltoolDriver.lib;
LLVMMC.lib; LLVMDWARFLinker.lib;
LLVMX86Info.lib; LLVMDWARFLinkerParallel.lib;
LLVMSupport.lib; LLVMDWP.lib;
LLVMMCDisassembler.lib; LLVMExecutionEngine.lib;
LLVMipo.lib; LLVMExegesis.lib;
LLVMBinaryFormat.lib; LLVMExegesisX86.lib;
LLVMPasses.lib; LLVMExtensions.lib;
LLVMIRReader.lib; LLVMFileCheck.lib;
LLVMLinker.lib; LLVMFrontendHLSL.lib;
LLVMAsmParser.lib; LLVMFrontendOpenACC.lib;
LLVMX86AsmParser.lib; LLVMFrontendOpenMP.lib;
LLVMDemangle.lib; LLVMFuzzerCLI.lib;
LLVMDebugInfoDWARF.lib; LLVMFuzzMutate.lib;
LLVMRemarks.lib; LLVMGlobalISel.lib;
LLVMBitstreamReader.lib; LLVMInstCombine.lib;
LLVMTextAPI.lib; LLVMInstrumentation.lib;
LLVMCFGuard.lib; LLVMIntelJITEvents.lib;
LLVMAggressiveInstCombine.lib; LLVMInterfaceStub.lib;
LLVMBitWriter.lib; LLVMInterpreter.lib;
LLVMCoroutines.lib; LLVMipo.lib;
LLVMObjCARCOpts.lib; LLVMIRPrinter.lib;
LLVMIntelJITEvents.lib; LLVMIRReader.lib;
LLVMJITLink.lib;
LLVMLibDriver.lib;
LLVMLineEditor.lib;
LLVMLinker.lib;
LLVMLTO.lib;
LLVMMCA.lib;
LLVMMCDisassembler.lib;
LLVMMCJIT.lib;
LLVMMC.lib;
LLVMMCParser.lib;
LLVMMIRParser.lib;
LLVMObjCARCOpts.lib;
LLVMObjCopy.lib;
LLVMObject.lib;
LLVMObjectYAML.lib;
LLVMOption.lib;
LLVMOrcJIT.lib;
LLVMOrcShared.lib;
LLVMOrcTargetProcess.lib;
LLVMPasses.lib;
LLVMProfileData.lib;
LLVMRemarks.lib;
LLVMRuntimeDyld.lib;
LLVMScalarOpts.lib;
LLVMSelectionDAG.lib;
LLVMSupport.lib;
LLVMSymbolize.lib;
LLVMTableGenGlobalISel.lib;
LLVMTableGen.lib;
LLVMTarget.lib;
LLVMTargetParser.lib;
LLVMTextAPI.lib;
LLVMTransformUtils.lib;
LLVMVectorize.lib;
LLVMWindowsDriver.lib;
LLVMWindowsManifest.lib;
LLVMX86AsmParser.lib;
LLVMX86CodeGen.lib;
LLVMX86Desc.lib;
LLVMX86Disassembler.lib;
LLVMX86Info.lib;
LLVMX86TargetMCA.lib;
LLVMXRay.lib;
</AdditionalDependencies> </AdditionalDependencies>
</Link> </Link>
</ItemDefinitionGroup> </ItemDefinitionGroup>
<ItemGroup /> <ItemGroup />
</Project> </Project>

2
llvm

@ -1 +1 @@
Subproject commit 9b52b6c39ae9f0759fbce7dd0db4b3290d6ebc56 Subproject commit 89d5468e9505ddb04754eadbfed526f5b6ad4cbd

View file

@ -75,6 +75,14 @@ cpu_translator::cpu_translator(llvm::Module* _module, bool is_be)
return result; return result;
} }
}); });
// Lowering for the "any_select_by_bit4" intrinsic: chooses between operand 1
// and operand 2 for each byte, using bit 4 of the matching byte of operand 0.
register_intrinsic("any_select_by_bit4", [&](llvm::CallInst* ci) -> llvm::Value*
{
	// Shifting each 64-bit lane left by 3 moves bit 4 of every byte into
	// that byte's sign bit; the result is then viewed as 16 signed bytes.
	const auto s = bitcast<s8[16]>(m_ir->CreateShl(bitcast<u64[2]>(ci->getOperand(0)), 3));
	const auto a = bitcast<u8[16]>(ci->getOperand(1));
	const auto b = bitcast<u8[16]>(ci->getOperand(2));

	// A negative byte (bit 4 was set) selects b, otherwise a
	return m_ir->CreateSelect(m_ir->CreateICmpSLT(s, llvm::ConstantAggregateZero::get(get_type<s8[16]>())), b, a);
});
} }
void cpu_translator::initialize(llvm::LLVMContext& context, llvm::ExecutionEngine& engine) void cpu_translator::initialize(llvm::LLVMContext& context, llvm::ExecutionEngine& engine)
@ -112,6 +120,8 @@ void cpu_translator::initialize(llvm::LLVMContext& context, llvm::ExecutionEngin
cpu == "broadwell" || cpu == "broadwell" ||
cpu == "skylake" || cpu == "skylake" ||
cpu == "alderlake" || cpu == "alderlake" ||
cpu == "raptorlake" ||
cpu == "meteorlake" ||
cpu == "bdver2" || cpu == "bdver2" ||
cpu == "bdver3" || cpu == "bdver3" ||
cpu == "bdver4" || cpu == "bdver4" ||
@ -135,7 +145,9 @@ void cpu_translator::initialize(llvm::LLVMContext& context, llvm::ExecutionEngin
// Test VNNI feature (TODO) // Test VNNI feature (TODO)
if (cpu == "cascadelake" || if (cpu == "cascadelake" ||
cpu == "cooperlake" || cpu == "cooperlake" ||
cpu == "alderlake") cpu == "alderlake" ||
cpu == "raptorlake" ||
cpu == "meteorlake")
{ {
m_use_vnni = true; m_use_vnni = true;
} }

View file

@ -19,7 +19,9 @@
#include "llvm/IR/IRBuilder.h" #include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h" #include "llvm/IR/Module.h"
#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetMachine.h"
#include "llvm/Support/KnownBits.h"
#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/IntrinsicsX86.h" #include "llvm/IR/IntrinsicsX86.h"
#include "llvm/IR/IntrinsicsAArch64.h" #include "llvm/IR/IntrinsicsAArch64.h"
@ -59,6 +61,62 @@ concept DSLValue = requires (T& v)
{ v.eval(std::declval<llvm::IRBuilder<>*>()) } -> LLVMValue; { v.eval(std::declval<llvm::IRBuilder<>*>()) } -> LLVMValue;
}; };
template <usz N>
struct get_int_bits
{
};
template <>
struct get_int_bits<1>
{
using utype = bool;
};
template <>
struct get_int_bits<2>
{
using utype = i2;
};
template <>
struct get_int_bits<4>
{
using utype = i4;
};
template <>
struct get_int_bits<8>
{
using utype = u8;
};
template <>
struct get_int_bits<16>
{
using utype = u16;
};
template <>
struct get_int_bits<32>
{
using utype = u32;
};
template <>
struct get_int_bits<64>
{
using utype = u64;
};
template <>
struct get_int_bits<128>
{
using utype = u128;
};
template <usz Bits>
using get_int_vt = typename get_int_bits<Bits>::utype;
template <typename T = void> template <typename T = void>
struct llvm_value_t struct llvm_value_t
{ {
@ -3292,10 +3350,41 @@ public:
// Infinite-precision shift left // Infinite-precision shift left
template <typename T, typename U, typename CT = llvm_common_t<T, U>> template <typename T, typename U, typename CT = llvm_common_t<T, U>>
static auto inf_shl(T&& a, U&& b) auto inf_shl(T&& a, U&& b)
{ {
static constexpr u32 esz = llvm_value_t<CT>::esize; static constexpr u32 esz = llvm_value_t<CT>::esize;
if constexpr (esz == 32)
{
#if defined(ARCH_X64)
if (m_use_fma && !llvm::isa<llvm::Constant>(b.eval(m_ir)))
return eval(llvm_calli<CT, T, U>{"llvm.x86.avx2.psllv.d", {std::forward<T>(a), std::forward<U>(b)}});
#endif
}
if constexpr (esz == 16)
{
#if defined(ARCH_X64)
if (m_use_avx512 && !llvm::isa<llvm::Constant>(b.eval(m_ir)))
return eval(llvm_calli<CT, T, U>{"llvm.x86.avx512.psllv.w.128", {std::forward<T>(a), std::forward<U>(b)}});
if (m_use_fma && !llvm::isa<llvm::Constant>(b.eval(m_ir)))
{
using t32 = value_t<u32[4]>;
auto a32 = eval(bitcast<u32[4]>(std::forward<T>(a)));
auto b32 = eval(bitcast<u32[4]>(std::forward<U>(b)));
auto sizeL = eval(b32 & 0xffff);
auto sizeH = eval(b32 >> 16);
auto dataL = eval(llvm_calli<u32[4], t32, t32>{"llvm.x86.avx2.psllv.d", {a32, sizeL}});
auto dataH = eval(llvm_calli<u32[4], t32, t32>{"llvm.x86.avx2.psllv.d", {eval(a32 & 0xffff0000), sizeH}});
return eval(bitcast<CT>((dataL & 0xffff) | dataH));
}
#endif
}
return eval(select(b < esz, a << b, splat<CT>(0)));
/*
return expr(select(b < esz, a << b, splat<CT>(0)), [](llvm::Value*& value, llvm::Module* _m) -> llvm_match_tuple<T, U> return expr(select(b < esz, a << b, splat<CT>(0)), [](llvm::Value*& value, llvm::Module* _m) -> llvm_match_tuple<T, U>
{ {
static const auto M = match<CT>(); static const auto M = match<CT>();
@ -3314,14 +3403,46 @@ public:
value = nullptr; value = nullptr;
return {}; return {};
}); });
*/
} }
// Infinite-precision logical shift right (unsigned) // Infinite-precision logical shift right (unsigned)
template <typename T, typename U, typename CT = llvm_common_t<T, U>> template <typename T, typename U, typename CT = llvm_common_t<T, U>>
static auto inf_lshr(T&& a, U&& b) auto inf_lshr(T&& a, U&& b)
{ {
static constexpr u32 esz = llvm_value_t<CT>::esize; static constexpr u32 esz = llvm_value_t<CT>::esize;
if constexpr (esz == 32)
{
#if defined(ARCH_X64)
if (m_use_fma && !llvm::isa<llvm::Constant>(b.eval(m_ir)))
return eval(llvm_calli<CT, T, U>{"llvm.x86.avx2.psrlv.d", {std::forward<T>(a), std::forward<U>(b)}});
#endif
}
if constexpr (esz == 16)
{
#if defined(ARCH_X64)
if (m_use_avx512 && !llvm::isa<llvm::Constant>(b.eval(m_ir)))
return eval(llvm_calli<CT, T, U>{"llvm.x86.avx512.psrlv.w.128", {std::forward<T>(a), std::forward<U>(b)}});
if (m_use_fma && !llvm::isa<llvm::Constant>(b.eval(m_ir)))
{
using t32 = value_t<u32[4]>;
auto a32 = eval(bitcast<u32[4]>(std::forward<T>(a)));
auto b32 = eval(bitcast<u32[4]>(std::forward<U>(b)));
auto sizeL = eval(b32 & 0xffff);
auto sizeH = eval(b32 >> 16);
auto dataL = eval(llvm_calli<u32[4], t32, t32>{"llvm.x86.avx2.psrlv.d", {eval(a32 & 0xffff), sizeL}});
auto dataH = eval(llvm_calli<u32[4], t32, t32>{"llvm.x86.avx2.psrlv.d", {a32, sizeH}});
return eval(bitcast<CT>(dataL | (dataH & 0xffff0000)));
}
#endif
}
return eval(select(b < esz, a >> b, splat<CT>(0)));
/*
return expr(select(b < esz, a >> b, splat<CT>(0)), [](llvm::Value*& value, llvm::Module* _m) -> llvm_match_tuple<T, U> return expr(select(b < esz, a >> b, splat<CT>(0)), [](llvm::Value*& value, llvm::Module* _m) -> llvm_match_tuple<T, U>
{ {
static const auto M = match<CT>(); static const auto M = match<CT>();
@ -3340,14 +3461,46 @@ public:
value = nullptr; value = nullptr;
return {}; return {};
}); });
*/
} }
// Infinite-precision arithmetic shift right (signed) // Infinite-precision arithmetic shift right (signed)
template <typename T, typename U, typename CT = llvm_common_t<T, U>> template <typename T, typename U, typename CT = llvm_common_t<T, U>>
static auto inf_ashr(T&& a, U&& b) auto inf_ashr(T&& a, U&& b)
{ {
static constexpr u32 esz = llvm_value_t<CT>::esize; static constexpr u32 esz = llvm_value_t<CT>::esize;
if constexpr (esz == 32)
{
#if defined(ARCH_X64)
if (m_use_fma && !llvm::isa<llvm::Constant>(b.eval(m_ir)))
return eval(llvm_calli<CT, T, U>{"llvm.x86.avx2.psrav.d", {std::forward<T>(a), std::forward<U>(b)}});
#endif
}
if constexpr (esz == 16)
{
#if defined(ARCH_X64)
if (m_use_avx512 && !llvm::isa<llvm::Constant>(b.eval(m_ir)))
return eval(llvm_calli<CT, T, U>{"llvm.x86.avx512.psrav.w.128", {std::forward<T>(a), std::forward<U>(b)}});
if (m_use_fma && !llvm::isa<llvm::Constant>(b.eval(m_ir)))
{
using t32 = value_t<u32[4]>;
auto a32 = eval(bitcast<u32[4]>(std::forward<T>(a)));
auto b32 = eval(bitcast<u32[4]>(std::forward<U>(b)));
auto sizeL = eval(b32 & 0xffff);
auto sizeH = eval(b32 >> 16);
auto dataL = eval(llvm_calli<u32[4], t32, t32>{"llvm.x86.avx2.psrav.d", {eval(a32 << 16), sizeL}});
auto dataH = eval(llvm_calli<u32[4], t32, t32>{"llvm.x86.avx2.psrav.d", {a32, sizeH}});
return eval(bitcast<CT>((dataL >> 16) | (dataH & 0xffff0000)));
}
#endif
}
return eval(a >> select(b > (esz - 1), splat<CT>(esz - 1), b));
/*
return expr(a >> select(b > (esz - 1), splat<CT>(esz - 1), b), [](llvm::Value*& value, llvm::Module* _m) -> llvm_match_tuple<T, U> return expr(a >> select(b > (esz - 1), splat<CT>(esz - 1), b), [](llvm::Value*& value, llvm::Module* _m) -> llvm_match_tuple<T, U>
{ {
static const auto M = match<CT>(); static const auto M = match<CT>();
@ -3366,6 +3519,7 @@ public:
value = nullptr; value = nullptr;
return {}; return {};
}); });
*/
} }
template <typename... Types> template <typename... Types>
@ -3567,6 +3721,18 @@ public:
template <typename T = v128> template <typename T = v128>
llvm::Constant* make_const_vector(T, llvm::Type*, u32 = __builtin_LINE()); llvm::Constant* make_const_vector(T, llvm::Type*, u32 = __builtin_LINE());
template <typename T>
llvm::KnownBits get_known_bits(T a)
{
return llvm::computeKnownBits(a.eval(m_ir), m_module->getDataLayout());
}
template <typename T>
llvm::KnownBits kbc(T value)
{
return llvm::KnownBits::makeConstant(llvm::APInt(sizeof(T) * 8, u64(value)));
}
private: private:
// Custom intrinsic table // Custom intrinsic table
std::unordered_map<std::string_view, std::function<llvm::Value*(llvm::CallInst*)>> m_intrinsics; std::unordered_map<std::string_view, std::function<llvm::Value*(llvm::CallInst*)>> m_intrinsics;
@ -3647,6 +3813,13 @@ public:
}); });
} }
// (m << 3) >= 0 ? a : b
template <typename T, typename U, typename V>
static auto select_by_bit4(T&& m, U&& a, V&& b)
{
return llvm_calli<u8[16], T, U, V>{"any_select_by_bit4", {std::forward<T>(m), std::forward<U>(a), std::forward<V>(b)}};
}
template <typename T, typename = std::enable_if_t<std::is_same_v<llvm_common_t<T>, f32[4]>>> template <typename T, typename = std::enable_if_t<std::is_same_v<llvm_common_t<T>, f32[4]>>>
static auto fre(T&& a) static auto fre(T&& a)
{ {

View file

@ -2130,10 +2130,10 @@ static void ppu_check(ppu_thread& ppu, u64 addr)
{ {
ppu.cia = ::narrow<u32>(addr); ppu.cia = ::narrow<u32>(addr);
// ppu_check() shall not return directly
if (ppu.test_stopped()) if (ppu.test_stopped())
{ ;
return; ppu_escape(&ppu);
}
} }
static void ppu_trace(u64 addr) static void ppu_trace(u64 addr)
@ -3368,13 +3368,6 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
{ {
std::unordered_map<std::string, u64> link_table std::unordered_map<std::string, u64> link_table
{ {
{ "sys_game_watchdog_start", reinterpret_cast<u64>(ppu_execute_syscall) },
{ "sys_game_watchdog_stop", reinterpret_cast<u64>(ppu_execute_syscall) },
{ "sys_game_watchdog_clear", reinterpret_cast<u64>(ppu_execute_syscall) },
{ "sys_game_get_system_sw_version", reinterpret_cast<u64>(ppu_execute_syscall) },
{ "sys_game_board_storage_read", reinterpret_cast<u64>(ppu_execute_syscall) },
{ "sys_game_board_storage_write", reinterpret_cast<u64>(ppu_execute_syscall) },
{ "sys_game_get_rtc_status", reinterpret_cast<u64>(ppu_execute_syscall) },
{ "__trap", reinterpret_cast<u64>(&ppu_trap) }, { "__trap", reinterpret_cast<u64>(&ppu_trap) },
{ "__error", reinterpret_cast<u64>(&ppu_error) }, { "__error", reinterpret_cast<u64>(&ppu_error) },
{ "__check", reinterpret_cast<u64>(&ppu_check) }, { "__check", reinterpret_cast<u64>(&ppu_check) },
@ -3388,6 +3381,7 @@ bool ppu_initialize(const ppu_module& info, bool check_only)
{ "__dcbz", reinterpret_cast<u64>(+[](u32 addr){ alignas(64) static constexpr u8 z[128]{}; do_cell_atomic_128_store(addr, z); }) }, { "__dcbz", reinterpret_cast<u64>(+[](u32 addr){ alignas(64) static constexpr u8 z[128]{}; do_cell_atomic_128_store(addr, z); }) },
{ "__resupdate", reinterpret_cast<u64>(vm::reservation_update) }, { "__resupdate", reinterpret_cast<u64>(vm::reservation_update) },
{ "__resinterp", reinterpret_cast<u64>(ppu_reservation_fallback) }, { "__resinterp", reinterpret_cast<u64>(ppu_reservation_fallback) },
{ "__escape", reinterpret_cast<u64>(+ppu_escape) },
}; };
for (u64 index = 0; index < 1024; index++) for (u64 index = 0; index < 1024; index++)
@ -3943,12 +3937,7 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, co
std::unique_ptr<Module> _module = std::make_unique<Module>(obj_name, jit.get_context()); std::unique_ptr<Module> _module = std::make_unique<Module>(obj_name, jit.get_context());
// Initialize target // Initialize target
#if defined(__APPLE__) && defined(ARCH_ARM64)
// Force target linux on macOS arm64 to bypass some 64-bit address space linking issues
_module->setTargetTriple(Triple::normalize(utils::c_llvm_default_triple));
#else
_module->setTargetTriple(Triple::normalize(sys::getProcessTriple())); _module->setTargetTriple(Triple::normalize(sys::getProcessTriple()));
#endif
_module->setDataLayout(jit.get_engine().getTargetMachine()->createDataLayout()); _module->setDataLayout(jit.get_engine().getTargetMachine()->createDataLayout());
// Initialize translator // Initialize translator
@ -3978,6 +3967,11 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, co
} }
{ {
if (g_cfg.core.ppu_debug)
{
translator.build_interpreter();
}
legacy::FunctionPassManager pm(_module.get()); legacy::FunctionPassManager pm(_module.get());
// Basic optimizations // Basic optimizations

View file

@ -200,7 +200,7 @@ Function* PPUTranslator::Translate(const ppu_function& info)
// Create tail call to the check function // Create tail call to the check function
m_ir->SetInsertPoint(vcheck); m_ir->SetInsertPoint(vcheck);
Call(GetType<void>(), "__check", m_thread, GetAddr()); Call(GetType<void>(), "__check", m_thread, GetAddr())->setTailCall();
m_ir->CreateRetVoid(); m_ir->CreateRetVoid();
} }
else else
@ -604,12 +604,12 @@ Value* PPUTranslator::ReadMemory(Value* addr, Type* type, bool is_be, u32 align)
{ {
// Read, byteswap, bitcast // Read, byteswap, bitcast
const auto int_type = m_ir->getIntNTy(size); const auto int_type = m_ir->getIntNTy(size);
const auto value = m_ir->CreateAlignedLoad(int_type, GetMemory(addr, int_type), llvm::MaybeAlign{align}, true); const auto value = m_ir->CreateAlignedLoad(int_type, GetMemory(addr, int_type), llvm::MaybeAlign{align});
return bitcast(Call(int_type, fmt::format("llvm.bswap.i%u", size), value), type); return bitcast(Call(int_type, fmt::format("llvm.bswap.i%u", size), value), type);
} }
// Read normally // Read normally
return m_ir->CreateAlignedLoad(type, GetMemory(addr, type), llvm::MaybeAlign{align}, true); return m_ir->CreateAlignedLoad(type, GetMemory(addr, type), llvm::MaybeAlign{align});
} }
void PPUTranslator::WriteMemory(Value* addr, Value* value, bool is_be, u32 align) void PPUTranslator::WriteMemory(Value* addr, Value* value, bool is_be, u32 align)
@ -625,7 +625,7 @@ void PPUTranslator::WriteMemory(Value* addr, Value* value, bool is_be, u32 align
} }
// Write // Write
m_ir->CreateAlignedStore(value, GetMemory(addr, value->getType()), llvm::MaybeAlign{align}, true); m_ir->CreateAlignedStore(value, GetMemory(addr, value->getType()), llvm::MaybeAlign{align});
} }
void PPUTranslator::CompilationError(const std::string& error) void PPUTranslator::CompilationError(const std::string& error)
@ -1945,12 +1945,14 @@ void PPUTranslator::SC(ppu_opcode_t op)
if (index < 1024) if (index < 1024)
{ {
Call(GetType<void>(), fmt::format("%s", ppu_syscall_code(index)), m_thread); Call(GetType<void>(), fmt::format("%s", ppu_syscall_code(index)), m_thread);
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
m_ir->CreateRetVoid(); m_ir->CreateRetVoid();
return; return;
} }
} }
Call(GetType<void>(), op.lev ? "__lv1call" : "__syscall", m_thread, num); Call(GetType<void>(), op.lev ? "__lv1call" : "__syscall", m_thread, num);
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
m_ir->CreateRetVoid(); m_ir->CreateRetVoid();
} }
@ -2507,6 +2509,7 @@ void PPUTranslator::LWARX(ppu_opcode_t op)
RegStore(Trunc(GetAddr()), m_cia); RegStore(Trunc(GetAddr()), m_cia);
FlushRegisters(); FlushRegisters();
Call(GetType<void>(), "__resinterp", m_thread); Call(GetType<void>(), "__resinterp", m_thread);
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
m_ir->CreateRetVoid(); m_ir->CreateRetVoid();
return; return;
} }
@ -2649,6 +2652,7 @@ void PPUTranslator::LDARX(ppu_opcode_t op)
RegStore(Trunc(GetAddr()), m_cia); RegStore(Trunc(GetAddr()), m_cia);
FlushRegisters(); FlushRegisters();
Call(GetType<void>(), "__resinterp", m_thread); Call(GetType<void>(), "__resinterp", m_thread);
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
m_ir->CreateRetVoid(); m_ir->CreateRetVoid();
return; return;
} }
@ -2786,11 +2790,7 @@ void PPUTranslator::MTOCRF(ppu_opcode_t op)
const auto index = m_ir->CreateAnd(m_ir->CreateLShr(value, 28 - i * 4), 15); const auto index = m_ir->CreateAnd(m_ir->CreateLShr(value, 28 - i * 4), 15);
const auto src = m_ir->CreateGEP(dyn_cast<GlobalVariable>(m_mtocr_table)->getValueType(), m_mtocr_table, {m_ir->getInt32(0), m_ir->CreateShl(index, 2)}); const auto src = m_ir->CreateGEP(dyn_cast<GlobalVariable>(m_mtocr_table)->getValueType(), m_mtocr_table, {m_ir->getInt32(0), m_ir->CreateShl(index, 2)});
const auto dst = bitcast(m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast<uint>(m_cr - m_locals) + i * 4), GetType<u8*>()); const auto dst = bitcast(m_ir->CreateStructGEP(m_thread_type, m_thread, static_cast<uint>(m_cr - m_locals) + i * 4), GetType<u8*>());
#if LLVM_VERSION_MAJOR < 15
Call(GetType<void>(), "llvm.memcpy.p0i8.p0i8.i32", dst, src, m_ir->getInt32(4), m_ir->getFalse());
#else
Call(GetType<void>(), "llvm.memcpy.p0.p0.i32", dst, src, m_ir->getInt32(4), m_ir->getFalse()); Call(GetType<void>(), "llvm.memcpy.p0.p0.i32", dst, src, m_ir->getInt32(4), m_ir->getFalse());
#endif
} }
} }
} }
@ -3313,7 +3313,7 @@ void PPUTranslator::STVLX(ppu_opcode_t op)
const auto mask = bitcast<bool[16]>(splat<u16>(0xffff) << trunc<u16>(value<u64>(addr) & 0xf)); const auto mask = bitcast<bool[16]>(splat<u16>(0xffff) << trunc<u16>(value<u64>(addr) & 0xf));
const auto ptr = value<u8(*)[16]>(GetMemory(m_ir->CreateAnd(addr, ~0xfull), GetType<u8[16]>())); const auto ptr = value<u8(*)[16]>(GetMemory(m_ir->CreateAnd(addr, ~0xfull), GetType<u8[16]>()));
const auto align = splat<u32>(16); const auto align = splat<u32>(16);
eval(llvm_calli<void, decltype(data), decltype(ptr), decltype(align), decltype(mask)>{"llvm.masked.store.v16i8.p0v16i8", {data, ptr, align, mask}}); eval(llvm_calli<void, decltype(data), decltype(ptr), decltype(align), decltype(mask)>{"llvm.masked.store.v16i8.p0", {data, ptr, align, mask}});
} }
void PPUTranslator::STDBRX(ppu_opcode_t op) void PPUTranslator::STDBRX(ppu_opcode_t op)
@ -3343,7 +3343,7 @@ void PPUTranslator::STVRX(ppu_opcode_t op)
const auto mask = bitcast<bool[16]>(trunc<u16>(splat<u64>(0xffff) << (value<u64>(addr) & 0xf) >> 16)); const auto mask = bitcast<bool[16]>(trunc<u16>(splat<u64>(0xffff) << (value<u64>(addr) & 0xf) >> 16));
const auto ptr = value<u8(*)[16]>(GetMemory(m_ir->CreateAnd(addr, ~0xfull), GetType<u8[16]>())); const auto ptr = value<u8(*)[16]>(GetMemory(m_ir->CreateAnd(addr, ~0xfull), GetType<u8[16]>()));
const auto align = splat<u32>(16); const auto align = splat<u32>(16);
eval(llvm_calli<void, decltype(data), decltype(ptr), decltype(align), decltype(mask)>{"llvm.masked.store.v16i8.p0v16i8", {data, ptr, align, mask}}); eval(llvm_calli<void, decltype(data), decltype(ptr), decltype(align), decltype(mask)>{"llvm.masked.store.v16i8.p0", {data, ptr, align, mask}});
} }
void PPUTranslator::STFSUX(ppu_opcode_t op) void PPUTranslator::STFSUX(ppu_opcode_t op)
@ -3524,7 +3524,7 @@ void PPUTranslator::DCBZ(ppu_opcode_t op)
} }
else else
{ {
Call(GetType<void>(), "llvm.memset.p0i8.i32", GetMemory(addr, GetType<u8>()), m_ir->getInt8(0), m_ir->getInt32(128), m_ir->getTrue()); Call(GetType<void>(), "llvm.memset.p0.i32", GetMemory(addr, GetType<u8>()), m_ir->getInt8(0), m_ir->getInt32(128), m_ir->getFalse());
} }
} }
@ -4601,6 +4601,7 @@ void PPUTranslator::UNK(ppu_opcode_t op)
{ {
FlushRegisters(); FlushRegisters();
Call(GetType<void>(), "__error", m_thread, GetAddr(), m_ir->getInt32(op.opcode)); Call(GetType<void>(), "__error", m_thread, GetAddr(), m_ir->getInt32(op.opcode));
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
m_ir->CreateRetVoid(); m_ir->CreateRetVoid();
} }
@ -4862,6 +4863,7 @@ Value* PPUTranslator::CheckTrapCondition(u32 to, Value* left, Value* right)
void PPUTranslator::Trap() void PPUTranslator::Trap()
{ {
Call(GetType<void>(), "__trap", m_thread, GetAddr()); Call(GetType<void>(), "__trap", m_thread, GetAddr());
//Call(GetType<void>(), "__escape", m_thread)->setTailCall();
m_ir->CreateRetVoid(); m_ir->CreateRetVoid();
} }
@ -4909,4 +4911,184 @@ MDNode* PPUTranslator::CheckBranchProbability(u32 bo)
return nullptr; return nullptr;
} }
void PPUTranslator::build_interpreter()
{
#define BUILD_VEC_INST(i) { \
m_function = llvm::cast<llvm::Function>(m_module->getOrInsertFunction("op_" #i, get_type<void>(), m_thread_type->getPointerTo()).getCallee()); \
std::fill(std::begin(m_globals), std::end(m_globals), nullptr); \
std::fill(std::begin(m_locals), std::end(m_locals), nullptr); \
IRBuilder<> irb(BasicBlock::Create(m_context, "__entry", m_function)); \
m_ir = &irb; \
m_thread = m_function->getArg(0); \
ppu_opcode_t op{}; \
op.vd = 0; \
op.va = 1; \
op.vb = 2; \
op.vc = 3; \
this->i(op); \
FlushRegisters(); \
m_ir->CreateRetVoid(); \
replace_intrinsics(*m_function); \
}
BUILD_VEC_INST(VADDCUW);
BUILD_VEC_INST(VADDFP);
BUILD_VEC_INST(VADDSBS);
BUILD_VEC_INST(VADDSHS);
BUILD_VEC_INST(VADDSWS);
BUILD_VEC_INST(VADDUBM);
BUILD_VEC_INST(VADDUBS);
BUILD_VEC_INST(VADDUHM);
BUILD_VEC_INST(VADDUHS);
BUILD_VEC_INST(VADDUWM);
BUILD_VEC_INST(VADDUWS);
BUILD_VEC_INST(VAND);
BUILD_VEC_INST(VANDC);
BUILD_VEC_INST(VAVGSB);
BUILD_VEC_INST(VAVGSH);
BUILD_VEC_INST(VAVGSW);
BUILD_VEC_INST(VAVGUB);
BUILD_VEC_INST(VAVGUH);
BUILD_VEC_INST(VAVGUW);
BUILD_VEC_INST(VCFSX);
BUILD_VEC_INST(VCFUX);
BUILD_VEC_INST(VCMPBFP);
BUILD_VEC_INST(VCMPBFP_);
BUILD_VEC_INST(VCMPEQFP);
BUILD_VEC_INST(VCMPEQFP_);
BUILD_VEC_INST(VCMPEQUB);
BUILD_VEC_INST(VCMPEQUB_);
BUILD_VEC_INST(VCMPEQUH);
BUILD_VEC_INST(VCMPEQUH_);
BUILD_VEC_INST(VCMPEQUW);
BUILD_VEC_INST(VCMPEQUW_);
BUILD_VEC_INST(VCMPGEFP);
BUILD_VEC_INST(VCMPGEFP_);
BUILD_VEC_INST(VCMPGTFP);
BUILD_VEC_INST(VCMPGTFP_);
BUILD_VEC_INST(VCMPGTSB);
BUILD_VEC_INST(VCMPGTSB_);
BUILD_VEC_INST(VCMPGTSH);
BUILD_VEC_INST(VCMPGTSH_);
BUILD_VEC_INST(VCMPGTSW);
BUILD_VEC_INST(VCMPGTSW_);
BUILD_VEC_INST(VCMPGTUB);
BUILD_VEC_INST(VCMPGTUB_);
BUILD_VEC_INST(VCMPGTUH);
BUILD_VEC_INST(VCMPGTUH_);
BUILD_VEC_INST(VCMPGTUW);
BUILD_VEC_INST(VCMPGTUW_);
BUILD_VEC_INST(VCTSXS);
BUILD_VEC_INST(VCTUXS);
BUILD_VEC_INST(VEXPTEFP);
BUILD_VEC_INST(VLOGEFP);
BUILD_VEC_INST(VMADDFP);
BUILD_VEC_INST(VMAXFP);
BUILD_VEC_INST(VMAXSB);
BUILD_VEC_INST(VMAXSH);
BUILD_VEC_INST(VMAXSW);
BUILD_VEC_INST(VMAXUB);
BUILD_VEC_INST(VMAXUH);
BUILD_VEC_INST(VMAXUW);
BUILD_VEC_INST(VMHADDSHS);
BUILD_VEC_INST(VMHRADDSHS);
BUILD_VEC_INST(VMINFP);
BUILD_VEC_INST(VMINSB);
BUILD_VEC_INST(VMINSH);
BUILD_VEC_INST(VMINSW);
BUILD_VEC_INST(VMINUB);
BUILD_VEC_INST(VMINUH);
BUILD_VEC_INST(VMINUW);
BUILD_VEC_INST(VMLADDUHM);
BUILD_VEC_INST(VMRGHB);
BUILD_VEC_INST(VMRGHH);
BUILD_VEC_INST(VMRGHW);
BUILD_VEC_INST(VMRGLB);
BUILD_VEC_INST(VMRGLH);
BUILD_VEC_INST(VMRGLW);
BUILD_VEC_INST(VMSUMMBM);
BUILD_VEC_INST(VMSUMSHM);
BUILD_VEC_INST(VMSUMSHS);
BUILD_VEC_INST(VMSUMUBM);
BUILD_VEC_INST(VMSUMUHM);
BUILD_VEC_INST(VMSUMUHS);
BUILD_VEC_INST(VMULESB);
BUILD_VEC_INST(VMULESH);
BUILD_VEC_INST(VMULEUB);
BUILD_VEC_INST(VMULEUH);
BUILD_VEC_INST(VMULOSB);
BUILD_VEC_INST(VMULOSH);
BUILD_VEC_INST(VMULOUB);
BUILD_VEC_INST(VMULOUH);
BUILD_VEC_INST(VNMSUBFP);
BUILD_VEC_INST(VNOR);
BUILD_VEC_INST(VOR);
BUILD_VEC_INST(VPERM);
BUILD_VEC_INST(VPKPX);
BUILD_VEC_INST(VPKSHSS);
BUILD_VEC_INST(VPKSHUS);
BUILD_VEC_INST(VPKSWSS);
BUILD_VEC_INST(VPKSWUS);
BUILD_VEC_INST(VPKUHUM);
BUILD_VEC_INST(VPKUHUS);
BUILD_VEC_INST(VPKUWUM);
BUILD_VEC_INST(VPKUWUS);
BUILD_VEC_INST(VREFP);
BUILD_VEC_INST(VRFIM);
BUILD_VEC_INST(VRFIN);
BUILD_VEC_INST(VRFIP);
BUILD_VEC_INST(VRFIZ);
BUILD_VEC_INST(VRLB);
BUILD_VEC_INST(VRLH);
BUILD_VEC_INST(VRLW);
BUILD_VEC_INST(VRSQRTEFP);
BUILD_VEC_INST(VSEL);
BUILD_VEC_INST(VSL);
BUILD_VEC_INST(VSLB);
BUILD_VEC_INST(VSLDOI);
BUILD_VEC_INST(VSLH);
BUILD_VEC_INST(VSLO);
BUILD_VEC_INST(VSLW);
BUILD_VEC_INST(VSPLTB);
BUILD_VEC_INST(VSPLTH);
BUILD_VEC_INST(VSPLTISB);
BUILD_VEC_INST(VSPLTISH);
BUILD_VEC_INST(VSPLTISW);
BUILD_VEC_INST(VSPLTW);
BUILD_VEC_INST(VSR);
BUILD_VEC_INST(VSRAB);
BUILD_VEC_INST(VSRAH);
BUILD_VEC_INST(VSRAW);
BUILD_VEC_INST(VSRB);
BUILD_VEC_INST(VSRH);
BUILD_VEC_INST(VSRO);
BUILD_VEC_INST(VSRW);
BUILD_VEC_INST(VSUBCUW);
BUILD_VEC_INST(VSUBFP);
BUILD_VEC_INST(VSUBSBS);
BUILD_VEC_INST(VSUBSHS);
BUILD_VEC_INST(VSUBSWS);
BUILD_VEC_INST(VSUBUBM);
BUILD_VEC_INST(VSUBUBS);
BUILD_VEC_INST(VSUBUHM);
BUILD_VEC_INST(VSUBUHS);
BUILD_VEC_INST(VSUBUWM);
BUILD_VEC_INST(VSUBUWS);
BUILD_VEC_INST(VSUMSWS);
BUILD_VEC_INST(VSUM2SWS);
BUILD_VEC_INST(VSUM4SBS);
BUILD_VEC_INST(VSUM4SHS);
BUILD_VEC_INST(VSUM4UBS);
BUILD_VEC_INST(VUPKHPX);
BUILD_VEC_INST(VUPKHSB);
BUILD_VEC_INST(VUPKHSH);
BUILD_VEC_INST(VUPKLPX);
BUILD_VEC_INST(VUPKLSB);
BUILD_VEC_INST(VUPKLSH);
BUILD_VEC_INST(VXOR);
#undef BUILD_VEC_INST
}
#endif #endif

View file

@ -856,6 +856,8 @@ public:
void FCTID_(ppu_opcode_t op) { return FCTID(op); } void FCTID_(ppu_opcode_t op) { return FCTID(op); }
void FCTIDZ_(ppu_opcode_t op) { return FCTIDZ(op); } void FCTIDZ_(ppu_opcode_t op) { return FCTIDZ(op); }
void FCFID_(ppu_opcode_t op) { return FCFID(op); } void FCFID_(ppu_opcode_t op) { return FCFID(op); }
void build_interpreter();
}; };
#endif #endif

View file

@ -3914,6 +3914,7 @@ void spu_recompiler_base::dump(const spu_program& result, std::string& out)
#if LLVM_VERSION_MAJOR < 17 #if LLVM_VERSION_MAJOR < 17
#include "llvm/ADT/Triple.h" #include "llvm/ADT/Triple.h"
#endif #endif
#include "llvm/Support/Host.h"
#include "llvm/IR/LegacyPassManager.h" #include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/Verifier.h" #include "llvm/IR/Verifier.h"
#include "llvm/IR/InlineAsm.h" #include "llvm/IR/InlineAsm.h"
@ -5006,7 +5007,11 @@ public:
// Create LLVM module // Create LLVM module
std::unique_ptr<Module> _module = std::make_unique<Module>(m_hash + ".obj", m_context); std::unique_ptr<Module> _module = std::make_unique<Module>(m_hash + ".obj", m_context);
_module->setTargetTriple(Triple::normalize(utils::c_llvm_default_triple)); #if defined(_WIN32) && defined(ARCH_X64)
_module->setTargetTriple(Triple::normalize("x86_64-unknown-linux-gnu"));
#else
_module->setTargetTriple(Triple::normalize(sys::getProcessTriple()));
#endif
_module->setDataLayout(m_jit.get_engine().getTargetMachine()->createDataLayout()); _module->setDataLayout(m_jit.get_engine().getTargetMachine()->createDataLayout());
m_module = _module.get(); m_module = _module.get();
@ -5227,6 +5232,7 @@ public:
m_ir->CreateRetVoid(); m_ir->CreateRetVoid();
m_ir->SetInsertPoint(label_stop); m_ir->SetInsertPoint(label_stop);
call("spu_escape", spu_runtime::g_escape, m_thread)->setTailCall();
m_ir->CreateRetVoid(); m_ir->CreateRetVoid();
m_ir->SetInsertPoint(label_diff); m_ir->SetInsertPoint(label_diff);
@ -5681,7 +5687,11 @@ public:
// Create LLVM module // Create LLVM module
std::unique_ptr<Module> _module = std::make_unique<Module>("spu_interpreter.obj", m_context); std::unique_ptr<Module> _module = std::make_unique<Module>("spu_interpreter.obj", m_context);
_module->setTargetTriple(Triple::normalize(utils::c_llvm_default_triple)); #if defined(_WIN32) && defined(ARCH_X64)
_module->setTargetTriple(Triple::normalize("x86_64-unknown-linux-gnu"));
#else
_module->setTargetTriple(Triple::normalize(sys::getProcessTriple()));
#endif
_module->setDataLayout(m_jit.get_engine().getTargetMachine()->createDataLayout()); _module->setDataLayout(m_jit.get_engine().getTargetMachine()->createDataLayout());
m_module = _module.get(); m_module = _module.get();
@ -5982,7 +5992,8 @@ public:
ncall->setTailCall(); ncall->setTailCall();
m_ir->CreateRetVoid(); m_ir->CreateRetVoid();
m_ir->SetInsertPoint(_stop); m_ir->SetInsertPoint(_stop);
m_ir->CreateStore(m_interp_pc, spu_ptr<u32>(&spu_thread::pc)); m_ir->CreateStore(m_interp_pc, spu_ptr<u32>(&spu_thread::pc), true);
call("spu_escape", spu_runtime::g_escape, m_thread)->setTailCall();
m_ir->CreateRetVoid(); m_ir->CreateRetVoid();
} }
} }
@ -6355,7 +6366,7 @@ public:
llvm::Value* get_rchcnt(u32 off, u64 inv = 0) llvm::Value* get_rchcnt(u32 off, u64 inv = 0)
{ {
const auto val = m_ir->CreateLoad(get_type<u64>(), _ptr<u64>(m_thread, off), true); const auto val = m_ir->CreateLoad(get_type<u64>(), _ptr<u64>(m_thread, off));
const auto shv = m_ir->CreateLShr(val, spu_channel::off_count); const auto shv = m_ir->CreateLShr(val, spu_channel::off_count);
return m_ir->CreateTrunc(m_ir->CreateXor(shv, u64{inv}), get_type<u32>()); return m_ir->CreateTrunc(m_ir->CreateXor(shv, u64{inv}), get_type<u32>());
} }
@ -6415,20 +6426,20 @@ public:
} }
case MFC_Cmd: case MFC_Cmd:
{ {
res.value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::mfc_size), true); res.value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::mfc_size));
res.value = m_ir->CreateSub(m_ir->getInt32(16), res.value); res.value = m_ir->CreateSub(m_ir->getInt32(16), res.value);
break; break;
} }
case SPU_RdInMbox: case SPU_RdInMbox:
{ {
res.value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::ch_in_mbox), true); res.value = m_ir->CreateLoad(get_type<u32>(), spu_ptr<u32>(&spu_thread::ch_in_mbox));
res.value = m_ir->CreateLShr(res.value, 8); res.value = m_ir->CreateLShr(res.value, 8);
res.value = m_ir->CreateAnd(res.value, 7); res.value = m_ir->CreateAnd(res.value, 7);
break; break;
} }
case SPU_RdEventStat: case SPU_RdEventStat:
{ {
const auto mask = m_ir->CreateTrunc(m_ir->CreateLShr(m_ir->CreateLoad(get_type<u64>(), spu_ptr<u64>(&spu_thread::ch_events), true), 32), get_type<u32>()); const auto mask = m_ir->CreateTrunc(m_ir->CreateLShr(m_ir->CreateLoad(get_type<u64>(), spu_ptr<u64>(&spu_thread::ch_events)), 32), get_type<u32>());
res.value = call("spu_get_events", &exec_get_events, m_thread, mask); res.value = call("spu_get_events", &exec_get_events, m_thread, mask);
break; break;
} }
@ -6815,7 +6826,7 @@ public:
if (csize > 0 && csize <= 16) if (csize > 0 && csize <= 16)
{ {
// Generate single copy operation // Generate single copy operation
m_ir->CreateStore(m_ir->CreateLoad(vtype, m_ir->CreateBitCast(src, vtype->getPointerTo()), true), m_ir->CreateBitCast(dst, vtype->getPointerTo()), true); m_ir->CreateStore(m_ir->CreateLoad(vtype, m_ir->CreateBitCast(src, vtype->getPointerTo())), m_ir->CreateBitCast(dst, vtype->getPointerTo()));
} }
else if (csize <= stride * 16 && !(csize % 32)) else if (csize <= stride * 16 && !(csize % 32))
{ {
@ -6826,7 +6837,7 @@ public:
const auto _dst = m_ir->CreateGEP(get_type<u8>(), dst, m_ir->getInt32(i)); const auto _dst = m_ir->CreateGEP(get_type<u8>(), dst, m_ir->getInt32(i));
if (csize - i < stride) if (csize - i < stride)
{ {
m_ir->CreateStore(m_ir->CreateLoad(get_type<u8[16]>(), m_ir->CreateBitCast(_src, get_type<u8(*)[16]>()), true), m_ir->CreateBitCast(_dst, get_type<u8(*)[16]>()), true); m_ir->CreateStore(m_ir->CreateLoad(get_type<u8[16]>(), m_ir->CreateBitCast(_src, get_type<u8(*)[16]>())), m_ir->CreateBitCast(_dst, get_type<u8(*)[16]>()));
} }
else else
{ {
@ -7086,18 +7097,51 @@ public:
void ROTM(spu_opcode_t op) void ROTM(spu_opcode_t op)
{ {
const auto [a, b] = get_vrs<u32[4]>(op.ra, op.rb); const auto [a, b] = get_vrs<u32[4]>(op.ra, op.rb);
set_vr(op.rt, inf_lshr(a, -b & 63));
auto minusb = eval(-b);
if (auto [ok, x] = match_expr(b, -match<u32[4]>()); ok)
{
minusb = eval(x);
}
if (auto k = get_known_bits(minusb); (k & kbc<u32>(32)).isZero())
{
set_vr(op.rt, a >> (minusb & 31));
return;
}
set_vr(op.rt, inf_lshr(a, minusb & 63));
} }
void ROTMA(spu_opcode_t op) void ROTMA(spu_opcode_t op)
{ {
const auto [a, b] = get_vrs<s32[4]>(op.ra, op.rb); const auto [a, b] = get_vrs<s32[4]>(op.ra, op.rb);
set_vr(op.rt, inf_ashr(a, -b & 63));
auto minusb = eval(-b);
if (auto [ok, x] = match_expr(b, -match<s32[4]>()); ok)
{
minusb = eval(x);
}
if (auto k = get_known_bits(minusb); (k & kbc<u32>(32)).isZero())
{
set_vr(op.rt, a >> (minusb & 31));
return;
}
set_vr(op.rt, inf_ashr(a, minusb & 63));
} }
void SHL(spu_opcode_t op) void SHL(spu_opcode_t op)
{ {
const auto [a, b] = get_vrs<u32[4]>(op.ra, op.rb); const auto [a, b] = get_vrs<u32[4]>(op.ra, op.rb);
if (auto k = get_known_bits(b); (k & kbc<u32>(32)).isZero())
{
set_vr(op.rt, a << (b & 31));
return;
}
set_vr(op.rt, inf_shl(a, b & 63)); set_vr(op.rt, inf_shl(a, b & 63));
} }
@ -7110,18 +7154,51 @@ public:
void ROTHM(spu_opcode_t op) void ROTHM(spu_opcode_t op)
{ {
const auto [a, b] = get_vrs<u16[8]>(op.ra, op.rb); const auto [a, b] = get_vrs<u16[8]>(op.ra, op.rb);
set_vr(op.rt, inf_lshr(a, -b & 31));
auto minusb = eval(-b);
if (auto [ok, x] = match_expr(b, -match<u16[8]>()); ok)
{
minusb = eval(x);
}
if (auto k = get_known_bits(minusb); (k & kbc<u16>(16)).isZero())
{
set_vr(op.rt, a >> (minusb & 15));
return;
}
set_vr(op.rt, inf_lshr(a, minusb & 31));
} }
void ROTMAH(spu_opcode_t op) void ROTMAH(spu_opcode_t op)
{ {
const auto [a, b] = get_vrs<s16[8]>(op.ra, op.rb); const auto [a, b] = get_vrs<s16[8]>(op.ra, op.rb);
set_vr(op.rt, inf_ashr(a, -b & 31));
auto minusb = eval(-b);
if (auto [ok, x] = match_expr(b, -match<s16[8]>()); ok)
{
minusb = eval(x);
}
if (auto k = get_known_bits(minusb); (k & kbc<u16>(16)).isZero())
{
set_vr(op.rt, a >> (minusb & 15));
return;
}
set_vr(op.rt, inf_ashr(a, minusb & 31));
} }
void SHLH(spu_opcode_t op) void SHLH(spu_opcode_t op)
{ {
const auto [a, b] = get_vrs<u16[8]>(op.ra, op.rb); const auto [a, b] = get_vrs<u16[8]>(op.ra, op.rb);
if (auto k = get_known_bits(b); (k & kbc<u16>(16)).isZero())
{
set_vr(op.rt, a << (b & 15));
return;
}
set_vr(op.rt, inf_shl(a, b & 31)); set_vr(op.rt, inf_shl(a, b & 31));
} }
@ -8093,6 +8170,12 @@ public:
} }
} }
if (auto [ok, y] = match_expr(x, bitcast<bool[std::extent_v<VT>]>(match<get_int_vt<std::extent_v<VT>>>())); ok)
{
// Don't ruin FSMB/FSM/FSMH instructions
return false;
}
set_vr(op.rt4, select(x, get_vr<VT>(op.rb), get_vr<VT>(op.ra))); set_vr(op.rt4, select(x, get_vr<VT>(op.rb), get_vr<VT>(op.ra)));
return true; return true;
} }
@ -8337,9 +8420,9 @@ public:
const auto bx = pshufb(bs, c); const auto bx = pshufb(bs, c);
if (perm_only) if (perm_only)
set_vr(op.rt4, select(noncast<s8[16]>(c << 3) >= 0, ax, bx)); set_vr(op.rt4, select_by_bit4(c, ax, bx));
else else
set_vr(op.rt4, select(noncast<s8[16]>(c << 3) >= 0, ax, bx) | x); set_vr(op.rt4, select_by_bit4(c, ax, bx) | x);
return; return;
} }
@ -8352,9 +8435,9 @@ public:
const auto ax = pshufb(as, c); const auto ax = pshufb(as, c);
if (perm_only) if (perm_only)
set_vr(op.rt4, select(noncast<s8[16]>(c << 3) >= 0, ax, b)); set_vr(op.rt4, select_by_bit4(c, ax, b));
else else
set_vr(op.rt4, select(noncast<s8[16]>(c << 3) >= 0, ax, b) | x); set_vr(op.rt4, select_by_bit4(c, ax, b) | x);
return; return;
} }
} }
@ -8371,9 +8454,9 @@ public:
const auto bx = pshufb(bs, c); const auto bx = pshufb(bs, c);
if (perm_only) if (perm_only)
set_vr(op.rt4, select(noncast<s8[16]>(c << 3) >= 0, a, bx)); set_vr(op.rt4, select_by_bit4(c, a, bx));
else else
set_vr(op.rt4, select(noncast<s8[16]>(c << 3) >= 0, a, bx) | x); set_vr(op.rt4, select_by_bit4(c, a, bx) | x);
return; return;
} }
} }
@ -8401,9 +8484,9 @@ public:
const auto bx = pshufb(b, cr); const auto bx = pshufb(b, cr);
if (perm_only) if (perm_only)
set_vr(op.rt4, select(noncast<s8[16]>(cr << 3) >= 0, ax, bx)); set_vr(op.rt4, select_by_bit4(cr, ax, bx));
else else
set_vr(op.rt4, select(noncast<s8[16]>(cr << 3) >= 0, ax, bx) | x); set_vr(op.rt4, select_by_bit4(cr, ax, bx) | x);
} }
void MPYA(spu_opcode_t op) void MPYA(spu_opcode_t op)
@ -9611,13 +9694,13 @@ public:
void make_store_ls(value_t<u64> addr, value_t<u8[16]> data) void make_store_ls(value_t<u64> addr, value_t<u8[16]> data)
{ {
const auto bswapped = byteswap(data); const auto bswapped = byteswap(data);
m_ir->CreateStore(bswapped.eval(m_ir), m_ir->CreateBitCast(m_ir->CreateGEP(get_type<u8>(), m_lsptr, addr.value), get_type<u8(*)[16]>()), true); m_ir->CreateStore(bswapped.eval(m_ir), m_ir->CreateBitCast(m_ir->CreateGEP(get_type<u8>(), m_lsptr, addr.value), get_type<u8(*)[16]>()));
} }
auto make_load_ls(value_t<u64> addr) auto make_load_ls(value_t<u64> addr)
{ {
value_t<u8[16]> data; value_t<u8[16]> data;
data.value = m_ir->CreateLoad(get_type<u8[16]>(), m_ir->CreateBitCast(m_ir->CreateGEP(get_type<u8>(), m_lsptr, addr.value), get_type<u8(*)[16]>()), true); data.value = m_ir->CreateLoad(get_type<u8[16]>(), m_ir->CreateBitCast(m_ir->CreateGEP(get_type<u8>(), m_lsptr, addr.value), get_type<u8(*)[16]>()));
return byteswap(data); return byteswap(data);
} }
@ -9839,7 +9922,7 @@ public:
target->addIncoming(e_addr, e_exec); target->addIncoming(e_addr, e_exec);
m_ir->CreateCondBr(get_imm<bool>(op.d).value, d_exec, d_done, m_md_unlikely); m_ir->CreateCondBr(get_imm<bool>(op.d).value, d_exec, d_done, m_md_unlikely);
m_ir->SetInsertPoint(d_exec); m_ir->SetInsertPoint(d_exec);
m_ir->CreateStore(m_ir->getFalse(), spu_ptr<bool>(&spu_thread::interrupts_enabled))->setVolatile(true); m_ir->CreateStore(m_ir->getFalse(), spu_ptr<bool>(&spu_thread::interrupts_enabled));
m_ir->CreateBr(d_done); m_ir->CreateBr(d_done);
m_ir->SetInsertPoint(d_done); m_ir->SetInsertPoint(d_done);
m_ir->CreateBr(m_interp_bblock); m_ir->CreateBr(m_interp_bblock);
@ -9890,7 +9973,7 @@ public:
if (op.d) if (op.d)
{ {
m_ir->CreateStore(m_ir->getFalse(), spu_ptr<bool>(&spu_thread::interrupts_enabled))->setVolatile(true); m_ir->CreateStore(m_ir->getFalse(), spu_ptr<bool>(&spu_thread::interrupts_enabled));
} }
m_ir->CreateStore(addr.value, spu_ptr<u32>(&spu_thread::pc)); m_ir->CreateStore(addr.value, spu_ptr<u32>(&spu_thread::pc));
@ -10211,7 +10294,7 @@ public:
// Exit function on unexpected target // Exit function on unexpected target
m_ir->SetInsertPoint(sw->getDefaultDest()); m_ir->SetInsertPoint(sw->getDefaultDest());
m_ir->CreateStore(addr.value, spu_ptr<u32>(&spu_thread::pc), true); m_ir->CreateStore(addr.value, spu_ptr<u32>(&spu_thread::pc));
if (m_finfo && m_finfo->fn) if (m_finfo && m_finfo->fn)
{ {

View file

@ -136,7 +136,7 @@ namespace psf
{ {
std::string_view value{value_array, CharN}; std::string_view value{value_array, CharN};
value = value.substr(0, std::min<usz>(value.find_first_of('\0'), value.size())); value = value.substr(0, std::min<usz>(value.find_first_of('\0'), value.size()));
return string(CharN, value, allow_truncate); return string(max_size, value, allow_truncate);
} }
// Make array entry // Make array entry

View file

@ -71,16 +71,4 @@ namespace utils
u64 _get_main_tid(); u64 _get_main_tid();
inline const u64 main_tid = _get_main_tid(); inline const u64 main_tid = _get_main_tid();
#ifdef LLVM_AVAILABLE
#if defined(ARCH_X64)
const std::string c_llvm_default_triple = "x86_64-unknown-linux-gnu";
#elif defined(ARCH_ARM64)
const std::string c_llvm_default_triple = "arm64-unknown-linux-gnu";
#else
const std::string c_llvm_default_triple = "Unimplemented!"
#endif
#endif
} }

View file

@ -115,7 +115,7 @@ namespace std
} }
#endif #endif
#if defined(__INTELLISENSE__) #if defined(__INTELLISENSE__) || (defined (__clang__) && (__clang_major__ <= 16))
#define consteval constexpr #define consteval constexpr
#define constinit #define constinit
#endif #endif