Use LLVM 9

Use new add/sub with saturation intrinsics
This commit is contained in:
Nekotekina 2019-03-29 16:35:00 +03:00
parent 7e0b941e9f
commit d873802b9c
9 changed files with 148 additions and 92 deletions

8
3rdparty/llvm.cmake vendored
View file

@ -26,7 +26,7 @@ if(WITH_LLVM)
set(CMAKE_CXX_FLAGS ${CXX_FLAGS_OLD})
# now tries to find LLVM again
find_package(LLVM 8.0 CONFIG)
find_package(LLVM 9.0 CONFIG)
if(NOT LLVM_FOUND)
message(FATAL_ERROR "Couldn't build LLVM from the submodule. You might need to run `git submodule update --init`")
endif()
@ -39,11 +39,11 @@ if(WITH_LLVM)
set(LLVM_DIR ${CMAKE_SOURCE_DIR}/${LLVM_DIR})
endif()
find_package(LLVM 8.0 CONFIG)
find_package(LLVM 9.0 CONFIG)
if (NOT LLVM_FOUND)
if (LLVM_VERSION AND LLVM_VERSION_MAJOR LESS 8)
message(FATAL_ERROR "Found LLVM version ${LLVM_VERSION}. Required version 8.0. \
if (LLVM_VERSION AND LLVM_VERSION_MAJOR LESS 9)
message(FATAL_ERROR "Found LLVM version ${LLVM_VERSION}. Required version 9.0. \
Enable BUILD_LLVM_SUBMODULE option to build LLVM from included as a git submodule.")
endif()

View file

@ -570,7 +570,7 @@ struct EventListener : llvm::JITEventListener
{
}
void NotifyObjectEmitted(const llvm::object::ObjectFile& obj, const llvm::RuntimeDyld::LoadedObjectInfo& inf) override
void notifyObjectLoaded(ObjectKey K, const llvm::object::ObjectFile& obj, const llvm::RuntimeDyld::LoadedObjectInfo& inf) override
{
#ifdef _WIN32
for (auto it = obj.section_begin(), end = obj.section_end(); it != end; ++it)

2
llvm

@ -1 +1 @@
Subproject commit 5c906fd1694e3c8f0b9548581d275ef01dc0972a
Subproject commit 71ca0f4f293dbfda4e73cc17ae5e60a9070e43a0

View file

@ -992,13 +992,6 @@ public:
return (b ^ s) & ~(a ^ b);
}
// Signed subtraction overflow test for d = a - b.
// The sign bit of the returned value is set exactly when a and b have
// different signs and d has a different sign than a, i.e. when the
// subtraction overflowed. Only the sign bit is meant to be inspected.
template <typename T>
static inline auto sborrow(T a, T b, T d)
{
	// Sign bit set when the two operands disagree in sign
	const auto operand_signs_differ = a ^ b;
	// Sign bit set when the result's sign differs from the minuend's
	const auto result_sign_flipped = a ^ d;
	return operand_signs_differ & result_sign_flipped;
}
// Bitwise select (c ? a : b)
template <typename T>
static inline auto merge(T c, T a, T b)
@ -1014,12 +1007,96 @@ public:
return a << (b & mask) | a >> (-b & mask);
}
// Rotate left
// Add with saturation
template <typename T>
static inline auto rol(T a, u64 b)
inline auto add_sat(T a, T b)
{
static constexpr u64 mask = value_t<typename T::type>::esize - 1;
return a << (b & mask) | a >> ((0 - b) & mask);
value_t<typename T::type> result;
const auto eva = a.eval(m_ir);
const auto evb = b.eval(m_ir);
// Compute constant result immediately if possible
if (llvm::isa<llvm::Constant>(eva) && llvm::isa<llvm::Constant>(evb))
{
static_assert(result.is_sint || result.is_uint);
if constexpr (result.is_sint)
{
llvm::Type* cast_to = m_ir->getIntNTy(result.esize * 2);
if constexpr (result.is_vector != 0)
cast_to = llvm::VectorType::get(cast_to, result.is_vector);
const auto axt = m_ir->CreateSExt(eva, cast_to);
const auto bxt = m_ir->CreateSExt(evb, cast_to);
result.value = m_ir->CreateAdd(axt, bxt);
const auto _max = m_ir->getInt(llvm::APInt::getSignedMaxValue(result.esize * 2).ashr(result.esize));
const auto _min = m_ir->getInt(llvm::APInt::getSignedMinValue(result.esize * 2).ashr(result.esize));
const auto smax = result.is_vector != 0 ? llvm::ConstantVector::getSplat(result.is_vector, _max) : _max;
const auto smin = result.is_vector != 0 ? llvm::ConstantVector::getSplat(result.is_vector, _min) : _min;
result.value = m_ir->CreateSelect(m_ir->CreateICmpSGT(result.value, smax), smax, result.value);
result.value = m_ir->CreateSelect(m_ir->CreateICmpSLT(result.value, smin), smin, result.value);
result.value = m_ir->CreateTrunc(result.value, result.get_type(m_context));
}
else
{
const auto _max = m_ir->getInt(llvm::APInt::getMaxValue(result.esize));
const auto ones = result.is_vector != 0 ? llvm::ConstantVector::getSplat(result.is_vector, _max) : _max;
result.value = m_ir->CreateAdd(eva, evb);
result.value = m_ir->CreateSelect(m_ir->CreateICmpULT(result.value, eva), ones, result.value);
}
}
else
{
result.value = m_ir->CreateCall(get_intrinsic<typename T::type>(result.is_sint ? llvm::Intrinsic::sadd_sat : llvm::Intrinsic::uadd_sat), {eva, evb});
}
return result;
}
// Subtract with saturation
// Evaluates both operands through m_ir and returns a value_t<typename T::type>
// whose value is a - b clamped to the element type's range instead of wrapping.
// Two code paths:
//  - both evaluated operands are llvm::Constant: the result is built from plain
//    sub/compare/select operations (no intrinsic call);
//  - otherwise: a call to the ssub_sat (signed) or usub_sat (unsigned) intrinsic.
template <typename T>
inline auto sub_sat(T a, T b)
{
value_t<typename T::type> result;
const auto eva = a.eval(m_ir);
const auto evb = b.eval(m_ir);
// Compute constant result immediately if possible
// NOTE(review): presumably relies on the IR builder folding operations on constants - confirm
if (llvm::isa<llvm::Constant>(eva) && llvm::isa<llvm::Constant>(evb))
{
// Reject element types that are neither is_sint nor is_uint
static_assert(result.is_sint || result.is_uint);
if constexpr (result.is_sint)
{
// Signed: widen both operands to twice the element width (per lane for vectors),
// so the exact difference is representable before clamping
llvm::Type* cast_to = m_ir->getIntNTy(result.esize * 2);
if constexpr (result.is_vector != 0)
cast_to = llvm::VectorType::get(cast_to, result.is_vector);
const auto axt = m_ir->CreateSExt(eva, cast_to);
const auto bxt = m_ir->CreateSExt(evb, cast_to);
result.value = m_ir->CreateSub(axt, bxt);
// Element-type limits expressed in the widened type: the (2 * esize)-bit signed
// max/min arithmetically shifted right by esize bits
const auto _max = m_ir->getInt(llvm::APInt::getSignedMaxValue(result.esize * 2).ashr(result.esize));
const auto _min = m_ir->getInt(llvm::APInt::getSignedMinValue(result.esize * 2).ashr(result.esize));
// Splat the limits across all lanes when the result is a vector
const auto smax = result.is_vector != 0 ? llvm::ConstantVector::getSplat(result.is_vector, _max) : _max;
const auto smin = result.is_vector != 0 ? llvm::ConstantVector::getSplat(result.is_vector, _min) : _min;
// Clamp the widened difference to [smin, smax], then truncate back to the result type
result.value = m_ir->CreateSelect(m_ir->CreateICmpSGT(result.value, smax), smax, result.value);
result.value = m_ir->CreateSelect(m_ir->CreateICmpSLT(result.value, smin), smin, result.value);
result.value = m_ir->CreateTrunc(result.value, result.get_type(m_context));
}
else
{
// Unsigned: a - b wraps below zero exactly when a < b; select zero for those lanes
const auto _min = m_ir->getInt(llvm::APInt::getMinValue(result.esize));
const auto zero = result.is_vector != 0 ? llvm::ConstantVector::getSplat(result.is_vector, _min) : _min;
result.value = m_ir->CreateSub(eva, evb);
result.value = m_ir->CreateSelect(m_ir->CreateICmpULT(eva, evb), zero, result.value);
}
}
else
{
// At least one operand is not a constant: emit the saturating-subtract intrinsic
// that matches the signedness of the element type
result.value = m_ir->CreateCall(get_intrinsic<typename T::type>(result.is_sint ? llvm::Intrinsic::ssub_sat : llvm::Intrinsic::usub_sat), {eva, evb});
}
return result;
}
// Average: (a + b + 1) >> 1
@ -1029,18 +1106,15 @@ public:
//return (a >> 1) + (b >> 1) + ((a | b) & 1);
value_t<typename T::type> result;
llvm::Instruction::CastOps cast_op = llvm::Instruction::BitCast;
if (result.is_sint)
cast_op = llvm::Instruction::SExt;
if (result.is_uint)
cast_op = llvm::Instruction::ZExt;
llvm::Type* cast_t = m_ir->getIntNTy(result.esize * 2);
if (result.is_vector != 0)
cast_t = llvm::VectorType::get(cast_t, result.is_vector);
static_assert(result.is_sint || result.is_uint);
const auto cast_op = result.is_sint ? llvm::Instruction::SExt : llvm::Instruction::ZExt;
llvm::Type* cast_to = m_ir->getIntNTy(result.esize * 2);
if constexpr (result.is_vector != 0)
cast_to = llvm::VectorType::get(cast_to, result.is_vector);
const auto axt = m_ir->CreateCast(cast_op, a.eval(m_ir), cast_t);
const auto bxt = m_ir->CreateCast(cast_op, b.eval(m_ir), cast_t);
const auto cxt = llvm::ConstantInt::get(cast_t, 1, false);
const auto axt = m_ir->CreateCast(cast_op, a.eval(m_ir), cast_to);
const auto bxt = m_ir->CreateCast(cast_op, b.eval(m_ir), cast_to);
const auto cxt = llvm::ConstantInt::get(cast_to, 1, false);
const auto abc = m_ir->CreateAdd(m_ir->CreateAdd(axt, bxt), cxt);
result.value = m_ir->CreateTrunc(m_ir->CreateLShr(abc, 1), result.get_type(m_context));
return result;

View file

@ -1690,7 +1690,7 @@ static void ppu_initialize2(jit_compiler& jit, const ppu_module& module_part, co
{
if (func.size)
{
const auto f = cast<Function>(module->getOrInsertFunction(func.name, _func));
const auto f = cast<Function>(module->getOrInsertFunction(func.name, _func).getCallee());
f->addAttribute(1, Attribute::NoAlias);
}
}

View file

@ -267,7 +267,7 @@ void PPUTranslator::CallFunction(u64 target, Value* indirect)
return;
}
indirect = m_module->getOrInsertFunction(fmt::format("__0x%llx", target), type);
indirect = m_module->getOrInsertFunction(fmt::format("__0x%llx", target), type).getCallee();
}
else
{
@ -597,33 +597,27 @@ void PPUTranslator::VADDSBS(ppu_opcode_t op)
{
const auto a = get_vr<s8[16]>(op.va);
const auto b = get_vr<s8[16]>(op.vb);
const auto s = eval(a + b);
const auto z = eval((a >> 7) ^ 0x7f);
const auto x = eval(scarry(a, b, s) >> 7);
set_vr(op.vd, eval(merge(x, z, s)));
SetSat(IsNotZero(x.value));
const auto r = add_sat(a, b);
set_vr(op.vd, r);
SetSat(IsNotZero(eval(r != (a + b)).value));
}
void PPUTranslator::VADDSHS(ppu_opcode_t op)
{
const auto a = get_vr<s16[8]>(op.va);
const auto b = get_vr<s16[8]>(op.vb);
const auto s = eval(a + b);
const auto z = eval((a >> 15) ^ 0x7fff);
const auto x = eval(scarry(a, b, s) >> 15);
set_vr(op.vd, eval(merge(x, z, s)));
SetSat(IsNotZero(x.value));
const auto r = add_sat(a, b);
set_vr(op.vd, r);
SetSat(IsNotZero(eval(r != (a + b)).value));
}
void PPUTranslator::VADDSWS(ppu_opcode_t op)
{
const auto a = get_vr<s32[4]>(op.va);
const auto b = get_vr<s32[4]>(op.vb);
const auto s = eval(a + b);
const auto z = eval((a >> 31) ^ 0x7fffffff);
const auto x = eval(scarry(a, b, s) >> 31);
set_vr(op.vd, eval(merge(x, z, s)));
SetSat(IsNotZero(x.value));
const auto r = add_sat(a, b);
set_vr(op.vd, r);
SetSat(IsNotZero(eval(r != (a + b)).value));
}
void PPUTranslator::VADDUBM(ppu_opcode_t op)
@ -637,10 +631,9 @@ void PPUTranslator::VADDUBS(ppu_opcode_t op)
{
const auto a = get_vr<u8[16]>(op.va);
const auto b = get_vr<u8[16]>(op.vb);
const auto s = eval(a + b);
const auto x = eval(s < a);
set_vr(op.vd, select(x, splat<u8[16]>(-1), s));
SetSat(IsNotZero(x.value));
const auto r = add_sat(a, b);
set_vr(op.vd, r);
SetSat(IsNotZero(eval(r != (a + b)).value));
}
void PPUTranslator::VADDUHM(ppu_opcode_t op)
@ -654,10 +647,9 @@ void PPUTranslator::VADDUHS(ppu_opcode_t op)
{
const auto a = get_vr<u16[8]>(op.va);
const auto b = get_vr<u16[8]>(op.vb);
const auto s = eval(a + b);
const auto x = eval(s < a);
set_vr(op.vd, select(x, splat<u16[8]>(-1), s));
SetSat(IsNotZero(x.value));
const auto r = add_sat(a, b);
set_vr(op.vd, r);
SetSat(IsNotZero(eval(r != (a + b)).value));
}
void PPUTranslator::VADDUWM(ppu_opcode_t op)
@ -671,10 +663,9 @@ void PPUTranslator::VADDUWS(ppu_opcode_t op)
{
const auto a = get_vr<u32[4]>(op.va);
const auto b = get_vr<u32[4]>(op.vb);
const auto s = eval(a + b);
const auto x = eval(s < a);
set_vr(op.vd, select(x, splat<u32[4]>(-1), s));
SetSat(IsNotZero(x.value));
const auto r = add_sat(a, b);
set_vr(op.vd, r);
SetSat(IsNotZero(eval(r != (a + b)).value));
}
void PPUTranslator::VAND(ppu_opcode_t op)
@ -1491,33 +1482,27 @@ void PPUTranslator::VSUBSBS(ppu_opcode_t op)
{
const auto a = get_vr<s8[16]>(op.va);
const auto b = get_vr<s8[16]>(op.vb);
const auto d = eval(a - b);
const auto z = eval((a >> 7) ^ 0x7f);
const auto x = eval(sborrow(a, b, d) >> 7);
set_vr(op.vd, eval(merge(x, z, d)));
SetSat(IsNotZero(x.value));
const auto r = sub_sat(a, b);
set_vr(op.vd, r);
SetSat(IsNotZero(eval(r != (a - b)).value));
}
void PPUTranslator::VSUBSHS(ppu_opcode_t op)
{
const auto a = get_vr<s16[8]>(op.va);
const auto b = get_vr<s16[8]>(op.vb);
const auto d = eval(a - b);
const auto z = eval((a >> 15) ^ 0x7fff);
const auto x = eval(sborrow(a, b, d) >> 15);
set_vr(op.vd, eval(merge(x, z, d)));
SetSat(IsNotZero(x.value));
const auto r = sub_sat(a, b);
set_vr(op.vd, r);
SetSat(IsNotZero(eval(r != (a - b)).value));
}
void PPUTranslator::VSUBSWS(ppu_opcode_t op)
{
const auto a = get_vr<s32[4]>(op.va);
const auto b = get_vr<s32[4]>(op.vb);
const auto d = eval(a - b);
const auto z = eval((a >> 31) ^ 0x7fffffff);
const auto x = eval(sborrow(a, b, d) >> 31);
set_vr(op.vd, eval(merge(x, z, d)));
SetSat(IsNotZero(x.value));
const auto r = sub_sat(a, b);
set_vr(op.vd, r);
SetSat(IsNotZero(eval(r != (a - b)).value));
}
void PPUTranslator::VSUBUBM(ppu_opcode_t op)
@ -1531,10 +1516,9 @@ void PPUTranslator::VSUBUBS(ppu_opcode_t op)
{
const auto a = get_vr<u8[16]>(op.va);
const auto b = get_vr<u8[16]>(op.vb);
const auto d = eval(a - b);
const auto x = eval(a < b);
set_vr(op.vd, select(x, splat<u8[16]>(0), d));
SetSat(IsNotZero(x.value));
const auto r = sub_sat(a, b);
set_vr(op.vd, r);
SetSat(IsNotZero(eval(r != (a - b)).value));
}
void PPUTranslator::VSUBUHM(ppu_opcode_t op)
@ -1548,10 +1532,9 @@ void PPUTranslator::VSUBUHS(ppu_opcode_t op)
{
const auto a = get_vr<u16[8]>(op.va);
const auto b = get_vr<u16[8]>(op.vb);
const auto d = eval(a - b);
const auto x = eval(a < b);
set_vr(op.vd, select(x, splat<u16[8]>(0), d));
SetSat(IsNotZero(x.value));
const auto r = sub_sat(a, b);
set_vr(op.vd, r);
SetSat(IsNotZero(eval(r != (a - b)).value));
}
void PPUTranslator::VSUBUWM(ppu_opcode_t op)
@ -1565,10 +1548,9 @@ void PPUTranslator::VSUBUWS(ppu_opcode_t op)
{
const auto a = get_vr<u32[4]>(op.va);
const auto b = get_vr<u32[4]>(op.vb);
const auto d = eval(a - b);
const auto x = eval(a < b);
set_vr(op.vd, select(x, splat<u32[4]>(0), d));
SetSat(IsNotZero(x.value));
const auto r = sub_sat(a, b);
set_vr(op.vd, r);
SetSat(IsNotZero(eval(r != (a - b)).value));
}
void PPUTranslator::VSUMSWS(ppu_opcode_t op)

View file

@ -302,7 +302,7 @@ public:
llvm::CallInst* Call(llvm::Type* ret, llvm::AttributeList attr, llvm::StringRef name, Args... args)
{
// Call the function
return m_ir->CreateCall(m_module->getOrInsertFunction(name, attr, ret, args->getType()...), {args...});
return m_ir->CreateCall(m_module->getOrInsertFunction(name, attr, ret, args->getType()...).getCallee(), {args...});
}
// Call a function

View file

@ -2193,7 +2193,7 @@ class spu_llvm_recompiler : public spu_recompiler_base, public cpu_translator
{
// Get function chunk name
const std::string name = fmt::format("spu-chunk-0x%05x", addr);
llvm::Function* result = llvm::cast<llvm::Function>(m_module->getOrInsertFunction(name, get_type<void>(), get_type<u8*>(), get_type<u8*>(), get_type<u32>()));
llvm::Function* result = llvm::cast<llvm::Function>(m_module->getOrInsertFunction(name, get_ftype<void, u8*, u8*, u32>()).getCallee());
// Set parameters
result->setLinkage(llvm::GlobalValue::InternalLinkage);
@ -3089,7 +3089,7 @@ public:
m_ir = &irb;
// Add entry function (contains only state/code check)
const auto main_func = llvm::cast<llvm::Function>(m_module->getOrInsertFunction(hash, get_type<void>(), get_type<u8*>(), get_type<u8*>(), get_type<u8*>()));
const auto main_func = llvm::cast<llvm::Function>(m_module->getOrInsertFunction(hash, get_ftype<void, u8*, u8*, u8*>()).getCallee());
const auto main_arg2 = &*(main_func->arg_begin() + 2);
set_function(main_func);
@ -3423,7 +3423,7 @@ public:
std::vector<llvm::Constant*> chunks;
chunks.reserve(m_size / 4);
const auto null = cast<Function>(module->getOrInsertFunction("spu-null", get_type<void>(), get_type<u8*>(), get_type<u8*>(), get_type<u32>()));
const auto null = cast<Function>(module->getOrInsertFunction("spu-null", get_ftype<void, u8*, u8*, u32>()).getCallee());
null->setLinkage(llvm::GlobalValue::InternalLinkage);
set_function(null);
m_ir->CreateRetVoid();
@ -3599,7 +3599,7 @@ public:
m_function_table = new GlobalVariable(*m_module, ArrayType::get(if_type->getPointerTo(), 1u << m_interp_magn), true, GlobalValue::InternalLinkage, nullptr);
// Add return function
const auto ret_func = cast<Function>(module->getOrInsertFunction("spu_ret", if_type));
const auto ret_func = cast<Function>(module->getOrInsertFunction("spu_ret", if_type).getCallee());
ret_func->setCallingConv(CallingConv::GHC);
ret_func->setLinkage(GlobalValue::InternalLinkage);
m_ir->SetInsertPoint(BasicBlock::Create(m_context, "", ret_func));
@ -3609,7 +3609,7 @@ public:
m_ir->CreateRetVoid();
// Add entry function, serves as a trampoline
const auto main_func = llvm::cast<Function>(m_module->getOrInsertFunction("spu_interpreter", get_ftype<void, u8*, u8*, u8*>()));
const auto main_func = llvm::cast<Function>(m_module->getOrInsertFunction("spu_interpreter", get_ftype<void, u8*, u8*, u8*>()).getCallee());
set_function(main_func);
// Load pc and opcode
@ -3681,7 +3681,7 @@ public:
}
// Decode instruction name, access function
const auto f = cast<Function>(module->getOrInsertFunction(fname, if_type));
const auto f = cast<Function>(module->getOrInsertFunction(fname, if_type).getCallee());
// Build if necessary
if (f->empty())

View file

@ -10,7 +10,7 @@
<Lib>
<AdditionalLibraryDirectories Condition="'$(Configuration)'=='Debug - LLVM'">..\llvm_build\Debug\lib</AdditionalLibraryDirectories>
<AdditionalLibraryDirectories Condition="'$(Configuration)'=='Release - LLVM'">..\llvm_build\Release\lib</AdditionalLibraryDirectories>
<AdditionalDependencies>LLVMProfileData.lib;LLVMDebugInfoCodeView.lib;LLVMDebugInfoMSF.lib;LLVMInstrumentation.lib;LLVMMCJIT.lib;LLVMRuntimeDyld.lib;LLVMVectorize.lib;LLVMX86CodeGen.lib;LLVMGlobalISel.lib;LLVMX86Disassembler.lib;LLVMExecutionEngine.lib;LLVMAsmPrinter.lib;LLVMSelectionDAG.lib;LLVMCodeGen.lib;LLVMScalarOpts.lib;LLVMInstCombine.lib;LLVMTransformUtils.lib;LLVMAnalysis.lib;LLVMTarget.lib;LLVMX86Desc.lib;LLVMX86AsmPrinter.lib;LLVMObject.lib;LLVMMCParser.lib;LLVMBitReader.lib;LLVMCore.lib;LLVMX86Utils.lib;LLVMMC.lib;LLVMX86Info.lib;LLVMSupport.lib;LLVMMCDisassembler.lib;LLVMipo.lib;LLVMBinaryFormat.lib;LLVMPasses.lib;LLVMIRReader.lib;LLVMLinker.lib;LLVMAsmParser.lib</AdditionalDependencies>
<AdditionalDependencies>LLVMProfileData.lib;LLVMDebugInfoCodeView.lib;LLVMDebugInfoMSF.lib;LLVMInstrumentation.lib;LLVMMCJIT.lib;LLVMRuntimeDyld.lib;LLVMVectorize.lib;LLVMX86CodeGen.lib;LLVMGlobalISel.lib;LLVMX86Disassembler.lib;LLVMExecutionEngine.lib;LLVMAsmPrinter.lib;LLVMSelectionDAG.lib;LLVMCodeGen.lib;LLVMScalarOpts.lib;LLVMInstCombine.lib;LLVMTransformUtils.lib;LLVMAnalysis.lib;LLVMTarget.lib;LLVMX86Desc.lib;LLVMX86AsmPrinter.lib;LLVMObject.lib;LLVMMCParser.lib;LLVMBitReader.lib;LLVMCore.lib;LLVMX86Utils.lib;LLVMMC.lib;LLVMX86Info.lib;LLVMSupport.lib;LLVMMCDisassembler.lib;LLVMipo.lib;LLVMBinaryFormat.lib;LLVMPasses.lib;LLVMIRReader.lib;LLVMLinker.lib;LLVMAsmParser.lib;LLVMDemangle.lib;LLVMDebugInfoDWARF.lib</AdditionalDependencies>
</Lib>
</ItemDefinitionGroup>
<ItemGroup />