From 139d5007613696147437159a7f0d0cdcac702529 Mon Sep 17 00:00:00 2001 From: Dimitry Andric Date: Thu, 28 Apr 2022 20:16:47 +0200 Subject: [PATCH] Vendor import of llvm-project branch release/14.x llvmorg-14.0.2-0-g0e27d08cdeb3. --- clang/include/clang/Driver/Options.td | 3 +- clang/include/clang/Interpreter/Interpreter.h | 2 + clang/lib/CodeGen/CGExpr.cpp | 14 +- clang/lib/Driver/ToolChains/Arch/AArch64.cpp | 3 + clang/lib/Driver/ToolChains/OpenBSD.cpp | 9 + clang/lib/Driver/ToolChains/OpenBSD.h | 4 +- clang/lib/Interpreter/IncrementalExecutor.h | 1 + clang/lib/Interpreter/Interpreter.cpp | 6 + clang/utils/TableGen/NeonEmitter.cpp | 23 ++- compiler-rt/lib/asan/asan_linux.cpp | 30 ++-- compiler-rt/lib/builtins/clear_cache.c | 5 +- compiler-rt/lib/scudo/scudo_allocator.cpp | 4 +- compiler-rt/lib/scudo/scudo_crc32.cpp | 4 +- compiler-rt/lib/scudo/scudo_crc32.h | 12 +- compiler-rt/lib/scudo/standalone/checksum.h | 8 +- compiler-rt/lib/scudo/standalone/chunk.h | 4 +- compiler-rt/lib/scudo/standalone/crc32_hw.cpp | 4 +- libcxx/include/__filesystem/directory_entry.h | 2 +- libcxx/include/__ranges/views.h | 35 ++++ libcxx/include/module.modulemap | 1 + libcxx/include/ranges | 11 +- libcxx/src/random.cpp | 2 +- lld/COFF/DebugTypes.cpp | 19 +- lld/ELF/SyntheticSections.h | 4 +- lld/ELF/Writer.cpp | 37 ++-- llvm/include/llvm/CodeGen/FastISel.h | 7 + llvm/include/llvm/CodeGen/SelectionDAG.h | 13 ++ llvm/include/llvm/CodeGen/SelectionDAGISel.h | 1 + llvm/lib/CodeGen/MachineFunction.cpp | 3 - llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 4 +- .../lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 5 +- llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h | 3 +- .../CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 3 +- .../SelectionDAG/ScheduleDAGSDNodes.cpp | 3 +- .../lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 12 +- .../SelectionDAG/SelectionDAGBuilder.cpp | 5 +- .../CodeGen/SelectionDAG/SelectionDAGISel.cpp | 10 +- llvm/lib/MC/ELFObjectWriter.cpp | 1 + llvm/lib/Object/ELF.cpp | 2 - llvm/lib/Support/AArch64TargetParser.cpp | 2 + llvm/lib/Target/AArch64/AArch64.td | 12 +- llvm/lib/Target/AArch64/AArch64CollectLOH.cpp | 2 +- .../AArch64/AArch64ExpandPseudoInsts.cpp | 34 ++++ llvm/lib/Target/AArch64/AArch64FastISel.cpp | 8 + .../Target/AArch64/AArch64ISelLowering.cpp | 163 ++++++++++-------- llvm/lib/Target/AArch64/AArch64ISelLowering.h | 9 +- llvm/lib/Target/AArch64/AArch64InstrInfo.td | 11 ++ llvm/lib/Target/AArch64/AArch64Subtarget.h | 6 + .../AArch64/AsmParser/AArch64AsmParser.cpp | 16 +- .../AArch64/GISel/AArch64CallLowering.cpp | 12 +- .../GISel/AArch64PreLegalizerCombiner.cpp | 7 +- .../MCTargetDesc/AArch64ELFStreamer.cpp | 1 + .../Target/PowerPC/AsmParser/PPCAsmParser.cpp | 63 +++---- .../PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 2 + .../MCTargetDesc/PPCELFObjectWriter.cpp | 2 + .../PowerPC/MCTargetDesc/PPCFixupKinds.h | 4 + .../PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 4 +- .../Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp | 13 +- llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 2 + llvm/lib/Target/PowerPC/PPCInstrInfo.td | 4 +- .../RISCV/MCTargetDesc/RISCVAsmBackend.cpp | 8 +- .../Target/RISCV/MCTargetDesc/RISCVMatInt.cpp | 54 +++--- llvm/lib/Target/RISCV/RISCVFrameLowering.cpp | 23 ++- llvm/lib/Target/RISCV/RISCVISelLowering.cpp | 71 ++++---- .../Target/RISCV/RISCVMachineFunctionInfo.cpp | 30 ++++ .../Target/RISCV/RISCVMachineFunctionInfo.h | 25 +++ llvm/lib/Target/RISCV/RISCVTargetMachine.cpp | 23 +++ llvm/lib/Target/RISCV/RISCVTargetMachine.h | 8 + llvm/lib/Target/X86/X86ISelLowering.cpp | 24 ++- 
.../InstCombine/InstCombineAndOrXor.cpp | 6 + .../InstCombine/InstCombineSelect.cpp | 17 ++ .../Transforms/Vectorize/LoopVectorize.cpp | 37 ++-- .../Transforms/Vectorize/VectorCombine.cpp | 8 +- llvm/tools/llvm-objdump/ELFDump.cpp | 8 +- 74 files changed, 692 insertions(+), 346 deletions(-) create mode 100644 libcxx/include/__ranges/views.h create mode 100644 llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 53e68ed2cef9..e0d215840714 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -3372,7 +3372,7 @@ def mmark_bti_property : Flag<["-"], "mmark-bti-property">, def mno_bti_at_return_twice : Flag<["-"], "mno-bti-at-return-twice">, Group, HelpText<"Do not add a BTI instruction after a setjmp or other" - " return-twice construct (Arm only)">; + " return-twice construct (Arm/AArch64 only)">; foreach i = {1-31} in def ffixed_x#i : Flag<["-"], "ffixed-x"#i>, Group, @@ -3400,6 +3400,7 @@ def msign_return_address_EQ : Joined<["-"], "msign-return-address=">, Flags<[CC1Option]>, Group, Values<"none,all,non-leaf">, HelpText<"Select return address signing scope">; def mbranch_protection_EQ : Joined<["-"], "mbranch-protection=">, + Group, HelpText<"Enforce targets of indirect branches and function returns">; def mharden_sls_EQ : Joined<["-"], "mharden-sls=">, diff --git a/clang/include/clang/Interpreter/Interpreter.h b/clang/include/clang/Interpreter/Interpreter.h index 721a649deb43..f2fdb90f5ba4 100644 --- a/clang/include/clang/Interpreter/Interpreter.h +++ b/clang/include/clang/Interpreter/Interpreter.h @@ -26,6 +26,7 @@ namespace llvm { namespace orc { +class LLJIT; class ThreadSafeContext; } } // namespace llvm @@ -56,6 +57,7 @@ class Interpreter { static llvm::Expected> create(std::unique_ptr CI); const CompilerInstance *getCompilerInstance() const; + const llvm::orc::LLJIT *getExecutionEngine() const; llvm::Expected Parse(llvm::StringRef Code); llvm::Error Execute(PartialTranslationUnit &T); llvm::Error ParseAndExecute(llvm::StringRef Code) { diff --git a/clang/lib/CodeGen/CGExpr.cpp b/clang/lib/CodeGen/CGExpr.cpp index bb5d18b74894..2a9b108c31bc 100644 --- a/clang/lib/CodeGen/CGExpr.cpp +++ b/clang/lib/CodeGen/CGExpr.cpp @@ -4895,6 +4895,16 @@ RValue CodeGenFunction::EmitSimpleCallExpr(const CallExpr *E, return EmitCall(E->getCallee()->getType(), Callee, E, ReturnValue); } +// Detect the unusual situation where an inline version is shadowed by a +// non-inline version. In that case we should pick the external one +// everywhere. That's GCC behavior too. +static bool OnlyHasInlineBuiltinDeclaration(const FunctionDecl *FD) { + for (const FunctionDecl *PD = FD; PD; PD = PD->getPreviousDecl()) + if (!PD->isInlineBuiltinDeclaration()) + return false; + return true; +} + static CGCallee EmitDirectCallee(CodeGenFunction &CGF, GlobalDecl GD) { const FunctionDecl *FD = cast(GD.getDecl()); @@ -4902,8 +4912,8 @@ static CGCallee EmitDirectCallee(CodeGenFunction &CGF, GlobalDecl GD) { std::string FDInlineName = (FD->getName() + ".inline").str(); // When directing calling an inline builtin, call it through it's mangled // name to make it clear it's not the actual builtin. 
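// --- Illustrative aside (hypothetical sketch, not part of the vendored diff) ---
// The "shadowed inline builtin" case the new OnlyHasInlineBuiltinDeclaration()
// helper is about: under GNU inline semantics an always_inline builtin wrapper
// can later be redeclared as a plain external function, and then every call
// should resolve to the external symbol (GCC's behaviour) instead of the
// mangled ".inline" copy. Names and signature below are only for illustration.
#include <stddef.h>
extern inline __attribute__((gnu_inline, always_inline)) void *
memcpy(void *dst, const void *src, size_t n) {
  return __builtin_memcpy(dst, src, n);        // inline builtin wrapper
}
void *memcpy(void *dst, const void *src, size_t n); // non-inline shadow
// --- end aside ---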
- if (FD->isInlineBuiltinDeclaration() && - CGF.CurFn->getName() != FDInlineName) { + if (CGF.CurFn->getName() != FDInlineName && + OnlyHasInlineBuiltinDeclaration(FD)) { llvm::Constant *CalleePtr = EmitFunctionDeclPointer(CGF.CGM, GD); llvm::Function *Fn = llvm::cast(CalleePtr); llvm::Module *M = Fn->getParent(); diff --git a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp index ca0ca4bf4eea..53610f0909a2 100644 --- a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp +++ b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp @@ -592,4 +592,7 @@ void aarch64::getAArch64TargetFeatures(const Driver &D, // Enabled A53 errata (835769) workaround by default on android Features.push_back("+fix-cortex-a53-835769"); } + + if (Args.getLastArg(options::OPT_mno_bti_at_return_twice)) + Features.push_back("+no-bti-at-return-twice"); } diff --git a/clang/lib/Driver/ToolChains/OpenBSD.cpp b/clang/lib/Driver/ToolChains/OpenBSD.cpp index bcd54bedfa89..86a10ce4b0e7 100644 --- a/clang/lib/Driver/ToolChains/OpenBSD.cpp +++ b/clang/lib/Driver/ToolChains/OpenBSD.cpp @@ -342,3 +342,12 @@ Tool *OpenBSD::buildAssembler() const { Tool *OpenBSD::buildLinker() const { return new tools::openbsd::Linker(*this); } bool OpenBSD::HasNativeLLVMSupport() const { return true; } + +bool OpenBSD::IsUnwindTablesDefault(const ArgList &Args) const { + switch (getArch()) { + case llvm::Triple::arm: + return false; + default: + return true; + } +} diff --git a/clang/lib/Driver/ToolChains/OpenBSD.h b/clang/lib/Driver/ToolChains/OpenBSD.h index 9d668711b91b..2d4c4e34520b 100644 --- a/clang/lib/Driver/ToolChains/OpenBSD.h +++ b/clang/lib/Driver/ToolChains/OpenBSD.h @@ -82,9 +82,7 @@ class LLVM_LIBRARY_VISIBILITY OpenBSD : public Generic_ELF { std::string getCompilerRT(const llvm::opt::ArgList &Args, StringRef Component, FileType Type = ToolChain::FT_Static) const override; - bool IsUnwindTablesDefault(const llvm::opt::ArgList &Args) const override { - return true; - } + bool IsUnwindTablesDefault(const llvm::opt::ArgList &Args) const override; LangOptions::StackProtectorMode GetDefaultStackProtectorLevel(bool KernelOrKext) const override { diff --git a/clang/lib/Interpreter/IncrementalExecutor.h b/clang/lib/Interpreter/IncrementalExecutor.h index 24447994d5f1..51b4d83d10b1 100644 --- a/clang/lib/Interpreter/IncrementalExecutor.h +++ b/clang/lib/Interpreter/IncrementalExecutor.h @@ -45,6 +45,7 @@ class IncrementalExecutor { llvm::Error runCtors() const; llvm::Expected getSymbolAddress(llvm::StringRef Name, SymbolNameKind NameKind) const; + llvm::orc::LLJIT *getExecutionEngine() const { return Jit.get(); } }; } // end namespace clang diff --git a/clang/lib/Interpreter/Interpreter.cpp b/clang/lib/Interpreter/Interpreter.cpp index b2e7727be39a..470c9c289a74 100644 --- a/clang/lib/Interpreter/Interpreter.cpp +++ b/clang/lib/Interpreter/Interpreter.cpp @@ -196,6 +196,12 @@ const CompilerInstance *Interpreter::getCompilerInstance() const { return IncrParser->getCI(); } +const llvm::orc::LLJIT *Interpreter::getExecutionEngine() const { + if (IncrExecutor) + return IncrExecutor->getExecutionEngine(); + return nullptr; +} + llvm::Expected Interpreter::Parse(llvm::StringRef Code) { return IncrParser->Parse(Code); diff --git a/clang/utils/TableGen/NeonEmitter.cpp b/clang/utils/TableGen/NeonEmitter.cpp index a69fbe0e42dc..1550a8a1e273 100644 --- a/clang/utils/TableGen/NeonEmitter.cpp +++ b/clang/utils/TableGen/NeonEmitter.cpp @@ -502,6 +502,7 @@ class Intrinsic { void emitBody(StringRef CallPrefix); void 
emitShadowedArgs(); void emitArgumentReversal(); + void emitReturnVarDecl(); void emitReturnReversal(); void emitReverseVariable(Variable &Dest, Variable &Src); void emitNewLine(); @@ -1228,6 +1229,15 @@ void Intrinsic::emitArgumentReversal() { } } +void Intrinsic::emitReturnVarDecl() { + assert(RetVar.getType() == Types[0]); + // Create a return variable, if we're not void. + if (!RetVar.getType().isVoid()) { + OS << " " << RetVar.getType().str() << " " << RetVar.getName() << ";"; + emitNewLine(); + } +} + void Intrinsic::emitReturnReversal() { if (isBigEndianSafe()) return; @@ -1353,13 +1363,6 @@ void Intrinsic::emitBodyAsBuiltinCall() { void Intrinsic::emitBody(StringRef CallPrefix) { std::vector Lines; - assert(RetVar.getType() == Types[0]); - // Create a return variable, if we're not void. - if (!RetVar.getType().isVoid()) { - OS << " " << RetVar.getType().str() << " " << RetVar.getName() << ";"; - emitNewLine(); - } - if (!Body || Body->getValues().empty()) { // Nothing specific to output - must output a builtin. emitBodyAsBuiltinCall(); @@ -1849,6 +1852,9 @@ void Intrinsic::generateImpl(bool ReverseArguments, OS << " __attribute__((unavailable));"; } else { emitOpeningBrace(); + // Emit return variable declaration first as to not trigger + // -Wdeclaration-after-statement. + emitReturnVarDecl(); emitShadowedArgs(); if (ReverseArguments) emitArgumentReversal(); @@ -1867,6 +1873,9 @@ void Intrinsic::indexBody() { CurrentRecord = R; initVariables(); + // Emit return variable declaration first as to not trigger + // -Wdeclaration-after-statement. + emitReturnVarDecl(); emitBody(""); OS.str(""); diff --git a/compiler-rt/lib/asan/asan_linux.cpp b/compiler-rt/lib/asan/asan_linux.cpp index 1d92c530bd11..defd81bc19e2 100644 --- a/compiler-rt/lib/asan/asan_linux.cpp +++ b/compiler-rt/lib/asan/asan_linux.cpp @@ -131,30 +131,24 @@ static int FindFirstDSOCallback(struct dl_phdr_info *info, size_t size, VReport(2, "info->dlpi_name = %s\tinfo->dlpi_addr = %p\n", info->dlpi_name, (void *)info->dlpi_addr); - // Continue until the first dynamic library is found - if (!info->dlpi_name || info->dlpi_name[0] == 0) - return 0; + const char **name = (const char **)data; - // Ignore vDSO - if (internal_strncmp(info->dlpi_name, "linux-", sizeof("linux-") - 1) == 0) - return 0; - -#if SANITIZER_FREEBSD || SANITIZER_NETBSD // Ignore first entry (the main program) - char **p = (char **)data; - if (!(*p)) { - *p = (char *)-1; + if (!*name) { + *name = ""; return 0; } -#endif -#if SANITIZER_SOLARIS - // Ignore executable on Solaris - if (info->dlpi_addr == 0) +# if SANITIZER_LINUX + // Ignore vDSO. glibc versions earlier than 2.15 (and some patched + // by distributors) return an empty name for the vDSO entry, so + // detect this as well. 
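// --- Illustrative aside (hypothetical sketch, not part of the vendored diff) ---
// How dl_iterate_phdr() drives a callback like the rewritten
// FindFirstDSOCallback above: objects are visited in load order (the main
// program first, usually with an empty name), and a non-zero return value
// stops the iteration. Function and variable names here are made up.
#define _GNU_SOURCE
#include <link.h>
#include <stdio.h>

static int print_first_dso(struct dl_phdr_info *info, size_t size, void *data) {
  (void)size;
  const char **name = (const char **)data;
  if (!*name) {            // first entry is the main executable: skip it
    *name = "";
    return 0;
  }
  *name = info->dlpi_name; // record the first real DSO
  return 1;                // non-zero stops dl_iterate_phdr
}

int main(void) {
  const char *first = NULL;
  dl_iterate_phdr(print_first_dso, &first);
  printf("first DSO: %s\n", first && first[0] ? first : "(none)");
  return 0;
}
// --- end aside ---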
+ if (!info->dlpi_name[0] || + internal_strncmp(info->dlpi_name, "linux-", sizeof("linux-") - 1) == 0) return 0; -#endif +# endif - *(const char **)data = info->dlpi_name; + *name = info->dlpi_name; return 1; } @@ -175,7 +169,7 @@ void AsanCheckDynamicRTPrereqs() { // Ensure that dynamic RT is the first DSO in the list const char *first_dso_name = nullptr; dl_iterate_phdr(FindFirstDSOCallback, &first_dso_name); - if (first_dso_name && !IsDynamicRTName(first_dso_name)) { + if (first_dso_name && first_dso_name[0] && !IsDynamicRTName(first_dso_name)) { Report("ASan runtime does not come first in initial library list; " "you should either link runtime to your application or " "manually preload it with LD_PRELOAD.\n"); diff --git a/compiler-rt/lib/builtins/clear_cache.c b/compiler-rt/lib/builtins/clear_cache.c index da0715914b41..1f1efbff3ab2 100644 --- a/compiler-rt/lib/builtins/clear_cache.c +++ b/compiler-rt/lib/builtins/clear_cache.c @@ -130,7 +130,10 @@ void __clear_cache(void *start, void *end) { __asm __volatile("dsb ish"); } __asm __volatile("isb sy"); -#elif defined(__powerpc64__) +#elif defined(__powerpc__) + // Newer CPUs have a bigger line size made of multiple blocks, so the + // following value is a minimal common denominator for what used to be + // a single block cache line and is therefore inneficient. const size_t line_size = 32; const size_t len = (uintptr_t)end - (uintptr_t)start; diff --git a/compiler-rt/lib/scudo/scudo_allocator.cpp b/compiler-rt/lib/scudo/scudo_allocator.cpp index 5b6ac8b35493..6a6b577ab002 100644 --- a/compiler-rt/lib/scudo/scudo_allocator.cpp +++ b/compiler-rt/lib/scudo/scudo_allocator.cpp @@ -49,7 +49,7 @@ inline u32 computeCRC32(u32 Crc, uptr Value, uptr *Array, uptr ArraySize) { // as opposed to only for scudo_crc32.cpp. This means that other hardware // specific instructions were likely emitted at other places, and as a // result there is no reason to not use it here. 
-#if defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) +#if defined(__CRC32__) || defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) Crc = CRC32_INTRINSIC(Crc, Value); for (uptr i = 0; i < ArraySize; i++) Crc = CRC32_INTRINSIC(Crc, Array[i]); @@ -65,7 +65,7 @@ inline u32 computeCRC32(u32 Crc, uptr Value, uptr *Array, uptr ArraySize) { for (uptr i = 0; i < ArraySize; i++) Crc = computeSoftwareCRC32(Crc, Array[i]); return Crc; -#endif // defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) +#endif // defined(__CRC32__) || defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) } static BackendT &getBackend(); diff --git a/compiler-rt/lib/scudo/scudo_crc32.cpp b/compiler-rt/lib/scudo/scudo_crc32.cpp index 87473505fe79..137c44c5c1cd 100644 --- a/compiler-rt/lib/scudo/scudo_crc32.cpp +++ b/compiler-rt/lib/scudo/scudo_crc32.cpp @@ -15,10 +15,10 @@ namespace __scudo { -#if defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) +#if defined(__CRC32__) || defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) u32 computeHardwareCRC32(u32 Crc, uptr Data) { return CRC32_INTRINSIC(Crc, Data); } -#endif // defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) +#endif // defined(__CRC32__) || defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) } // namespace __scudo diff --git a/compiler-rt/lib/scudo/scudo_crc32.h b/compiler-rt/lib/scudo/scudo_crc32.h index ef40595a56d1..4314d30e929f 100644 --- a/compiler-rt/lib/scudo/scudo_crc32.h +++ b/compiler-rt/lib/scudo/scudo_crc32.h @@ -16,13 +16,17 @@ #include "sanitizer_common/sanitizer_internal_defs.h" // Hardware CRC32 is supported at compilation via the following: -// - for i386 & x86_64: -msse4.2 +// - for i386 & x86_64: -mcrc32 (earlier: -msse4.2) // - for ARM & AArch64: -march=armv8-a+crc or -mcrc // An additional check must be performed at runtime as well to make sure the // emitted instructions are valid on the target host. -#if defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) -# ifdef __SSE4_2__ +#if defined(__CRC32__) || defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) +# if defined(__CRC32__) +// NB: clang has but GCC does not +# include +# define CRC32_INTRINSIC FIRST_32_SECOND_64(__builtin_ia32_crc32si, __builtin_ia32_crc32di) +# elif defined(__SSE4_2__) # include # define CRC32_INTRINSIC FIRST_32_SECOND_64(_mm_crc32_u32, _mm_crc32_u64) # endif @@ -30,7 +34,7 @@ # include # define CRC32_INTRINSIC FIRST_32_SECOND_64(__crc32cw, __crc32cd) # endif -#endif // defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) +#endif // defined(__CRC32__) || defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) namespace __scudo { diff --git a/compiler-rt/lib/scudo/standalone/checksum.h b/compiler-rt/lib/scudo/standalone/checksum.h index a63b1b4f064d..0f787ce2b5cd 100644 --- a/compiler-rt/lib/scudo/standalone/checksum.h +++ b/compiler-rt/lib/scudo/standalone/checksum.h @@ -12,12 +12,16 @@ #include "internal_defs.h" // Hardware CRC32 is supported at compilation via the following: -// - for i386 & x86_64: -msse4.2 +// - for i386 & x86_64: -mcrc32 (earlier: -msse4.2) // - for ARM & AArch64: -march=armv8-a+crc or -mcrc // An additional check must be performed at runtime as well to make sure the // emitted instructions are valid on the target host. 
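// --- Illustrative aside (hypothetical sketch, not part of the vendored diff) ---
// The kind of runtime check the comment above refers to: even when the unit is
// built with -mcrc32/-msse4.2 or +crc, the host CPU still has to be probed
// before the hardware CRC32 path is taken. hostHasHardwareCRC32() is an
// invented name; scudo's actual probe may differ.
#include <stdbool.h>
#if defined(__x86_64__) || defined(__i386__)
static bool hostHasHardwareCRC32(void) {
  return __builtin_cpu_supports("sse4.2");       // CRC32 is part of SSE4.2
}
#elif defined(__aarch64__) && defined(__linux__)
#include <sys/auxv.h>
#include <asm/hwcap.h>
static bool hostHasHardwareCRC32(void) {
  return (getauxval(AT_HWCAP) & HWCAP_CRC32) != 0; // kernel-reported feature
}
#endif
// --- end aside ---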
-#ifdef __SSE4_2__ +#if defined(__CRC32__) +// NB: clang has but GCC does not +#include +#define CRC32_INTRINSIC FIRST_32_SECOND_64(__builtin_ia32_crc32si, __builtin_ia32_crc32di) +#elif defined(__SSE4_2__) #include #define CRC32_INTRINSIC FIRST_32_SECOND_64(_mm_crc32_u32, _mm_crc32_u64) #endif diff --git a/compiler-rt/lib/scudo/standalone/chunk.h b/compiler-rt/lib/scudo/standalone/chunk.h index 69b8e1b12a91..0581420dfc99 100644 --- a/compiler-rt/lib/scudo/standalone/chunk.h +++ b/compiler-rt/lib/scudo/standalone/chunk.h @@ -25,7 +25,7 @@ inline u16 computeChecksum(u32 Seed, uptr Value, uptr *Array, uptr ArraySize) { // as opposed to only for crc32_hw.cpp. This means that other hardware // specific instructions were likely emitted at other places, and as a result // there is no reason to not use it here. -#if defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) +#if defined(__CRC32__) || defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) u32 Crc = static_cast(CRC32_INTRINSIC(Seed, Value)); for (uptr I = 0; I < ArraySize; I++) Crc = static_cast(CRC32_INTRINSIC(Crc, Array[I])); @@ -42,7 +42,7 @@ inline u16 computeChecksum(u32 Seed, uptr Value, uptr *Array, uptr ArraySize) { Checksum = computeBSDChecksum(Checksum, Array[I]); return Checksum; } -#endif // defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) +#endif // defined(__CRC32__) || defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) } namespace Chunk { diff --git a/compiler-rt/lib/scudo/standalone/crc32_hw.cpp b/compiler-rt/lib/scudo/standalone/crc32_hw.cpp index 62841ba51019..d13c615498ff 100644 --- a/compiler-rt/lib/scudo/standalone/crc32_hw.cpp +++ b/compiler-rt/lib/scudo/standalone/crc32_hw.cpp @@ -10,10 +10,10 @@ namespace scudo { -#if defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) +#if defined(__CRC32__) || defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) u32 computeHardwareCRC32(u32 Crc, uptr Data) { return static_cast(CRC32_INTRINSIC(Crc, Data)); } -#endif // defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) +#endif // defined(__CRC32__) || defined(__SSE4_2__) || defined(__ARM_FEATURE_CRC32) } // namespace scudo diff --git a/libcxx/include/__filesystem/directory_entry.h b/libcxx/include/__filesystem/directory_entry.h index 91dd1a214588..95e45c02307f 100644 --- a/libcxx/include/__filesystem/directory_entry.h +++ b/libcxx/include/__filesystem/directory_entry.h @@ -249,7 +249,7 @@ class directory_entry { private: friend class directory_iterator; friend class recursive_directory_iterator; - friend class __dir_stream; + friend class _LIBCPP_HIDDEN __dir_stream; enum _CacheType : unsigned char { _Empty, diff --git a/libcxx/include/__ranges/views.h b/libcxx/include/__ranges/views.h new file mode 100644 index 000000000000..8cc5ba3d2aca --- /dev/null +++ b/libcxx/include/__ranges/views.h @@ -0,0 +1,35 @@ +// -*- C++ -*- +//===----------------------------------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef _LIBCPP___RANGES_VIEWS +#define _LIBCPP___RANGES_VIEWS + +#include <__config> + +#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER) +# pragma GCC system_header +#endif + +_LIBCPP_BEGIN_NAMESPACE_STD + +#if _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +namespace ranges { + +namespace views { } + +} // namespace ranges + +namespace views = ranges::views; + +#endif // _LIBCPP_STD_VER > 17 && !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) + +_LIBCPP_END_NAMESPACE_STD + +#endif // _LIBCPP___RANGES_VIEWS diff --git a/libcxx/include/module.modulemap b/libcxx/include/module.modulemap index 749c17f8bc08..9e3643faee09 100644 --- a/libcxx/include/module.modulemap +++ b/libcxx/include/module.modulemap @@ -817,6 +817,7 @@ module std [system] { export functional.__functional.perfect_forward } module view_interface { private header "__ranges/view_interface.h" } + module views { private header "__ranges/views.h" } } } module ratio { diff --git a/libcxx/include/ranges b/libcxx/include/ranges index 82ad7f4d507f..9b831160d683 100644 --- a/libcxx/include/ranges +++ b/libcxx/include/ranges @@ -223,6 +223,7 @@ namespace std::ranges { #include <__ranges/take_view.h> #include <__ranges/transform_view.h> #include <__ranges/view_interface.h> +#include <__ranges/views.h> #include // Required by the standard. #include // Required by the standard. #include // Required by the standard. @@ -233,14 +234,4 @@ namespace std::ranges { #pragma GCC system_header #endif -#if !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) - -_LIBCPP_BEGIN_NAMESPACE_STD - -namespace views = ranges::views; - -_LIBCPP_END_NAMESPACE_STD - -#endif // !defined(_LIBCPP_HAS_NO_INCOMPLETE_RANGES) - #endif // _LIBCPP_RANGES diff --git a/libcxx/src/random.cpp b/libcxx/src/random.cpp index 6472a8dbcba3..146b7c56bdeb 100644 --- a/libcxx/src/random.cpp +++ b/libcxx/src/random.cpp @@ -210,7 +210,7 @@ random_device::entropy() const noexcept return std::numeric_limits::digits; return ent; -#elif defined(__OpenBSD__) || defined(_LIBCPP_USING_FUCHSIA_CPRNG) +#elif defined(_LIBCPP_USING_ARC4_RANDOM) || defined(_LIBCPP_USING_FUCHSIA_CPRNG) return std::numeric_limits::digits; #else return 0; diff --git a/lld/COFF/DebugTypes.cpp b/lld/COFF/DebugTypes.cpp index 67b708c5b36a..0d25de464f9f 100644 --- a/lld/COFF/DebugTypes.cpp +++ b/lld/COFF/DebugTypes.cpp @@ -56,8 +56,12 @@ class TypeServerSource : public TpiSource { return; Guid = expectedInfo->getGuid(); auto it = ctx.typeServerSourceMappings.emplace(Guid, this); - assert(it.second); - (void)it; + if (!it.second) { + // If we hit here we have collision on Guid's in two PDB files. + // This can happen if the PDB Guid is invalid or if we are really + // unlucky. This should fall back on stright file-system lookup. 
+ it.first->second = nullptr; + } } Error mergeDebugT(TypeMerger *m) override; @@ -398,11 +402,12 @@ Expected UseTypeServerSource::getTypeServerSource() { const codeview::GUID &tsId = typeServerDependency.getGuid(); StringRef tsPath = typeServerDependency.getName(); - TypeServerSource *tsSrc; + TypeServerSource *tsSrc = nullptr; auto it = ctx.typeServerSourceMappings.find(tsId); if (it != ctx.typeServerSourceMappings.end()) { tsSrc = (TypeServerSource *)it->second; - } else { + } + if (tsSrc == nullptr) { // The file failed to load, lookup by name PDBInputFile *pdb = PDBInputFile::findFromRecordPath(ctx, tsPath, file); if (!pdb) @@ -897,7 +902,11 @@ struct GHashTable { /// A ghash table cell for deduplicating types from TpiSources. class GHashCell { - uint64_t data = 0; + // Force "data" to be 64-bit aligned; otherwise, some versions of clang + // will generate calls to libatomic when using some versions of libstdc++ + // on 32-bit targets. (Also, in theory, there could be a target where + // new[] doesn't always return an 8-byte-aligned allocation.) + alignas(sizeof(uint64_t)) uint64_t data = 0; public: GHashCell() = default; diff --git a/lld/ELF/SyntheticSections.h b/lld/ELF/SyntheticSections.h index e609b3d7982a..3161785988f6 100644 --- a/lld/ELF/SyntheticSections.h +++ b/lld/ELF/SyntheticSections.h @@ -187,9 +187,7 @@ class BuildIdSection : public SyntheticSection { class BssSection final : public SyntheticSection { public: BssSection(StringRef name, uint64_t size, uint32_t alignment); - void writeTo(uint8_t *) override { - llvm_unreachable("unexpected writeTo() call for SHT_NOBITS section"); - } + void writeTo(uint8_t *) override {} bool isNeeded() const override { return size != 0; } size_t getSize() const override { return size; } diff --git a/lld/ELF/Writer.cpp b/lld/ELF/Writer.cpp index 9383ac46c8e7..5794f048c990 100644 --- a/lld/ELF/Writer.cpp +++ b/lld/ELF/Writer.cpp @@ -722,23 +722,30 @@ template void Writer::addSectionSymbols() { auto *sec = dyn_cast(cmd); if (!sec) continue; - auto i = llvm::find_if(sec->commands, [](SectionCommand *cmd) { - if (auto *isd = dyn_cast(cmd)) - return !isd->sections.empty(); - return false; - }); - if (i == sec->commands.end()) - continue; - InputSectionBase *isec = cast(*i)->sections[0]; + OutputSection &osec = *sec; + InputSectionBase *isec = nullptr; + // Iterate over all input sections and add a STT_SECTION symbol if any input + // section may be a relocation target. + for (SectionCommand *cmd : osec.commands) { + auto *isd = dyn_cast(cmd); + if (!isd) + continue; + for (InputSectionBase *s : isd->sections) { + // Relocations are not using REL[A] section symbols. + if (s->type == SHT_REL || s->type == SHT_RELA) + continue; - // Relocations are not using REL[A] section symbols. - if (isec->type == SHT_REL || isec->type == SHT_RELA) - continue; + // Unlike other synthetic sections, mergeable output sections contain + // data copied from input sections, and there may be a relocation + // pointing to its contents if -r or --emit-reloc is given. + if (isa(s) && !(s->flags & SHF_MERGE)) + continue; - // Unlike other synthetic sections, mergeable output sections contain data - // copied from input sections, and there may be a relocation pointing to its - // contents if -r or --emit-reloc is given. 
- if (isa(isec) && !(isec->flags & SHF_MERGE)) + isec = s; + break; + } + } + if (!isec) continue; // Set the symbol to be relative to the output section so that its st_value diff --git a/llvm/include/llvm/CodeGen/FastISel.h b/llvm/include/llvm/CodeGen/FastISel.h index 775698a66ada..6de8ac4273f7 100644 --- a/llvm/include/llvm/CodeGen/FastISel.h +++ b/llvm/include/llvm/CodeGen/FastISel.h @@ -212,6 +212,7 @@ class FastISel { const TargetRegisterInfo &TRI; const TargetLibraryInfo *LibInfo; bool SkipTargetIndependentISel; + bool UseInstrRefDebugInfo = false; /// The position of the last instruction for materializing constants /// for use in the current block. It resets to EmitStartPt when it makes sense @@ -318,6 +319,12 @@ class FastISel { /// Reset InsertPt to the given old insert position. void leaveLocalValueArea(SavePoint Old); + /// Signal whether instruction referencing variable locations are desired for + /// this function's debug-info. + void useInstrRefDebugInfo(bool Flag) { + UseInstrRefDebugInfo = Flag; + } + protected: explicit FastISel(FunctionLoweringInfo &FuncInfo, const TargetLibraryInfo *LibInfo, diff --git a/llvm/include/llvm/CodeGen/SelectionDAG.h b/llvm/include/llvm/CodeGen/SelectionDAG.h index e31719bcff0b..4f348c9feaa5 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAG.h +++ b/llvm/include/llvm/CodeGen/SelectionDAG.h @@ -278,6 +278,9 @@ class SelectionDAG { uint16_t NextPersistentId = 0; + /// Are instruction referencing variable locations desired for this function? + bool UseInstrRefDebugInfo = false; + public: /// Clients of various APIs that cause global effects on /// the DAG can optionally implement this interface. This allows the clients @@ -1702,6 +1705,16 @@ class SelectionDAG { /// function mirrors \c llvm::salvageDebugInfo. void salvageDebugInfo(SDNode &N); + /// Signal whether instruction referencing variable locations are desired for + /// this function's debug-info. + void useInstrRefDebugInfo(bool Flag) { + UseInstrRefDebugInfo = Flag; + } + + bool getUseInstrRefDebugInfo() const { + return UseInstrRefDebugInfo; + } + void dump() const; /// In most cases this function returns the ABI alignment for a given type, diff --git a/llvm/include/llvm/CodeGen/SelectionDAGISel.h b/llvm/include/llvm/CodeGen/SelectionDAGISel.h index 9cea197724cc..fc3fdf3e4583 100644 --- a/llvm/include/llvm/CodeGen/SelectionDAGISel.h +++ b/llvm/include/llvm/CodeGen/SelectionDAGISel.h @@ -53,6 +53,7 @@ class SelectionDAGISel : public MachineFunctionPass { const TargetLowering *TLI; bool FastISelFailed; SmallPtrSet ElidedArgCopyInstrs; + bool UseInstrRefDebugInfo = false; /// Current optimization remark emitter. /// Used to report things like combines and FastISel failures. 
diff --git a/llvm/lib/CodeGen/MachineFunction.cpp b/llvm/lib/CodeGen/MachineFunction.cpp index fd5ea5cad072..02f58ca5eef0 100644 --- a/llvm/lib/CodeGen/MachineFunction.cpp +++ b/llvm/lib/CodeGen/MachineFunction.cpp @@ -1181,9 +1181,6 @@ void MachineFunction::finalizeDebugInstrRefs() { MI.getOperand(1).ChangeToRegister(0, false); }; - if (!useDebugInstrRef()) - return; - for (auto &MBB : *this) { for (auto &MI : MBB) { if (!MI.isDebugRef() || !MI.getOperand(0).isReg()) diff --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp index d8ef79fe9a7b..87a1ebe4c1db 100644 --- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -1265,7 +1265,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { // If using instruction referencing, mutate this into a DBG_INSTR_REF, // to be later patched up by finalizeDebugInstrRefs. Tack a deref onto // the expression, we don't have an "indirect" flag in DBG_INSTR_REF. - if (FuncInfo.MF->useDebugInstrRef() && Op->isReg()) { + if (UseInstrRefDebugInfo && Op->isReg()) { Builder->setDesc(TII.get(TargetOpcode::DBG_INSTR_REF)); Builder->getOperand(1).ChangeToImmediate(0); auto *NewExpr = @@ -1324,7 +1324,7 @@ bool FastISel::selectIntrinsicCall(const IntrinsicInst *II) { // If using instruction referencing, mutate this into a DBG_INSTR_REF, // to be later patched up by finalizeDebugInstrRefs. - if (FuncInfo.MF->useDebugInstrRef()) { + if (UseInstrRefDebugInfo) { Builder->setDesc(TII.get(TargetOpcode::DBG_INSTR_REF)); Builder->getOperand(1).ChangeToImmediate(0); } diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 331e0325aea3..e3e05c868102 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -1341,11 +1341,12 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, /// InstrEmitter - Construct an InstrEmitter and set it to start inserting /// at the given position in the given block. InstrEmitter::InstrEmitter(const TargetMachine &TM, MachineBasicBlock *mbb, - MachineBasicBlock::iterator insertpos) + MachineBasicBlock::iterator insertpos, + bool UseInstrRefDebugInfo) : MF(mbb->getParent()), MRI(&MF->getRegInfo()), TII(MF->getSubtarget().getInstrInfo()), TRI(MF->getSubtarget().getRegisterInfo()), TLI(MF->getSubtarget().getTargetLowering()), MBB(mbb), InsertPos(insertpos) { - EmitDebugInstrRefs = MF->useDebugInstrRef(); + EmitDebugInstrRefs = UseInstrRefDebugInfo; } diff --git a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h index ac8a70156522..ced8f064b9be 100644 --- a/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -154,7 +154,8 @@ class LLVM_LIBRARY_VISIBILITY InstrEmitter { /// InstrEmitter - Construct an InstrEmitter and set it to start inserting /// at the given position in the given block. 
InstrEmitter(const TargetMachine &TM, MachineBasicBlock *mbb, - MachineBasicBlock::iterator insertpos); + MachineBasicBlock::iterator insertpos, + bool UseInstrRefDebugInfo); private: void EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 1b89864116cb..1a6be0cc2091 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -758,7 +758,8 @@ void ScheduleDAGLinearize::Schedule() { MachineBasicBlock* ScheduleDAGLinearize::EmitSchedule(MachineBasicBlock::iterator &InsertPos) { - InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos); + InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos, + DAG->getUseInstrRefDebugInfo()); DenseMap VRBaseMap; LLVM_DEBUG({ dbgs() << "\n*** Final schedule ***\n"; }); diff --git a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index 55f6f288f3e3..92897aca7f6b 100644 --- a/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -843,7 +843,8 @@ EmitPhysRegCopy(SUnit *SU, DenseMap &VRBaseMap, /// not necessarily refer to returned BB. The emitter may split blocks. MachineBasicBlock *ScheduleDAGSDNodes:: EmitSchedule(MachineBasicBlock::iterator &InsertPos) { - InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos); + InstrEmitter Emitter(DAG->getTarget(), BB, InsertPos, + DAG->getUseInstrRefDebugInfo()); DenseMap VRBaseMap; DenseMap CopyVRBaseMap; SmallVector, 32> Orders; diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index d5998d166d25..40d861702e86 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -5494,8 +5494,18 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, const SDLoc &DL, // Build vector (integer) scalar operands may need implicit // truncation - do this before constant folding. - if (ScalarVT.isInteger() && ScalarVT.bitsGT(InSVT)) + if (ScalarVT.isInteger() && ScalarVT.bitsGT(InSVT)) { + // Don't create illegally-typed nodes unless they're constants or undef + // - if we fail to constant fold we can't guarantee the (dead) nodes + // we're creating will be cleaned up before being visited for + // legalization. + if (NewNodesMustHaveLegalTypes && !ScalarOp.isUndef() && + !isa(ScalarOp) && + TLI->getTypeAction(*getContext(), InSVT) != + TargetLowering::TypeLegal) + return SDValue(); ScalarOp = getNode(ISD::TRUNCATE, DL, InSVT, ScalarOp); + } ScalarOps.push_back(ScalarOp); } diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 01230a36e744..c61716ba1676 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -926,7 +926,10 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, CallConv.getValue(), RegVTs[Value]) : RegVTs[Value]; - if (ExtendKind == ISD::ANY_EXTEND && TLI.isZExtFree(Val, RegisterVT)) + // We need to zero extend constants that are liveout to match assumptions + // in FunctionLoweringInfo::ComputePHILiveOutRegInfo. 
+ if (ExtendKind == ISD::ANY_EXTEND && + (TLI.isZExtFree(Val, RegisterVT) || isa(Val))) ExtendKind = ISD::ZERO_EXTEND; getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), &Parts[Part], diff --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 3c786904620a..b83a60129c78 100644 --- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -425,6 +425,11 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { const Function &Fn = mf.getFunction(); MF = &mf; + // Decide what flavour of variable location debug-info will be used, before + // we change the optimisation level. + UseInstrRefDebugInfo = mf.useDebugInstrRef(); + CurDAG->useInstrRefDebugInfo(UseInstrRefDebugInfo); + // Reset the target options before resetting the optimization // level below. // FIXME: This is a horrible hack and should be processed via @@ -654,7 +659,8 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // For debug-info, in instruction referencing mode, we need to perform some // post-isel maintenence. - MF->finalizeDebugInstrRefs(); + if (UseInstrRefDebugInfo) + MF->finalizeDebugInstrRefs(); // Determine if there are any calls in this machine function. MachineFrameInfo &MFI = MF->getFrameInfo(); @@ -1380,6 +1386,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { if (TM.Options.EnableFastISel) { LLVM_DEBUG(dbgs() << "Enabling fast-isel\n"); FastIS = TLI->createFastISel(*FuncInfo, LibInfo); + if (FastIS) + FastIS->useInstrRefDebugInfo(UseInstrRefDebugInfo); } ReversePostOrderTraversal RPOT(&Fn); diff --git a/llvm/lib/MC/ELFObjectWriter.cpp b/llvm/lib/MC/ELFObjectWriter.cpp index 883735fcc293..6fd2f7e7a718 100644 --- a/llvm/lib/MC/ELFObjectWriter.cpp +++ b/llvm/lib/MC/ELFObjectWriter.cpp @@ -1336,6 +1336,7 @@ bool ELFObjectWriter::shouldRelocateWithSymbol(const MCAssembler &Asm, // can update it. return true; case ELF::STB_GLOBAL: + case ELF::STB_GNU_UNIQUE: // Global ELF symbols can be preempted by the dynamic linker. The relocation // has to point to the symbol for a reason analogous to the STB_WEAK case. return true; diff --git a/llvm/lib/Object/ELF.cpp b/llvm/lib/Object/ELF.cpp index 6e56da1a31f3..56a426211755 100644 --- a/llvm/lib/Object/ELF.cpp +++ b/llvm/lib/Object/ELF.cpp @@ -561,11 +561,9 @@ Expected ELFFile::dynamicEntries() const { } if (Dyn.empty()) - // TODO: this error is untested. return createError("invalid empty dynamic section"); if (Dyn.back().d_tag != ELF::DT_NULL) - // TODO: this error is untested. 
return createError("dynamic sections must be DT_NULL terminated"); return Dyn; diff --git a/llvm/lib/Support/AArch64TargetParser.cpp b/llvm/lib/Support/AArch64TargetParser.cpp index cdf7c8ade9aa..bb19e2714be1 100644 --- a/llvm/lib/Support/AArch64TargetParser.cpp +++ b/llvm/lib/Support/AArch64TargetParser.cpp @@ -120,6 +120,8 @@ bool AArch64::getExtensionFeatures(uint64_t Extensions, Features.push_back("+mops"); if (Extensions & AArch64::AEK_PERFMON) Features.push_back("+perfmon"); + if (Extensions & AArch64::AEK_SSBS) + Features.push_back("+ssbs"); return true; } diff --git a/llvm/lib/Target/AArch64/AArch64.td b/llvm/lib/Target/AArch64/AArch64.td index 9a04b28a8b8f..70c7b7b3f5dc 100644 --- a/llvm/lib/Target/AArch64/AArch64.td +++ b/llvm/lib/Target/AArch64/AArch64.td @@ -464,6 +464,11 @@ def FeatureEL3 : SubtargetFeature<"el3", "HasEL3", "true", def FeatureFixCortexA53_835769 : SubtargetFeature<"fix-cortex-a53-835769", "FixCortexA53_835769", "true", "Mitigate Cortex-A53 Erratum 835769">; +def FeatureNoBTIAtReturnTwice : SubtargetFeature<"no-bti-at-return-twice", + "NoBTIAtReturnTwice", "true", + "Don't place a BTI instruction " + "after a return-twice">; + //===----------------------------------------------------------------------===// // Architectures. // @@ -953,7 +958,7 @@ def ProcessorFeatures { FeatureRCPC, FeatureSSBS]; list A77 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, FeatureNEON, FeatureFullFP16, FeatureDotProd, - FeatureRCPC]; + FeatureRCPC, FeatureSSBS]; list A78 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, FeatureNEON, FeatureFullFP16, FeatureDotProd, FeatureRCPC, FeaturePerfMon, FeatureSPE, @@ -971,11 +976,12 @@ def ProcessorFeatures { FeatureSB, FeatureSpecRestrict]; list X1 = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, FeatureNEON, FeatureRCPC, FeaturePerfMon, - FeatureSPE, FeatureFullFP16, FeatureDotProd]; + FeatureSPE, FeatureFullFP16, FeatureDotProd, + FeatureSSBS]; list X1C = [HasV8_2aOps, FeatureCrypto, FeatureFPARMv8, FeatureNEON, FeatureRCPC, FeaturePerfMon, FeatureSPE, FeatureFullFP16, FeatureDotProd, - FeaturePAuth]; + FeaturePAuth, FeatureSSBS]; list X2 = [HasV9_0aOps, FeatureNEON, FeaturePerfMon, FeatureMatMulInt8, FeatureBF16, FeatureAM, FeatureMTE, FeatureETE, FeatureSVE2BitPerm, diff --git a/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp b/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp index ac243347b24d..b31b709c0c0a 100644 --- a/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp +++ b/llvm/lib/Target/AArch64/AArch64CollectLOH.cpp @@ -559,7 +559,7 @@ bool AArch64CollectLOH::runOnMachineFunction(MachineFunction &MF) { // Walk the basic block backwards and update the per register state machine // in the process. 
for (const MachineInstr &MI : - instructionsWithoutDebug(MBB.rbegin(), MBB.rend())) { + instructionsWithoutDebug(MBB.instr_rbegin(), MBB.instr_rend())) { unsigned Opcode = MI.getOpcode(); switch (Opcode) { case AArch64::ADDXri: diff --git a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp index b0f739cc26e6..910f8cdede75 100644 --- a/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp +++ b/llvm/lib/Target/AArch64/AArch64ExpandPseudoInsts.cpp @@ -86,6 +86,7 @@ class AArch64ExpandPseudo : public MachineFunctionPass { unsigned N); bool expandCALL_RVMARKER(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI); + bool expandCALL_BTI(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI); bool expandStoreSwiftAsyncContext(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI); }; @@ -759,6 +760,37 @@ bool AArch64ExpandPseudo::expandCALL_RVMARKER( return true; } +bool AArch64ExpandPseudo::expandCALL_BTI(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MBBI) { + // Expand CALL_BTI pseudo to: + // - a branch to the call target + // - a BTI instruction + // Mark the sequence as a bundle, to avoid passes moving other code in + // between. + + MachineInstr &MI = *MBBI; + MachineOperand &CallTarget = MI.getOperand(0); + assert((CallTarget.isGlobal() || CallTarget.isReg()) && + "invalid operand for regular call"); + unsigned Opc = CallTarget.isGlobal() ? AArch64::BL : AArch64::BLR; + MachineInstr *Call = + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc)).getInstr(); + Call->addOperand(CallTarget); + + MachineInstr *BTI = + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(AArch64::HINT)) + // BTI J so that setjmp can to BR to this. + .addImm(36) + .getInstr(); + + if (MI.shouldUpdateCallSiteInfo()) + MBB.getParent()->moveCallSiteInfo(&MI, Call); + + MI.eraseFromParent(); + finalizeBundle(MBB, Call->getIterator(), std::next(BTI->getIterator())); + return true; +} + bool AArch64ExpandPseudo::expandStoreSwiftAsyncContext( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI) { Register CtxReg = MBBI->getOperand(0).getReg(); @@ -1238,6 +1270,8 @@ bool AArch64ExpandPseudo::expandMI(MachineBasicBlock &MBB, return expandSVESpillFill(MBB, MBBI, AArch64::LDR_ZXI, 2); case AArch64::BLR_RVMARKER: return expandCALL_RVMARKER(MBB, MBBI); + case AArch64::BLR_BTI: + return expandCALL_BTI(MBB, MBBI); case AArch64::StoreSwiftAsyncContext: return expandStoreSwiftAsyncContext(MBB, MBBI); } diff --git a/llvm/lib/Target/AArch64/AArch64FastISel.cpp b/llvm/lib/Target/AArch64/AArch64FastISel.cpp index c67fa62c7a92..dc5e6807945d 100644 --- a/llvm/lib/Target/AArch64/AArch64FastISel.cpp +++ b/llvm/lib/Target/AArch64/AArch64FastISel.cpp @@ -14,6 +14,7 @@ #include "AArch64.h" #include "AArch64CallingConvention.h" +#include "AArch64MachineFunctionInfo.h" #include "AArch64RegisterInfo.h" #include "AArch64Subtarget.h" #include "MCTargetDesc/AArch64AddressingModes.h" @@ -3127,6 +3128,13 @@ bool AArch64FastISel::fastLowerCall(CallLoweringInfo &CLI) { if (!Callee && !Symbol) return false; + // Allow SelectionDAG isel to handle calls to functions like setjmp that need + // a bti instruction following the call. + if (CLI.CB && CLI.CB->hasFnAttr(Attribute::ReturnsTwice) && + !Subtarget->noBTIAtReturnTwice() && + MF->getInfo()->branchTargetEnforcement()) + return false; + // Allow SelectionDAG isel to handle tail calls. 
if (IsTailCall) return false; diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index ac5e51e47ddf..4d1cb0720a5a 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -2278,6 +2278,7 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { MAKE_CASE(AArch64ISD::MOPS_MEMSET_TAGGING) MAKE_CASE(AArch64ISD::MOPS_MEMCOPY) MAKE_CASE(AArch64ISD::MOPS_MEMMOVE) + MAKE_CASE(AArch64ISD::CALL_BTI) } #undef MAKE_CASE return nullptr; @@ -5843,14 +5844,62 @@ static bool mayTailCallThisCC(CallingConv::ID CC) { } } +static void analyzeCallOperands(const AArch64TargetLowering &TLI, + const AArch64Subtarget *Subtarget, + const TargetLowering::CallLoweringInfo &CLI, + CCState &CCInfo) { + const SelectionDAG &DAG = CLI.DAG; + CallingConv::ID CalleeCC = CLI.CallConv; + bool IsVarArg = CLI.IsVarArg; + const SmallVector &Outs = CLI.Outs; + bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC); + + unsigned NumArgs = Outs.size(); + for (unsigned i = 0; i != NumArgs; ++i) { + MVT ArgVT = Outs[i].VT; + ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; + + bool UseVarArgCC = false; + if (IsVarArg) { + // On Windows, the fixed arguments in a vararg call are passed in GPRs + // too, so use the vararg CC to force them to integer registers. + if (IsCalleeWin64) { + UseVarArgCC = true; + } else { + UseVarArgCC = !Outs[i].IsFixed; + } + } else { + // Get type of the original argument. + EVT ActualVT = + TLI.getValueType(DAG.getDataLayout(), CLI.Args[Outs[i].OrigArgIndex].Ty, + /*AllowUnknown*/ true); + MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ArgVT; + // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16. + if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8) + ArgVT = MVT::i8; + else if (ActualMVT == MVT::i16) + ArgVT = MVT::i16; + } + + CCAssignFn *AssignFn = TLI.CCAssignFnForCall(CalleeCC, UseVarArgCC); + bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); + assert(!Res && "Call operand has unhandled type"); + (void)Res; + } +} + bool AArch64TargetLowering::isEligibleForTailCallOptimization( - SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, SelectionDAG &DAG) const { + const CallLoweringInfo &CLI) const { + CallingConv::ID CalleeCC = CLI.CallConv; if (!mayTailCallThisCC(CalleeCC)) return false; + SDValue Callee = CLI.Callee; + bool IsVarArg = CLI.IsVarArg; + const SmallVector &Outs = CLI.Outs; + const SmallVector &OutVals = CLI.OutVals; + const SmallVector &Ins = CLI.Ins; + const SelectionDAG &DAG = CLI.DAG; MachineFunction &MF = DAG.getMachineFunction(); const Function &CallerF = MF.getFunction(); CallingConv::ID CallerCC = CallerF.getCallingConv(); @@ -5915,30 +5964,14 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization( // I want anyone implementing a new calling convention to think long and hard // about this assert. - assert((!isVarArg || CalleeCC == CallingConv::C) && + assert((!IsVarArg || CalleeCC == CallingConv::C) && "Unexpected variadic calling convention"); LLVMContext &C = *DAG.getContext(); - if (isVarArg && !Outs.empty()) { - // At least two cases here: if caller is fastcc then we can't have any - // memory arguments (we'd be expected to clean up the stack afterwards). If - // caller is C then we could potentially use its argument area. 
- - // FIXME: for now we take the most conservative of these in both cases: - // disallow all variadic memory operands. - SmallVector ArgLocs; - CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C); - - CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, true)); - for (const CCValAssign &ArgLoc : ArgLocs) - if (!ArgLoc.isRegLoc()) - return false; - } - // Check that the call results are passed in the same way. if (!CCState::resultsCompatible(CalleeCC, CallerCC, MF, C, Ins, - CCAssignFnForCall(CalleeCC, isVarArg), - CCAssignFnForCall(CallerCC, isVarArg))) + CCAssignFnForCall(CalleeCC, IsVarArg), + CCAssignFnForCall(CallerCC, IsVarArg))) return false; // The callee has to preserve all registers the caller needs to preserve. const AArch64RegisterInfo *TRI = Subtarget->getRegisterInfo(); @@ -5958,9 +5991,22 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization( return true; SmallVector ArgLocs; - CCState CCInfo(CalleeCC, isVarArg, MF, ArgLocs, C); + CCState CCInfo(CalleeCC, IsVarArg, MF, ArgLocs, C); - CCInfo.AnalyzeCallOperands(Outs, CCAssignFnForCall(CalleeCC, isVarArg)); + analyzeCallOperands(*this, Subtarget, CLI, CCInfo); + + if (IsVarArg && !(CLI.CB && CLI.CB->isMustTailCall())) { + // When we are musttail, additional checks have been done and we can safely ignore this check + // At least two cases here: if caller is fastcc then we can't have any + // memory arguments (we'd be expected to clean up the stack afterwards). If + // caller is C then we could potentially use its argument area. + + // FIXME: for now we take the most conservative of these in both cases: + // disallow all variadic memory operands. + for (const CCValAssign &ArgLoc : ArgLocs) + if (!ArgLoc.isRegLoc()) + return false; + } const AArch64FunctionInfo *FuncInfo = MF.getInfo(); @@ -6051,7 +6097,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; bool &IsTailCall = CLI.IsTailCall; - CallingConv::ID CallConv = CLI.CallConv; + CallingConv::ID &CallConv = CLI.CallConv; bool IsVarArg = CLI.IsVarArg; MachineFunction &MF = DAG.getMachineFunction(); @@ -6061,7 +6107,12 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, AArch64FunctionInfo *FuncInfo = MF.getInfo(); bool TailCallOpt = MF.getTarget().Options.GuaranteedTailCallOpt; bool IsSibCall = false; - bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CallConv); + bool GuardWithBTI = false; + + if (CLI.CB && CLI.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) && + !Subtarget->noBTIAtReturnTwice()) { + GuardWithBTI = FuncInfo->branchTargetEnforcement(); + } // Check callee args/returns for SVE registers and set calling convention // accordingly. @@ -6079,8 +6130,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, if (IsTailCall) { // Check if it's really possible to do a tail call. - IsTailCall = isEligibleForTailCallOptimization( - Callee, CallConv, IsVarArg, Outs, OutVals, Ins, DAG); + IsTailCall = isEligibleForTailCallOptimization(CLI); // A sibling call is one where we're under the usual C ABI and not planning // to change that but can still do a tail call: @@ -6101,56 +6151,17 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); if (IsVarArg) { - // Handle fixed and variable vector arguments differently. - // Variable vector arguments always go into memory. 
unsigned NumArgs = Outs.size(); for (unsigned i = 0; i != NumArgs; ++i) { - MVT ArgVT = Outs[i].VT; - if (!Outs[i].IsFixed && ArgVT.isScalableVector()) + if (!Outs[i].IsFixed && Outs[i].VT.isScalableVector()) report_fatal_error("Passing SVE types to variadic functions is " "currently not supported"); - - ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; - bool UseVarArgCC = !Outs[i].IsFixed; - // On Windows, the fixed arguments in a vararg call are passed in GPRs - // too, so use the vararg CC to force them to integer registers. - if (IsCalleeWin64) - UseVarArgCC = true; - CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, UseVarArgCC); - bool Res = AssignFn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, CCInfo); - assert(!Res && "Call operand has unhandled type"); - (void)Res; - } - } else { - // At this point, Outs[].VT may already be promoted to i32. To correctly - // handle passing i8 as i8 instead of i32 on stack, we pass in both i32 and - // i8 to CC_AArch64_AAPCS with i32 being ValVT and i8 being LocVT. - // Since AnalyzeCallOperands uses Ins[].VT for both ValVT and LocVT, here - // we use a special version of AnalyzeCallOperands to pass in ValVT and - // LocVT. - unsigned NumArgs = Outs.size(); - for (unsigned i = 0; i != NumArgs; ++i) { - MVT ValVT = Outs[i].VT; - // Get type of the original argument. - EVT ActualVT = getValueType(DAG.getDataLayout(), - CLI.getArgs()[Outs[i].OrigArgIndex].Ty, - /*AllowUnknown*/ true); - MVT ActualMVT = ActualVT.isSimple() ? ActualVT.getSimpleVT() : ValVT; - ISD::ArgFlagsTy ArgFlags = Outs[i].Flags; - // If ActualMVT is i1/i8/i16, we should set LocVT to i8/i8/i16. - if (ActualMVT == MVT::i1 || ActualMVT == MVT::i8) - ValVT = MVT::i8; - else if (ActualMVT == MVT::i16) - ValVT = MVT::i16; - - CCAssignFn *AssignFn = CCAssignFnForCall(CallConv, /*IsVarArg=*/false); - bool Res = AssignFn(i, ValVT, ValVT, CCValAssign::Full, ArgFlags, CCInfo); - assert(!Res && "Call operand has unhandled type"); - (void)Res; } } + analyzeCallOperands(*this, Subtarget, CLI, CCInfo); + // Get a count of how many bytes are to be pushed on the stack. unsigned NumBytes = CCInfo.getNextStackOffset(); @@ -6536,7 +6547,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, Function *ARCFn = *objcarc::getAttachedARCFunction(CLI.CB); auto GA = DAG.getTargetGlobalAddress(ARCFn, DL, PtrVT); Ops.insert(Ops.begin() + 1, GA); - } + } else if (GuardWithBTI) + CallOpc = AArch64ISD::CALL_BTI; // Returns a chain and a flag for retval copy to use. Chain = DAG.getNode(CallOpc, DL, NodeTys, Ops); @@ -17111,13 +17123,14 @@ static SDValue performGlobalAddressCombine(SDNode *N, SelectionDAG &DAG, // Check whether folding this offset is legal. It must not go out of bounds of // the referenced object to avoid violating the code model, and must be - // smaller than 2^21 because this is the largest offset expressible in all - // object formats. + // smaller than 2^20 because this is the largest offset expressible in all + // object formats. (The IMAGE_REL_ARM64_PAGEBASE_REL21 relocation in COFF + // stores an immediate signed 21 bit offset.) // // This check also prevents us from folding negative offsets, which will end // up being treated in the same way as large positive ones. They could also // cause code model violations, and aren't really common enough to matter. 
- if (Offset >= (1 << 21)) + if (Offset >= (1 << 20)) return SDValue(); const GlobalValue *GV = GN->getGlobal(); diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/llvm/lib/Target/AArch64/AArch64ISelLowering.h index 2138c0ffe70a..80b7e84872cd 100644 --- a/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -55,6 +55,8 @@ enum NodeType : unsigned { // x29, x29` marker instruction. CALL_RVMARKER, + CALL_BTI, // Function call followed by a BTI instruction. + // Produces the full sequence of instructions for getting the thread pointer // offset of a variable into X0, using the TLSDesc model. TLSDESC_CALLSEQ, @@ -898,11 +900,8 @@ class AArch64TargetLowering : public TargetLowering { SDValue LowerINTRINSIC_W_CHAIN(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) const; - bool isEligibleForTailCallOptimization( - SDValue Callee, CallingConv::ID CalleeCC, bool isVarArg, - const SmallVectorImpl &Outs, - const SmallVectorImpl &OutVals, - const SmallVectorImpl &Ins, SelectionDAG &DAG) const; + bool + isEligibleForTailCallOptimization(const CallLoweringInfo &CLI) const; /// Finds the incoming stack arguments which overlap the given fixed stack /// object and incorporates their load into the current chain. This prevents diff --git a/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/llvm/lib/Target/AArch64/AArch64InstrInfo.td index 1316161f05f1..2680b5ac094e 100644 --- a/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -473,6 +473,11 @@ def AArch64call : SDNode<"AArch64ISD::CALL", [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, SDNPVariadic]>; +def AArch64call_bti : SDNode<"AArch64ISD::CALL_BTI", + SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>, + [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, + SDNPVariadic]>; + def AArch64call_rvmarker: SDNode<"AArch64ISD::CALL_RVMARKER", SDTypeProfile<0, -1, [SDTCisPtrTy<0>]>, [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue, @@ -2320,6 +2325,8 @@ let isCall = 1, Defs = [LR], Uses = [SP] in { PseudoInstExpansion<(BLR GPR64:$Rn)>; def BLR_RVMARKER : Pseudo<(outs), (ins variable_ops), []>, Sched<[WriteBrReg]>; + def BLR_BTI : Pseudo<(outs), (ins variable_ops), []>, + Sched<[WriteBrReg]>; } // isCall def : Pat<(AArch64call GPR64:$Rn), @@ -2333,6 +2340,10 @@ def : Pat<(AArch64call_rvmarker (i64 tglobaladdr:$rvfunc), GPR64:$Rn), (BLR_RVMARKER tglobaladdr:$rvfunc, GPR64:$Rn)>, Requires<[NoSLSBLRMitigation]>; +def : Pat<(AArch64call_bti GPR64:$Rn), + (BLR_BTI GPR64:$Rn)>, + Requires<[NoSLSBLRMitigation]>; + let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def BR : BranchReg<0b0000, "br", [(brind GPR64:$Rn)]>; } // isBranch, isTerminator, isBarrier, isIndirectBranch diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h index 7b2bbad30f85..061db926ee2b 100644 --- a/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -243,6 +243,10 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { // Enable 64-bit vectorization in SLP. unsigned MinVectorRegisterBitWidth = 64; + // Do not place a BTI instruction after a call to a return twice function like + // setjmp. 
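// --- Illustrative aside (hypothetical sketch, not part of the vendored diff) ---
// Why returns-twice calls need a BTI landing pad when branch target
// enforcement is on: longjmp() transfers control back to the point just after
// the setjmp() call with an indirect branch, so that address must be a valid
// BTI target. The new BLR_BTI pseudo bundles the call with a trailing "bti j"
// for exactly this case (unless -mno-bti-at-return-twice is given).
// guarded_region() and its parameter are invented for illustration.
#include <setjmp.h>

static jmp_buf env;

int guarded_region(void (*work)(void)) {
  if (setjmp(env) == 0) {  // longjmp(env, 1) later "returns" here via an
    work();                // indirect branch, landing on the instruction
    return 0;              // right after the call - hence the bti j.
  }
  return 1;                // reached only via longjmp
}
// --- end aside ---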
+ bool NoBTIAtReturnTwice = false; + bool OutlineAtomics = false; bool PredictableSelectIsExpensive = false; bool BalanceFPOps = false; @@ -588,6 +592,8 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo { bool fixCortexA53_835769() const { return FixCortexA53_835769; } + bool noBTIAtReturnTwice() const { return NoBTIAtReturnTwice; } + bool addrSinkUsingGEPs() const override { // Keeping GEPs inbounds is important for exploiting AArch64 // addressing-modes in ILP32 mode. diff --git a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 33ed7ae9780e..8f71eb3095cf 100644 --- a/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -6515,21 +6515,13 @@ bool AArch64AsmParser::parseDirectiveCFIBKeyFrame() { /// parseDirectiveVariantPCS /// ::= .variant_pcs symbolname bool AArch64AsmParser::parseDirectiveVariantPCS(SMLoc L) { - const AsmToken &Tok = getTok(); - if (Tok.isNot(AsmToken::Identifier)) + StringRef Name; + if (getParser().parseIdentifier(Name)) return TokError("expected symbol name"); - - StringRef SymbolName = Tok.getIdentifier(); - - MCSymbol *Sym = getContext().lookupSymbol(SymbolName); - if (!Sym) - return TokError("unknown symbol"); - - Lex(); // Eat the symbol - if (parseEOL()) return true; - getTargetStreamer().emitDirectiveVariantPCS(Sym); + getTargetStreamer().emitDirectiveVariantPCS( + getContext().getOrCreateSymbol(Name)); return false; } diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp index 097b93e4fcca..bb6c7938791e 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp @@ -1127,14 +1127,22 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder, // Create a temporarily-floating call instruction so we can add the implicit // uses of arg registers. - unsigned Opc = getCallOpcode(MF, Info.Callee.isReg(), false); + const AArch64Subtarget &Subtarget = MF.getSubtarget(); + unsigned Opc = 0; + // A call to a returns twice function like setjmp must be followed by a bti + // instruction. + if (Info.CB && Info.CB->getAttributes().hasFnAttr(Attribute::ReturnsTwice) && + !Subtarget.noBTIAtReturnTwice() && + MF.getInfo()->branchTargetEnforcement()) + Opc = AArch64::BLR_BTI; + else + Opc = getCallOpcode(MF, Info.Callee.isReg(), false); auto MIB = MIRBuilder.buildInstrNoInsert(Opc); MIB.add(Info.Callee); // Tell the call which registers are clobbered. const uint32_t *Mask; - const AArch64Subtarget &Subtarget = MF.getSubtarget(); const auto *TRI = Subtarget.getRegisterInfo(); AArch64OutgoingValueAssigner Assigner(AssignFnFixed, AssignFnVarArg, diff --git a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp index d3f4130d2ba1..3730030aa140 100644 --- a/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp +++ b/llvm/lib/Target/AArch64/GISel/AArch64PreLegalizerCombiner.cpp @@ -162,13 +162,14 @@ static bool matchFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI, // Check whether folding this offset is legal. It must not go out of bounds of // the referenced object to avoid violating the code model, and must be - // smaller than 2^21 because this is the largest offset expressible in all - // object formats. + // smaller than 2^20 because this is the largest offset expressible in all + // object formats. 
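A compact sketch of when the GlobalISel path above picks the new opcode (stand-in types, not the real lowerCall signature): the callee must be marked returns_twice, the subtarget must not have NoBTIAtReturnTwice set, and the function must be compiled with branch target enforcement.

#include <cassert>

enum class CallOpc { BLR, BLR_BTI };

// Stand-in for the condition added to AArch64CallLowering::lowerCall.
CallOpc pickCallOpcode(bool CalleeReturnsTwice, bool NoBTIAtReturnTwice,
                       bool BranchTargetEnforcement) {
  if (CalleeReturnsTwice && !NoBTIAtReturnTwice && BranchTargetEnforcement)
    return CallOpc::BLR_BTI; // call followed by a BTI landing pad
  return CallOpc::BLR;
}

int main() {
  assert(pickCallOpcode(true, false, true) == CallOpc::BLR_BTI);
  assert(pickCallOpcode(true, true, true) == CallOpc::BLR);
  assert(pickCallOpcode(false, false, true) == CallOpc::BLR);
}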
(The IMAGE_REL_ARM64_PAGEBASE_REL21 relocation in COFF + // stores an immediate signed 21 bit offset.) // // This check also prevents us from folding negative offsets, which will end // up being treated in the same way as large positive ones. They could also // cause code model violations, and aren't really common enough to matter. - if (NewOffset >= (1 << 21)) + if (NewOffset >= (1 << 20)) return false; Type *T = GV->getValueType(); diff --git a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index 78c0e90b1384..46edb12959d2 100644 --- a/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ b/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -254,6 +254,7 @@ void AArch64TargetELFStreamer::emitInst(uint32_t Inst) { } void AArch64TargetELFStreamer::emitDirectiveVariantPCS(MCSymbol *Symbol) { + getStreamer().getAssembler().registerSymbol(*Symbol); cast(Symbol)->setOther(ELF::STO_AARCH64_VARIANT_PCS); } diff --git a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp index 715cff72dcab..7113fe33b5d7 100644 --- a/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp +++ b/llvm/lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp @@ -341,31 +341,11 @@ struct PPCOperand : public MCParsedAsmOperand { bool isU10Imm() const { return Kind == Immediate && isUInt<10>(getImm()); } bool isU12Imm() const { return Kind == Immediate && isUInt<12>(getImm()); } - bool isU16Imm() const { - switch (Kind) { - case Expression: - return true; - case Immediate: - case ContextImmediate: - return isUInt<16>(getImmU16Context()); - default: - return false; - } - } - bool isS16Imm() const { - switch (Kind) { - case Expression: - return true; - case Immediate: - case ContextImmediate: - return isInt<16>(getImmS16Context()); - default: - return false; - } - } - bool isS16ImmX4() const { return Kind == Expression || - (Kind == Immediate && isInt<16>(getImm()) && - (getImm() & 3) == 0); } + bool isU16Imm() const { return isExtImm<16>(/*Signed*/ false, 1); } + bool isS16Imm() const { return isExtImm<16>(/*Signed*/ true, 1); } + bool isS16ImmX4() const { return isExtImm<16>(/*Signed*/ true, 4); } + bool isS16ImmX16() const { return isExtImm<16>(/*Signed*/ true, 16); } + bool isS17Imm() const { return isExtImm<17>(/*Signed*/ true, 1); } bool isHashImmX8() const { // The Hash Imm form is used for instructions that check or store a hash. 
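Both offset-folding combines above (the SelectionDAG one and this GlobalISel one) now use the same tighter bound; the reasoning is just signed 21-bit range arithmetic, sketched here as standalone checks (not LLVM code):

#include <cassert>
#include <cstdint>

// A signed 21-bit field covers [-2^20, 2^20 - 1].
constexpr bool fitsSigned21(int64_t V) {
  return V >= -(int64_t(1) << 20) && V < (int64_t(1) << 20);
}

// Mirrors the guard in the patch: offsets are handled as unsigned, so negative
// offsets show up as huge values and are rejected by the same comparison.
constexpr bool canFoldOffset(uint64_t Offset) {
  return Offset < (uint64_t(1) << 20);
}

int main() {
  static_assert(fitsSigned21((1 << 20) - 1), "largest positive value still fits");
  static_assert(!fitsSigned21(1 << 20), "2^20 no longer fits in a signed 21-bit field");
  assert(canFoldOffset((1 << 20) - 1) && !canFoldOffset(uint64_t(1) << 20));
  assert(!canFoldOffset(uint64_t(-16))); // a negative offset, seen as unsigned
}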
@@ -375,9 +355,6 @@ struct PPCOperand : public MCParsedAsmOperand { (getImm() & 7) == 0); } - bool isS16ImmX16() const { return Kind == Expression || - (Kind == Immediate && isInt<16>(getImm()) && - (getImm() & 15) == 0); } bool isS34ImmX16() const { return Kind == Expression || (Kind == Immediate && isInt<34>(getImm()) && (getImm() & 15) == 0); @@ -388,17 +365,6 @@ struct PPCOperand : public MCParsedAsmOperand { return Kind == Expression || (Kind == Immediate && isInt<34>(getImm())); } - bool isS17Imm() const { - switch (Kind) { - case Expression: - return true; - case Immediate: - case ContextImmediate: - return isInt<17>(getImmS16Context()); - default: - return false; - } - } bool isTLSReg() const { return Kind == TLSRegister; } bool isDirectBr() const { if (Kind == Expression) @@ -712,6 +678,25 @@ struct PPCOperand : public MCParsedAsmOperand { return CreateExpr(Val, S, E, IsPPC64); } + +private: + template + bool isExtImm(bool Signed, unsigned Multiple) const { + switch (Kind) { + default: + return false; + case Expression: + return true; + case Immediate: + case ContextImmediate: + if (Signed) + return isInt(getImmS16Context()) && + (getImmS16Context() & (Multiple - 1)) == 0; + else + return isUInt(getImmU16Context()) && + (getImmU16Context() & (Multiple - 1)) == 0; + } + } }; } // end anonymous namespace. diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp index 9df94edc8cdf..2e678ffd58c2 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp @@ -44,6 +44,7 @@ static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) { case PPC::fixup_ppc_half16: return Value & 0xffff; case PPC::fixup_ppc_half16ds: + case PPC::fixup_ppc_half16dq: return Value & 0xfffc; case PPC::fixup_ppc_pcrel34: case PPC::fixup_ppc_imm34: @@ -60,6 +61,7 @@ static unsigned getFixupKindNumBytes(unsigned Kind) { case FK_Data_2: case PPC::fixup_ppc_half16: case PPC::fixup_ppc_half16ds: + case PPC::fixup_ppc_half16dq: return 2; case FK_Data_4: case PPC::fixup_ppc_brcond14: diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp index 94ef7b45434f..1e58039582c2 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp @@ -125,6 +125,7 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, } break; case PPC::fixup_ppc_half16ds: + case PPC::fixup_ppc_half16dq: Target.print(errs()); errs() << '\n'; report_fatal_error("Invalid PC-relative half16ds relocation"); @@ -349,6 +350,7 @@ unsigned PPCELFObjectWriter::getRelocType(MCContext &Ctx, const MCValue &Target, } break; case PPC::fixup_ppc_half16ds: + case PPC::fixup_ppc_half16dq: switch (Modifier) { default: llvm_unreachable("Unsupported Modifier"); case MCSymbolRefExpr::VK_None: diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h index 73292f7b7938..df0c666f5b11 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h @@ -51,6 +51,10 @@ enum Fixups { /// register number. fixup_ppc_nofixup, + /// A 16-bit fixup corresponding to lo16(_foo) with implied 3 zero bits for + /// instrs like 'lxv'. Produces the same relocation as fixup_ppc_half16ds. 
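The template parameter of the new isExtImm helper is the immediate width (it is what distinguishes isS16Imm from isS17Imm). A standalone sketch of the check it performs for immediate operands, with local stand-ins for llvm::isInt and llvm::isUInt (expression operands are accepted unconditionally and are not modelled here):

#include <cassert>
#include <cstdint>

// Local stand-ins for llvm::isInt<N> / llvm::isUInt<N>.
template <unsigned Width> constexpr bool isIntN(int64_t V) {
  return V >= -(int64_t(1) << (Width - 1)) && V < (int64_t(1) << (Width - 1));
}
template <unsigned Width> constexpr bool isUIntN(int64_t V) {
  return V >= 0 && V < (int64_t(1) << Width);
}

// Immediate case of the consolidated predicate: in range for Width bits and a
// multiple of Multiple.
template <unsigned Width>
bool isExtImm(int64_t V, bool Signed, unsigned Multiple) {
  bool InRange = Signed ? isIntN<Width>(V) : isUIntN<Width>(V);
  return InRange && (V & (Multiple - 1)) == 0;
}

int main() {
  assert(isExtImm<16>(-4, /*Signed=*/true, 4));       // isS16ImmX4
  assert(!isExtImm<16>(-2, /*Signed=*/true, 4));      // not 4-byte aligned
  assert(isExtImm<16>(0xFFFF, /*Signed=*/false, 1));  // isU16Imm
  assert(isExtImm<17>(40000, /*Signed=*/true, 1));    // isS17Imm reaches past s16
}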
+ fixup_ppc_half16dq, + // Marker LastTargetFixupKind, NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp index 4dfa7d5e600c..d76795fcebc4 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp @@ -198,8 +198,8 @@ unsigned PPCMCCodeEmitter::getMemRIX16Encoding(const MCInst &MI, unsigned OpNo, } // Otherwise add a fixup for the displacement field. - Fixups.push_back(MCFixup::create(IsLittleEndian? 0 : 2, MO.getExpr(), - (MCFixupKind)PPC::fixup_ppc_half16ds)); + Fixups.push_back(MCFixup::create(IsLittleEndian ? 0 : 2, MO.getExpr(), + (MCFixupKind)PPC::fixup_ppc_half16dq)); return RegBits; } diff --git a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp index abff44449131..6cd04ee018fd 100644 --- a/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp +++ b/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp @@ -110,9 +110,18 @@ PPCMCExpr::evaluateAsRelocatableImpl(MCValue &Res, if (Value.isAbsolute()) { int64_t Result = evaluateAsInt64(Value.getConstant()); - if ((Fixup == nullptr || (unsigned)Fixup->getKind() != PPC::fixup_ppc_half16) && - (Result >= 0x8000)) + bool IsHalf16 = Fixup && Fixup->getTargetKind() == PPC::fixup_ppc_half16; + bool IsHalf16DS = + Fixup && Fixup->getTargetKind() == PPC::fixup_ppc_half16ds; + bool IsHalf16DQ = + Fixup && Fixup->getTargetKind() == PPC::fixup_ppc_half16dq; + bool IsHalf = IsHalf16 || IsHalf16DS || IsHalf16DQ; + + if (!IsHalf && Result >= 0x8000) return false; + if ((IsHalf16DS && (Result & 0x3)) || (IsHalf16DQ && (Result & 0xf))) + return false; + Res = MCValue::get(Result); } else { if (!Layout) diff --git a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp index cbeae0ab03b8..6c9d43ad8c03 100644 --- a/llvm/lib/Target/PowerPC/PPCISelLowering.cpp +++ b/llvm/lib/Target/PowerPC/PPCISelLowering.cpp @@ -1305,11 +1305,13 @@ PPCTargetLowering::PPCTargetLowering(const PPCTargetMachine &TM, setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); } + setLibcallName(RTLIB::MULO_I128, nullptr); if (!isPPC64) { // These libcalls are not available in 32-bit. 
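The fixup_ppc_half16dq kind added above reuses the half16ds relocation but lets the constant folder reject absolute values whose low bits would be silently dropped (low 2 bits for DS-form, low 4 bits for DQ-form loads/stores such as lxv). A small sketch of the acceptance rules from the PPCMCExpr change, using a local enum rather than LLVM's fixup kinds:

#include <cassert>
#include <cstdint>

enum class Fixup { Other, Half16, Half16DS, Half16DQ };

bool canFoldAbsolute(int64_t Result, Fixup Kind) {
  bool IsHalf = Kind == Fixup::Half16 || Kind == Fixup::Half16DS ||
                Kind == Fixup::Half16DQ;
  if (!IsHalf && Result >= 0x8000)
    return false; // only the half16 family may use the full 16-bit range
  if (Kind == Fixup::Half16DS && (Result & 0x3))
    return false; // DS-form displacement must be 4-byte aligned
  if (Kind == Fixup::Half16DQ && (Result & 0xf))
    return false; // DQ-form displacement must be 16-byte aligned
  return true;
}

int main() {
  assert(canFoldAbsolute(0xfff0, Fixup::Half16DQ));  // 16-byte aligned
  assert(!canFoldAbsolute(0xfff8, Fixup::Half16DQ)); // low 4 bits set
  assert(!canFoldAbsolute(0x8002, Fixup::Half16DS)); // low 2 bits set
  assert(!canFoldAbsolute(0x8000, Fixup::Other));    // too large without half16
}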
setLibcallName(RTLIB::SHL_I128, nullptr); setLibcallName(RTLIB::SRL_I128, nullptr); setLibcallName(RTLIB::SRA_I128, nullptr); + setLibcallName(RTLIB::MUL_I128, nullptr); setLibcallName(RTLIB::MULO_I64, nullptr); } diff --git a/llvm/lib/Target/PowerPC/PPCInstrInfo.td b/llvm/lib/Target/PowerPC/PPCInstrInfo.td index c26b4f6ceb7d..53e73e33b003 100644 --- a/llvm/lib/Target/PowerPC/PPCInstrInfo.td +++ b/llvm/lib/Target/PowerPC/PPCInstrInfo.td @@ -1016,7 +1016,7 @@ def dispRI : Operand { } def PPCDispRIXOperand : AsmOperandClass { let Name = "DispRIX"; let PredicateMethod = "isS16ImmX4"; - let RenderMethod = "addImmOperands"; + let RenderMethod = "addS16ImmOperands"; } def dispRIX : Operand { let ParserMatchClass = PPCDispRIXOperand; @@ -1030,7 +1030,7 @@ def dispRIHash : Operand { } def PPCDispRIX16Operand : AsmOperandClass { let Name = "DispRIX16"; let PredicateMethod = "isS16ImmX16"; - let RenderMethod = "addImmOperands"; + let RenderMethod = "addS16ImmOperands"; } def dispRIX16 : Operand { let ParserMatchClass = PPCDispRIX16Operand; diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp index 514789b3f645..4b940093482f 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVAsmBackend.cpp @@ -583,10 +583,11 @@ void RISCVAsmBackend::applyFixup(const MCAssembler &Asm, const MCFixup &Fixup, bool RISCVAsmBackend::shouldInsertExtraNopBytesForCodeAlign( const MCAlignFragment &AF, unsigned &Size) { // Calculate Nops Size only when linker relaxation enabled. - if (!STI.getFeatureBits()[RISCV::FeatureRelax]) + const MCSubtargetInfo *STI = AF.getSubtargetInfo(); + if (!STI->getFeatureBits()[RISCV::FeatureRelax]) return false; - bool HasStdExtC = STI.getFeatureBits()[RISCV::FeatureStdExtC]; + bool HasStdExtC = STI->getFeatureBits()[RISCV::FeatureStdExtC]; unsigned MinNopLen = HasStdExtC ? 2 : 4; if (AF.getAlignment() <= MinNopLen) { @@ -606,7 +607,8 @@ bool RISCVAsmBackend::shouldInsertFixupForCodeAlign(MCAssembler &Asm, const MCAsmLayout &Layout, MCAlignFragment &AF) { // Insert the fixup only when linker relaxation enabled. - if (!STI.getFeatureBits()[RISCV::FeatureRelax]) + const MCSubtargetInfo *STI = AF.getSubtargetInfo(); + if (!STI->getFeatureBits()[RISCV::FeatureRelax]) return false; // Calculate total Nops we need to insert. If there are none to insert diff --git a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp index e935179e5f9b..4adcd25600f2 100644 --- a/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp +++ b/llvm/lib/Target/RISCV/MCTargetDesc/RISCVMatInt.cpp @@ -302,32 +302,34 @@ InstSeq generateInstSeq(int64_t Val, const FeatureBitset &ActiveFeatures) { TmpSeq.push_back(RISCVMatInt::Inst(Opc, 0)); if (TmpSeq.size() < Res.size()) Res = TmpSeq; - } - // Try to use LUI+SH*ADD+ADDI. - int64_t Hi52 = ((uint64_t)Val + 0x800ull) & ~0xfffull; - int64_t Lo12 = SignExtend64<12>(Val); - Div = 0; - if (isInt<32>(Hi52 / 3) && (Hi52 % 3) == 0) { - Div = 3; - Opc = RISCV::SH1ADD; - } else if (isInt<32>(Hi52 / 5) && (Hi52 % 5) == 0) { - Div = 5; - Opc = RISCV::SH2ADD; - } else if (isInt<32>(Hi52 / 9) && (Hi52 % 9) == 0) { - Div = 9; - Opc = RISCV::SH3ADD; - } - // Build the new instruction sequence. - if (Div > 0) { - // For Val that has zero Lo12 (implies Val equals to Hi52) should has - // already been processed to LUI+SH*ADD by previous optimization. 
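On the RISC-V side, the alignment handling above now reads the feature bits from the fragment's own subtarget; the amount of padding reserved for an align directive under linker relaxation is simple arithmetic, sketched here (not the MC API):

#include <cassert>

// Worst-case extra bytes reserved for an alignment: up to
// (alignment - smallest nop), where the smallest nop is 2 bytes with the
// compressed (C) extension and 4 bytes without it.
bool extraNopBytesForAlign(unsigned Alignment, bool HasCompressed,
                           unsigned &Size) {
  unsigned MinNopLen = HasCompressed ? 2 : 4;
  if (Alignment <= MinNopLen)
    return false; // nothing extra to reserve
  Size = Alignment - MinNopLen;
  return true;
}

int main() {
  unsigned Size = 0;
  assert(extraNopBytesForAlign(16, /*HasCompressed=*/true, Size) && Size == 14);
  assert(extraNopBytesForAlign(16, /*HasCompressed=*/false, Size) && Size == 12);
  assert(!extraNopBytesForAlign(4, /*HasCompressed=*/false, Size));
}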
- assert(Lo12 != 0 && - "unexpected instruction sequence for immediate materialisation"); - generateInstSeqImpl(Hi52 / Div, ActiveFeatures, TmpSeq); - TmpSeq.push_back(RISCVMatInt::Inst(Opc, 0)); - TmpSeq.push_back(RISCVMatInt::Inst(RISCV::ADDI, Lo12)); - if (TmpSeq.size() < Res.size()) - Res = TmpSeq; + } else { + // Try to use LUI+SH*ADD+ADDI. + int64_t Hi52 = ((uint64_t)Val + 0x800ull) & ~0xfffull; + int64_t Lo12 = SignExtend64<12>(Val); + Div = 0; + if (isInt<32>(Hi52 / 3) && (Hi52 % 3) == 0) { + Div = 3; + Opc = RISCV::SH1ADD; + } else if (isInt<32>(Hi52 / 5) && (Hi52 % 5) == 0) { + Div = 5; + Opc = RISCV::SH2ADD; + } else if (isInt<32>(Hi52 / 9) && (Hi52 % 9) == 0) { + Div = 9; + Opc = RISCV::SH3ADD; + } + // Build the new instruction sequence. + if (Div > 0) { + // For Val that has zero Lo12 (implies Val equals to Hi52) should has + // already been processed to LUI+SH*ADD by previous optimization. + assert(Lo12 != 0 && + "unexpected instruction sequence for immediate materialisation"); + assert(TmpSeq.empty() && "Expected empty TmpSeq"); + generateInstSeqImpl(Hi52 / Div, ActiveFeatures, TmpSeq); + TmpSeq.push_back(RISCVMatInt::Inst(Opc, 0)); + TmpSeq.push_back(RISCVMatInt::Inst(RISCV::ADDI, Lo12)); + if (TmpSeq.size() < Res.size()) + Res = TmpSeq; + } } } diff --git a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp index f3cc7d3fb46f..8f250eeb7248 100644 --- a/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVFrameLowering.cpp @@ -674,7 +674,10 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, if (hasBP(MF)) { FrameReg = RISCVABI::getBPReg(); // |--------------------------| -- <-- FP - // | callee-saved registers | | <----. + // | callee-allocated save | | <----| + // | area for register varargs| | | + // |--------------------------| | | + // | callee-saved registers | | | // |--------------------------| -- | // | realignment (the size of | | | // | this area is not counted | | | @@ -699,7 +702,10 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, } else { FrameReg = RISCV::X2; // |--------------------------| -- <-- FP - // | callee-saved registers | | <----. + // | callee-allocated save | | <----| + // | area for register varargs| | | + // |--------------------------| | | + // | callee-saved registers | | | // |--------------------------| -- | // | realignment (the size of | | | // | this area is not counted | | | @@ -742,6 +748,9 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, // the frame size. // // |--------------------------| -- <-- FP + // | callee-allocated save | | + // | area for register varargs| | + // |--------------------------| | // | callee-saved registers | | // |--------------------------| | MFI.getStackSize() // | scalar local variables | | @@ -756,7 +765,10 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, // When using SP to access frame objects, we need to add RVV stack size. // // |--------------------------| -- <-- FP - // | callee-saved registers | | <----. 
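The LUI+SH*ADD+ADDI path restructured above relies on a simple identity: split the constant into a rounded-up high part and a signed low 12 bits, and when the high part is 3, 5, or 9 times a 32-bit value B, a single sh1add/sh2add/sh3add of B with itself rebuilds it (B*2+B, B*4+B, B*8+B). A standalone check of that arithmetic, not the backend code, and the constant is just a demo value:

#include <cassert>
#include <cstdint>

int main() {
  const int64_t Val = 0x90000123;                  // demo constant
  int64_t Hi52 = (int64_t)(((uint64_t)Val + 0x800) & ~0xfffull);
  int64_t Lo12 = (Val & 0x7ff) - (Val & 0x800);    // signed low 12 bits
  assert(Val == Hi52 + Lo12);                      // the hi/lo split is exact

  // Here Hi52 == 3 * 0x30000000, so after materialising B = Hi52 / 3 the
  // backend can rebuild it with sh1add B, B (i.e. B*2 + B) and then add Lo12.
  assert(Hi52 % 3 == 0);
  int64_t B = Hi52 / 3;
  assert(((B << 1) + B) + Lo12 == Val);
}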
+ // | callee-allocated save | | <----| + // | area for register varargs| | | + // |--------------------------| | | + // | callee-saved registers | | | // |--------------------------| -- | // | Padding after RVV | | | // | (not counted in | | | @@ -786,8 +798,11 @@ RISCVFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI, Offset += StackOffset::getFixed(MFI.getStackSize()); } } else if (MFI.getStackID(FI) == TargetStackID::ScalableVector) { + int ScalarLocalVarSize = MFI.getStackSize() - + RVFI->getCalleeSavedStackSize() - + RVFI->getVarArgsSaveSize(); Offset += StackOffset::get( - alignTo(MFI.getStackSize() - RVFI->getCalleeSavedStackSize(), 8), + alignTo(ScalarLocalVarSize, 8), RVFI->getRVVStackSize()); } } diff --git a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp index e7672a7652cd..274b86593e0f 100644 --- a/llvm/lib/Target/RISCV/RISCVISelLowering.cpp +++ b/llvm/lib/Target/RISCV/RISCVISelLowering.cpp @@ -1908,37 +1908,27 @@ static Optional isSimpleVIDSequence(SDValue Op) { // A zero-value value difference means that we're somewhere in the middle // of a fractional step, e.g. <0,0,0*,0,1,1,1,1>. Wait until we notice a // step change before evaluating the sequence. - if (ValDiff != 0) { - int64_t Remainder = ValDiff % IdxDiff; - // Normalize the step if it's greater than 1. - if (Remainder != ValDiff) { - // The difference must cleanly divide the element span. - if (Remainder != 0) - return None; - ValDiff /= IdxDiff; - IdxDiff = 1; - } + if (ValDiff == 0) + continue; - if (!SeqStepNum) - SeqStepNum = ValDiff; - else if (ValDiff != SeqStepNum) - return None; - - if (!SeqStepDenom) - SeqStepDenom = IdxDiff; - else if (IdxDiff != *SeqStepDenom) + int64_t Remainder = ValDiff % IdxDiff; + // Normalize the step if it's greater than 1. + if (Remainder != ValDiff) { + // The difference must cleanly divide the element span. + if (Remainder != 0) return None; + ValDiff /= IdxDiff; + IdxDiff = 1; } - } - // Record and/or check any addend. - if (SeqStepNum && SeqStepDenom) { - uint64_t ExpectedVal = - (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom; - int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits); - if (!SeqAddend) - SeqAddend = Addend; - else if (SeqAddend != Addend) + if (!SeqStepNum) + SeqStepNum = ValDiff; + else if (ValDiff != SeqStepNum) + return None; + + if (!SeqStepDenom) + SeqStepDenom = IdxDiff; + else if (IdxDiff != *SeqStepDenom) return None; } @@ -1946,11 +1936,29 @@ static Optional isSimpleVIDSequence(SDValue Op) { if (!PrevElt || PrevElt->first != Val) PrevElt = std::make_pair(Val, Idx); } - // We need to have logged both a step and an addend for this to count as - // a legal index sequence. - if (!SeqStepNum || !SeqStepDenom || !SeqAddend) + + // We need to have logged a step for this to count as a legal index sequence. + if (!SeqStepNum || !SeqStepDenom) return None; + // Loop back through the sequence and validate elements we might have skipped + // while waiting for a valid step. While doing this, log any sequence addend. 
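The updated frame diagrams above move the callee-allocated vararg save area in with the callee-saved registers; the corresponding offset fix for RVV stack objects is plain subtraction, sketched here with made-up sizes:

#include <cassert>
#include <cstdint>

uint64_t alignTo8(uint64_t V) { return (V + 7) & ~uint64_t(7); }

// The register-vararg save area now counts as part of the region above the
// scalar locals, so it is subtracted out along with the callee-saved registers.
uint64_t scalarLocalVarSize(uint64_t StackSize, uint64_t CalleeSavedSize,
                            uint64_t VarArgsSaveSize) {
  return StackSize - CalleeSavedSize - VarArgsSaveSize;
}

int main() {
  // Hypothetical frame: 64 bytes total, 16 bytes of callee saves, 16 bytes of
  // saved vararg registers.
  uint64_t Fixed = alignTo8(scalarLocalVarSize(64, 16, 16));
  assert(Fixed == 32); // the old formula (StackSize - CalleeSavedSize) gave 48
}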
+ for (unsigned Idx = 0; Idx < NumElts; Idx++) { + if (Op.getOperand(Idx).isUndef()) + continue; + uint64_t Val = Op.getConstantOperandVal(Idx) & + maskTrailingOnes(EltSizeInBits); + uint64_t ExpectedVal = + (int64_t)(Idx * (uint64_t)*SeqStepNum) / *SeqStepDenom; + int64_t Addend = SignExtend64(Val - ExpectedVal, EltSizeInBits); + if (!SeqAddend) + SeqAddend = Addend; + else if (Addend != SeqAddend) + return None; + } + + assert(SeqAddend && "Must have an addend if we have a step"); + return VIDSequence{*SeqStepNum, *SeqStepDenom, *SeqAddend}; } @@ -2109,7 +2117,8 @@ static SDValue lowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, // a single addi instruction. if (((StepOpcode == ISD::MUL && isInt<12>(SplatStepVal)) || (StepOpcode == ISD::SHL && isUInt<5>(SplatStepVal))) && - isPowerOf2_32(StepDenominator) && isInt<5>(Addend)) { + isPowerOf2_32(StepDenominator) && + (SplatStepVal >= 0 || StepDenominator == 1) && isInt<5>(Addend)) { SDValue VID = DAG.getNode(RISCVISD::VID_VL, DL, ContainerVT, Mask, VL); // Convert right out of the scalable type so we can use standard ISD // nodes for the rest of the computation. If we used scalable types with diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp new file mode 100644 index 000000000000..bde0326a8de4 --- /dev/null +++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.cpp @@ -0,0 +1,30 @@ +//=- RISCVMachineFunctionInfo.cpp - RISCV machine function info ---*- C++ -*-=// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This file declares RISCV-specific per-machine-function information. 
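For reference, the shape isSimpleVIDSequence is matching (and which the second validation pass above re-checks element by element) is Val[i] = (i * StepNumerator) / StepDenominator + Addend. A quick standalone verification of two such sequences:

#include <cassert>
#include <cstdint>
#include <vector>

bool matchesVID(const std::vector<int64_t> &Elts, int64_t Num, int64_t Denom,
                int64_t Addend) {
  for (size_t i = 0; i < Elts.size(); ++i)
    if (Elts[i] != (int64_t)(i * Num) / Denom + Addend)
      return false;
  return true;
}

int main() {
  // A fractional step: <0,0,1,1,2,2> is vid.v divided by 2.
  assert(matchesVID({0, 0, 1, 1, 2, 2}, /*Num=*/1, /*Denom=*/2, /*Addend=*/0));
  // A step of 2 with an addend of 5: <5,7,9,11>.
  assert(matchesVID({5, 7, 9, 11}, /*Num=*/2, /*Denom=*/1, /*Addend=*/5));
  // Not a VID sequence.
  assert(!matchesVID({0, 1, 3, 4}, 1, 1, 0));
}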
+// +//===----------------------------------------------------------------------===// + +#include "RISCVMachineFunctionInfo.h" + +using namespace llvm; + +yaml::RISCVMachineFunctionInfo::RISCVMachineFunctionInfo( + const llvm::RISCVMachineFunctionInfo &MFI) + : VarArgsFrameIndex(MFI.getVarArgsFrameIndex()), + VarArgsSaveSize(MFI.getVarArgsSaveSize()) {} + +void yaml::RISCVMachineFunctionInfo::mappingImpl(yaml::IO &YamlIO) { + MappingTraits::mapping(YamlIO, *this); +} + +void RISCVMachineFunctionInfo::initializeBaseYamlFields( + const yaml::RISCVMachineFunctionInfo &YamlMFI) { + VarArgsFrameIndex = YamlMFI.VarArgsFrameIndex; + VarArgsSaveSize = YamlMFI.VarArgsSaveSize; +} diff --git a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h index b5609e9a3890..a549ec211a94 100644 --- a/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h +++ b/llvm/lib/Target/RISCV/RISCVMachineFunctionInfo.h @@ -14,11 +14,34 @@ #define LLVM_LIB_TARGET_RISCV_RISCVMACHINEFUNCTIONINFO_H #include "RISCVSubtarget.h" +#include "llvm/CodeGen/MIRYamlMapping.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" namespace llvm { +class RISCVMachineFunctionInfo; + +namespace yaml { +struct RISCVMachineFunctionInfo final : public yaml::MachineFunctionInfo { + int VarArgsFrameIndex; + int VarArgsSaveSize; + + RISCVMachineFunctionInfo() = default; + RISCVMachineFunctionInfo(const llvm::RISCVMachineFunctionInfo &MFI); + + void mappingImpl(yaml::IO &YamlIO) override; + ~RISCVMachineFunctionInfo() = default; +}; + +template <> struct MappingTraits { + static void mapping(IO &YamlIO, RISCVMachineFunctionInfo &MFI) { + YamlIO.mapOptional("varArgsFrameIndex", MFI.VarArgsFrameIndex); + YamlIO.mapOptional("varArgsSaveSize", MFI.VarArgsSaveSize); + } +}; +} // end namespace yaml + /// RISCVMachineFunctionInfo - This class is derived from MachineFunctionInfo /// and contains private RISCV-specific information for each MachineFunction. 
class RISCVMachineFunctionInfo : public MachineFunctionInfo { @@ -74,6 +97,8 @@ class RISCVMachineFunctionInfo : public MachineFunctionInfo { unsigned getCalleeSavedStackSize() const { return CalleeSavedStackSize; } void setCalleeSavedStackSize(unsigned Size) { CalleeSavedStackSize = Size; } + + void initializeBaseYamlFields(const yaml::RISCVMachineFunctionInfo &YamlMFI); }; } // end namespace llvm diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp index db5e2f1eeb6f..0248ea0039bc 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.cpp @@ -13,6 +13,7 @@ #include "RISCVTargetMachine.h" #include "MCTargetDesc/RISCVBaseInfo.h" #include "RISCV.h" +#include "RISCVMachineFunctionInfo.h" #include "RISCVTargetObjectFile.h" #include "RISCVTargetTransformInfo.h" #include "TargetInfo/RISCVTargetInfo.h" @@ -22,6 +23,8 @@ #include "llvm/CodeGen/GlobalISel/InstructionSelect.h" #include "llvm/CodeGen/GlobalISel/Legalizer.h" #include "llvm/CodeGen/GlobalISel/RegBankSelect.h" +#include "llvm/CodeGen/MIRParser/MIParser.h" +#include "llvm/CodeGen/MIRYamlMapping.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/CodeGen/TargetPassConfig.h" @@ -208,3 +211,23 @@ void RISCVPassConfig::addPreRegAlloc() { addPass(createRISCVMergeBaseOffsetOptPass()); addPass(createRISCVInsertVSETVLIPass()); } + +yaml::MachineFunctionInfo * +RISCVTargetMachine::createDefaultFuncInfoYAML() const { + return new yaml::RISCVMachineFunctionInfo(); +} + +yaml::MachineFunctionInfo * +RISCVTargetMachine::convertFuncInfoToYAML(const MachineFunction &MF) const { + const auto *MFI = MF.getInfo(); + return new yaml::RISCVMachineFunctionInfo(*MFI); +} + +bool RISCVTargetMachine::parseMachineFunctionInfo( + const yaml::MachineFunctionInfo &MFI, PerFunctionMIParsingState &PFS, + SMDiagnostic &Error, SMRange &SourceRange) const { + const auto &YamlMFI = + static_cast(MFI); + PFS.MF.getInfo()->initializeBaseYamlFields(YamlMFI); + return false; +} diff --git a/llvm/lib/Target/RISCV/RISCVTargetMachine.h b/llvm/lib/Target/RISCV/RISCVTargetMachine.h index 3156333f7ee1..2a0212c846a0 100644 --- a/llvm/lib/Target/RISCV/RISCVTargetMachine.h +++ b/llvm/lib/Target/RISCV/RISCVTargetMachine.h @@ -46,6 +46,14 @@ class RISCVTargetMachine : public LLVMTargetMachine { virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DstAS) const override; + + yaml::MachineFunctionInfo *createDefaultFuncInfoYAML() const override; + yaml::MachineFunctionInfo * + convertFuncInfoToYAML(const MachineFunction &MF) const override; + bool parseMachineFunctionInfo(const yaml::MachineFunctionInfo &, + PerFunctionMIParsingState &PFS, + SMDiagnostic &Error, + SMRange &SourceRange) const override; }; } // namespace llvm diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp index 77c2e7d16990..682932b8f3e6 100644 --- a/llvm/lib/Target/X86/X86ISelLowering.cpp +++ b/llvm/lib/Target/X86/X86ISelLowering.cpp @@ -15796,7 +15796,8 @@ static SDValue lowerV8I16Shuffle(const SDLoc &DL, ArrayRef Mask, V1 = extract128BitVector(V1V2, 0, DAG, DL); V2 = extract128BitVector(V1V2, 4, DAG, DL); } else { - SmallVector DWordClearOps(4, DAG.getConstant(0, DL, MVT::i32)); + SmallVector DWordClearOps(4, + DAG.getConstant(0, DL, MVT::i32)); for (unsigned i = 0; i != 4; i += 1 << (NumEvenDrops - 1)) DWordClearOps[i] = DAG.getConstant(0xFFFF, DL, MVT::i32); SDValue DWordClearMask = @@ -47109,8 +47110,7 @@ static 
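With the new yaml::RISCVMachineFunctionInfo and the target-machine hooks above, the vararg save information survives a round trip through MIR. The fragment below is an assumed example of how it would appear in a .mir file; the two field names come from the MappingTraits mapping, while the function name, the values, and the rest of the layout are illustrative only:

#include <cstdio>

// Assumed .mir fragment (illustrative): the machineFunctionInfo block now
// carries the vararg fields for a RISC-V function.
static const char *ExampleMIR = R"mir(
name:            va_callee
machineFunctionInfo:
  varArgsFrameIndex: -2
  varArgsSaveSize:   48
)mir";

int main() { std::puts(ExampleMIR); }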
SDValue combineLogicBlendIntoPBLENDV(SDNode *N, SelectionDAG &DAG, // into: // srl(ctlz x), log2(bitsize(x)) // Input pattern is checked by caller. -static SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, EVT ExtTy, - SelectionDAG &DAG) { +static SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) { SDValue Cmp = Op.getOperand(1); EVT VT = Cmp.getOperand(0).getValueType(); unsigned Log2b = Log2_32(VT.getSizeInBits()); @@ -47121,7 +47121,7 @@ static SDValue lowerX86CmpEqZeroToCtlzSrl(SDValue Op, EVT ExtTy, SDValue Trunc = DAG.getZExtOrTrunc(Clz, dl, MVT::i32); SDValue Scc = DAG.getNode(ISD::SRL, dl, MVT::i32, Trunc, DAG.getConstant(Log2b, dl, MVT::i8)); - return DAG.getZExtOrTrunc(Scc, dl, ExtTy); + return Scc; } // Try to transform: @@ -47181,11 +47181,10 @@ static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG, // or(srl(ctlz),srl(ctlz)). // The dag combiner can then fold it into: // srl(or(ctlz, ctlz)). - EVT VT = OR->getValueType(0); - SDValue NewLHS = lowerX86CmpEqZeroToCtlzSrl(LHS, VT, DAG); + SDValue NewLHS = lowerX86CmpEqZeroToCtlzSrl(LHS, DAG); SDValue Ret, NewRHS; - if (NewLHS && (NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG))) - Ret = DAG.getNode(ISD::OR, SDLoc(OR), VT, NewLHS, NewRHS); + if (NewLHS && (NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, DAG))) + Ret = DAG.getNode(ISD::OR, SDLoc(OR), MVT::i32, NewLHS, NewRHS); if (!Ret) return SDValue(); @@ -47198,16 +47197,13 @@ static SDValue combineOrCmpEqZeroToCtlzSrl(SDNode *N, SelectionDAG &DAG, // Swap rhs with lhs to match or(setcc(eq, cmp, 0), or). if (RHS->getOpcode() == ISD::OR) std::swap(LHS, RHS); - NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, VT, DAG); + NewRHS = lowerX86CmpEqZeroToCtlzSrl(RHS, DAG); if (!NewRHS) return SDValue(); - Ret = DAG.getNode(ISD::OR, SDLoc(OR), VT, Ret, NewRHS); + Ret = DAG.getNode(ISD::OR, SDLoc(OR), MVT::i32, Ret, NewRHS); } - if (Ret) - Ret = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), Ret); - - return Ret; + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N->getValueType(0), Ret); } static SDValue foldMaskedMergeImpl(SDValue And0_L, SDValue And0_R, diff --git a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp index 6bbb0251f2bc..2aab79e89078 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp @@ -1961,6 +1961,12 @@ Instruction *InstCombinerImpl::visitAnd(BinaryOperator &I) { } } + // If this 'and' clears the sign-bits added by ashr, replace with lshr: + // and (ashr X, ShiftC), C --> lshr X, ShiftC + if (match(Op0, m_AShr(m_Value(X), m_APInt(ShiftC))) && ShiftC->ult(Width) && + C->isMask(Width - ShiftC->getZExtValue())) + return BinaryOperator::CreateLShr(X, ConstantInt::get(Ty, *ShiftC)); + const APInt *AddC; if (match(Op0, m_Add(m_Value(X), m_APInt(AddC)))) { // If we add zeros to every bit below a mask, the add has no effect: diff --git a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp index 65e60498ff95..881b00f2a55a 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1572,6 +1572,23 @@ Instruction *InstCombinerImpl::foldSelectInstWithICmp(SelectInst &SI, } } + // Canonicalize a signbit condition to use zero constant by swapping: + // (CmpLHS > -1) ? TV : FV --> (CmpLHS < 0) ? 
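The InstCombine fold added above is easy to sanity-check: when the mask keeps exactly the Width - ShiftC low bits, it clears precisely the sign bits an arithmetic shift smeared in, so and(ashr(X, ShiftC), Mask) equals lshr(X, ShiftC). Exhaustive check over 8-bit values:

#include <cassert>
#include <cstdint>

int main() {
  const unsigned ShiftC = 3;
  const uint8_t Mask = (1u << (8 - ShiftC)) - 1;   // low 5 bits: 0x1f
  for (int v = -128; v <= 127; ++v) {
    uint8_t X = (uint8_t)v;
    uint8_t AShr = (uint8_t)((int8_t)X >> ShiftC); // arithmetic shift
    uint8_t LShr = (uint8_t)(X >> ShiftC);         // logical shift
    assert((AShr & Mask) == LShr);
  }
}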
FV : TV + // To avoid conflicts (infinite loops) with other canonicalizations, this is + // not applied with any constant select arm. + if (Pred == ICmpInst::ICMP_SGT && match(CmpRHS, m_AllOnes()) && + !match(TrueVal, m_Constant()) && !match(FalseVal, m_Constant()) && + ICI->hasOneUse()) { + InstCombiner::BuilderTy::InsertPointGuard Guard(Builder); + Builder.SetInsertPoint(&SI); + Value *IsNeg = Builder.CreateICmpSLT( + CmpLHS, ConstantInt::getNullValue(CmpLHS->getType()), ICI->getName()); + replaceOperand(SI, 0, IsNeg); + SI.swapValues(); + SI.swapProfMetadata(); + return &SI; + } + // FIXME: This code is nearly duplicated in InstSimplify. Using/refactoring // decomposeBitTestICmp() might help. { diff --git a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp index 21c16f07e237..46ff0994e04e 100644 --- a/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -2544,19 +2544,18 @@ void InnerLoopVectorizer::widenIntOrFpInduction( Type *ScalarTy = IntegerType::get(ScalarIV->getContext(), Step->getType()->getScalarSizeInBits()); - Instruction::BinaryOps IncOp = ID.getInductionOpcode(); - if (IncOp == Instruction::BinaryOpsEnd) - IncOp = Instruction::Add; for (unsigned Part = 0; Part < UF; ++Part) { Value *StartIdx = ConstantInt::get(ScalarTy, Part); - Instruction::BinaryOps MulOp = Instruction::Mul; + Value *EntryPart; if (Step->getType()->isFloatingPointTy()) { StartIdx = Builder.CreateUIToFP(StartIdx, Step->getType()); - MulOp = Instruction::FMul; + Value *MulOp = Builder.CreateFMul(StartIdx, Step); + EntryPart = Builder.CreateBinOp(ID.getInductionOpcode(), ScalarIV, + MulOp, "induction"); + } else { + EntryPart = Builder.CreateAdd( + ScalarIV, Builder.CreateMul(StartIdx, Step), "induction"); } - - Value *Mul = Builder.CreateBinOp(MulOp, StartIdx, Step); - Value *EntryPart = Builder.CreateBinOp(IncOp, ScalarIV, Mul, "induction"); State.set(Def, EntryPart, Part); if (Trunc) { assert(!Step->getType()->isFloatingPointTy() && @@ -6035,6 +6034,18 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF, !(InterleaveSmallLoopScalarReduction && HasReductions && VF.isScalar())) return 1; + // If we did not calculate the cost for VF (because the user selected the VF) + // then we calculate the cost of VF here. + if (LoopCost == 0) { + InstructionCost C = expectedCost(VF).first; + assert(C.isValid() && "Expected to have chosen a VF with valid cost"); + LoopCost = *C.getValue(); + + // Loop body is free and there is no need for interleaving. + if (LoopCost == 0) + return 1; + } + RegisterUsage R = calculateRegisterUsage({VF})[0]; // We divide by these constants so assume that we have at least one // instruction that uses at least one register. @@ -6126,16 +6137,6 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(ElementCount VF, assert(IC > 0 && "Interleave count must be greater than 0."); - // If we did not calculate the cost for VF (because the user selected the VF) - // then we calculate the cost of VF here. - if (LoopCost == 0) { - InstructionCost C = expectedCost(VF).first; - assert(C.isValid() && "Expected to have chosen a VF with valid cost"); - LoopCost = *C.getValue(); - } - - assert(LoopCost && "Non-zero loop cost expected"); - // Interleave if we vectorized this loop and there is a reduction that could // benefit from interleaving. 
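The select canonicalization above rewrites the signbit test against zero and swaps the arms (along with the profile metadata), and it is skipped when either arm is a constant so it cannot ping-pong with other folds. The equivalence itself is straightforward to check:

#include <cassert>

int select_sgt(int x, int tv, int fv) { return x > -1 ? tv : fv; }
int select_slt(int x, int tv, int fv) { return x < 0 ? fv : tv; } // swapped arms

int main() {
  for (int x : {-5, -1, 0, 1, 42})
    assert(select_sgt(x, 10, 20) == select_slt(x, 10, 20));
}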
if (VF.isVector() && HasReductions) { diff --git a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp index 620d388199e0..258f6c67e54d 100644 --- a/llvm/lib/Transforms/Vectorize/VectorCombine.cpp +++ b/llvm/lib/Transforms/Vectorize/VectorCombine.cpp @@ -152,12 +152,7 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) { Value *SrcPtr = Load->getPointerOperand()->stripPointerCasts(); assert(isa(SrcPtr->getType()) && "Expected a pointer type"); - // If original AS != Load's AS, we can't bitcast the original pointer and have - // to use Load's operand instead. Ideally we would want to strip pointer casts - // without changing AS, but there's no API to do that ATM. unsigned AS = Load->getPointerAddressSpace(); - if (AS != SrcPtr->getType()->getPointerAddressSpace()) - SrcPtr = Load->getPointerOperand(); // We are potentially transforming byte-sized (8-bit) memory accesses, so make // sure we have all of our type-based constraints in place for this target. @@ -245,7 +240,8 @@ bool VectorCombine::vectorizeLoadInsert(Instruction &I) { // It is safe and potentially profitable to load a vector directly: // inselt undef, load Scalar, 0 --> load VecPtr IRBuilder<> Builder(Load); - Value *CastedPtr = Builder.CreateBitCast(SrcPtr, MinVecTy->getPointerTo(AS)); + Value *CastedPtr = Builder.CreatePointerBitCastOrAddrSpaceCast( + SrcPtr, MinVecTy->getPointerTo(AS)); Value *VecLd = Builder.CreateAlignedLoad(MinVecTy, CastedPtr, Alignment); VecLd = Builder.CreateShuffleVector(VecLd, Mask); diff --git a/llvm/tools/llvm-objdump/ELFDump.cpp b/llvm/tools/llvm-objdump/ELFDump.cpp index 98e71497d022..ca73dafe2b8e 100644 --- a/llvm/tools/llvm-objdump/ELFDump.cpp +++ b/llvm/tools/llvm-objdump/ELFDump.cpp @@ -171,8 +171,12 @@ uint64_t objdump::getELFSectionLMA(const object::ELFSectionRef &Sec) { template static void printDynamicSection(const ELFFile &Elf, StringRef Filename) { - ArrayRef DynamicEntries = - unwrapOrError(Elf.dynamicEntries(), Filename); + auto DynamicEntriesOrErr = Elf.dynamicEntries(); + if (!DynamicEntriesOrErr) { + reportWarning(toString(DynamicEntriesOrErr.takeError()), Filename); + return; + } + ArrayRef DynamicEntries = *DynamicEntriesOrErr; // Find the maximum tag name length to format the value column properly. size_t MaxLen = 0;
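Finally, the llvm-objdump change above downgrades a malformed dynamic section from a fatal error to a warning so the rest of the file is still dumped. A stand-in sketch of that pattern (plain C++, hypothetical parser and error text; llvm::Expected is not used here):

#include <cstdio>
#include <optional>
#include <string>
#include <vector>

struct DynEntry { unsigned Tag; unsigned long long Val; };

// Hypothetical parser that may fail.
std::optional<std::vector<DynEntry>> parseDynamicEntries(bool Malformed,
                                                         std::string &Err) {
  if (Malformed) {
    Err = "invalid dynamic section entry size";
    return std::nullopt;
  }
  return std::vector<DynEntry>{{1 /*DT_NEEDED*/, 0x100}};
}

void printDynamicSection(bool Malformed, const char *Filename) {
  std::string Err;
  auto Entries = parseDynamicEntries(Malformed, Err);
  if (!Entries) {
    std::fprintf(stderr, "warning: '%s': %s\n", Filename, Err.c_str());
    return; // keep dumping the rest of the file instead of exiting
  }
  for (const auto &E : *Entries)
    std::printf("  tag %u, value 0x%llx\n", E.Tag, E.Val);
}

int main() {
  printDynamicSection(false, "good.elf");
  printDynamicSection(true, "bad.elf");
}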