Upgrade our copies of clang, llvm, lld, lldb, compiler-rt and libc++ to

6.0.0 (branches/release_60 r325932). This corresponds to 6.0.0 rc3. MFC after: 3 months X-MFC-With: r327952 PR: 224669
svn path=/head/; revision=329983
2024-07-23 19:28:36 +00:00 · 2018-02-25 13:20:32 +00:00 · 2018-02-25 13:20:32 +00:00 · 4f8786afe3 · 2020-12-20 02:59:44 +00:00
parent bf56a3fe47 93179bb90b 2fb14b7233 6100a9db7d 59909f3a4e adc606d1b7 0f8e52dfc6
commit 4f8786afe3
21 changed files with 379 additions and 168 deletions
--- a/contrib/llvm/include/llvm/Bitcode/LLVMBitCodes.h
+++ b/contrib/llvm/include/llvm/Bitcode/LLVMBitCodes.h
@ -395,6 +395,20 @@ enum OverflowingBinaryOperatorOptionalFlags {
  OBO_NO_SIGNED_WRAP = 1
 };

+/// FastMath Flags
+/// This is a fixed layout derived from the bitcode emitted by LLVM 5.0
+/// intended to decouple the in-memory representation from the serialization.
+enum FastMathMap {
+  UnsafeAlgebra   = (1 << 0), // Legacy
+  NoNaNs          = (1 << 1),
+  NoInfs          = (1 << 2),
+  NoSignedZeros   = (1 << 3),
+  AllowReciprocal = (1 << 4),
+  AllowContract   = (1 << 5),
+  ApproxFunc      = (1 << 6),
+  AllowReassoc    = (1 << 7)
+};
+
 /// PossiblyExactOperatorOptionalFlags - Flags for serializing
 /// PossiblyExactOperator's SubclassOptionalData contents.
 enum PossiblyExactOperatorOptionalFlags { PEO_EXACT = 0 };
--- a/contrib/llvm/include/llvm/MC/MCAsmMacro.h
+++ b/contrib/llvm/include/llvm/MC/MCAsmMacro.h
@ -33,6 +33,6 @@ struct MCAsmMacro {
  MCAsmMacro(StringRef N, StringRef B, MCAsmMacroParameters P)
      : Name(N), Body(B), Parameters(std::move(P)) {}
 };
-}; // namespace llvm
+} // namespace llvm

 #endif
--- a/contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h
+++ b/contrib/llvm/include/llvm/Transforms/Utils/LoopUtils.h
@ -21,6 +21,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/DemandedBits.h"
 #include "llvm/Analysis/EHPersonalities.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/Dominators.h"
@ -172,15 +173,25 @@ class RecurrenceDescriptor {
                               Value *Left, Value *Right);

  /// Returns true if Phi is a reduction of type Kind and adds it to the
-  /// RecurrenceDescriptor.
+  /// RecurrenceDescriptor. If either \p DB is non-null or \p AC and \p DT are
+  /// non-null, the minimal bit width needed to compute the reduction will be
+  /// computed.
  static bool AddReductionVar(PHINode *Phi, RecurrenceKind Kind, Loop *TheLoop,
                              bool HasFunNoNaNAttr,
-                              RecurrenceDescriptor &RedDes);
+                              RecurrenceDescriptor &RedDes,
+                              DemandedBits *DB = nullptr,
+                              AssumptionCache *AC = nullptr,
+                              DominatorTree *DT = nullptr);

-  /// Returns true if Phi is a reduction in TheLoop. The RecurrenceDescriptor is
-  /// returned in RedDes.
+  /// Returns true if Phi is a reduction in TheLoop. The RecurrenceDescriptor
+  /// is returned in RedDes. If either \p DB is non-null or \p AC and \p DT are
+  /// non-null, the minimal bit width needed to compute the reduction will be
+  /// computed.
  static bool isReductionPHI(PHINode *Phi, Loop *TheLoop,
-                             RecurrenceDescriptor &RedDes);
+                             RecurrenceDescriptor &RedDes,
+                             DemandedBits *DB = nullptr,
+                             AssumptionCache *AC = nullptr,
+                             DominatorTree *DT = nullptr);

  /// Returns true if Phi is a first-order recurrence. A first-order recurrence
  /// is a non-reduction recurrence relation in which the value of the
@ -218,24 +229,6 @@ class RecurrenceDescriptor {
  /// Returns true if the recurrence kind is an arithmetic kind.
  static bool isArithmeticRecurrenceKind(RecurrenceKind Kind);

-  /// Determines if Phi may have been type-promoted. If Phi has a single user
-  /// that ANDs the Phi with a type mask, return the user. RT is updated to
-  /// account for the narrower bit width represented by the mask, and the AND
-  /// instruction is added to CI.
-  static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT,
-                                     SmallPtrSetImpl<Instruction *> &Visited,
-                                     SmallPtrSetImpl<Instruction *> &CI);
-
-  /// Returns true if all the source operands of a recurrence are either
-  /// SExtInsts or ZExtInsts. This function is intended to be used with
-  /// lookThroughAnd to determine if the recurrence has been type-promoted. The
-  /// source operands are added to CI, and IsSigned is updated to indicate if
-  /// all source operands are SExtInsts.
-  static bool getSourceExtensionKind(Instruction *Start, Instruction *Exit,
-                                     Type *RT, bool &IsSigned,
-                                     SmallPtrSetImpl<Instruction *> &Visited,
-                                     SmallPtrSetImpl<Instruction *> &CI);
-
  /// Returns the type of the recurrence. This type can be narrower than the
  /// actual type of the Phi if the recurrence has been type-promoted.
  Type *getRecurrenceType() { return RecurrenceType; }
--- a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
+++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp
@ -205,6 +205,11 @@ static cl::opt<unsigned>
                  cl::desc("Max coefficients in AddRec during evolving"),
                  cl::init(16));

+static cl::opt<bool> VersionUnknown(
+    "scev-version-unknown", cl::Hidden,
+    cl::desc("Use predicated scalar evolution to version SCEVUnknowns"),
+    cl::init(false));
+
 //===----------------------------------------------------------------------===//
 //                           SCEV class definitions
 //===----------------------------------------------------------------------===//
@ -11467,6 +11472,8 @@ class SCEVPredicateRewriter : public SCEVRewriteVisitor<SCEVPredicateRewriter> {
  // couldn't create an AddRec for it, or couldn't add the predicate), we just
  // return \p Expr.
  const SCEV *convertToAddRecWithPreds(const SCEVUnknown *Expr) {
+    if (!VersionUnknown)
+      return Expr;
    if (!isa<PHINode>(Expr->getValue()))
      return Expr;
    Optional<std::pair<const SCEV *, SmallVector<const SCEVPredicate *, 3>>>
--- a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp
@ -1046,19 +1046,21 @@ static Comdat::SelectionKind getDecodedComdatSelectionKind(unsigned Val) {

 static FastMathFlags getDecodedFastMathFlags(unsigned Val) {
  FastMathFlags FMF;
-  if (0 != (Val & FastMathFlags::AllowReassoc))
+  if (0 != (Val & bitc::UnsafeAlgebra))
+    FMF.setFast();
+  if (0 != (Val & bitc::AllowReassoc))
    FMF.setAllowReassoc();
-  if (0 != (Val & FastMathFlags::NoNaNs))
+  if (0 != (Val & bitc::NoNaNs))
    FMF.setNoNaNs();
-  if (0 != (Val & FastMathFlags::NoInfs))
+  if (0 != (Val & bitc::NoInfs))
    FMF.setNoInfs();
-  if (0 != (Val & FastMathFlags::NoSignedZeros))
+  if (0 != (Val & bitc::NoSignedZeros))
    FMF.setNoSignedZeros();
-  if (0 != (Val & FastMathFlags::AllowReciprocal))
+  if (0 != (Val & bitc::AllowReciprocal))
    FMF.setAllowReciprocal();
-  if (0 != (Val & FastMathFlags::AllowContract))
+  if (0 != (Val & bitc::AllowContract))
    FMF.setAllowContract(true);
-  if (0 != (Val & FastMathFlags::ApproxFunc))
+  if (0 != (Val & bitc::ApproxFunc))
    FMF.setApproxFunc();
  return FMF;
 }
--- a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
+++ b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp
@ -1330,19 +1330,19 @@ static uint64_t getOptimizationFlags(const Value *V) {
      Flags |= 1 << bitc::PEO_EXACT;
  } else if (const auto *FPMO = dyn_cast<FPMathOperator>(V)) {
    if (FPMO->hasAllowReassoc())
-      Flags |= FastMathFlags::AllowReassoc;
+      Flags |= bitc::AllowReassoc;
    if (FPMO->hasNoNaNs())
-      Flags |= FastMathFlags::NoNaNs;
+      Flags |= bitc::NoNaNs;
    if (FPMO->hasNoInfs())
-      Flags |= FastMathFlags::NoInfs;
+      Flags |= bitc::NoInfs;
    if (FPMO->hasNoSignedZeros())
-      Flags |= FastMathFlags::NoSignedZeros;
+      Flags |= bitc::NoSignedZeros;
    if (FPMO->hasAllowReciprocal())
-      Flags |= FastMathFlags::AllowReciprocal;
+      Flags |= bitc::AllowReciprocal;
    if (FPMO->hasAllowContract())
-      Flags |= FastMathFlags::AllowContract;
+      Flags |= bitc::AllowContract;
    if (FPMO->hasApproxFunc())
-      Flags |= FastMathFlags::ApproxFunc;
+      Flags |= bitc::ApproxFunc;
  }

  return Flags;
@ -3183,7 +3183,7 @@ void ModuleBitcodeWriter::writeBlockInfo() {
    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // LHS
    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::VBR, 6)); // RHS
    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 4)); // opc
-    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 7)); // flags
+    Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 8)); // flags
    if (Stream.EmitBlockInfoAbbrev(bitc::FUNCTION_BLOCK_ID, Abbv) !=
        FUNCTION_INST_BINOP_FLAGS_ABBREV)
      llvm_unreachable("Unexpected abbrev ordering!");
--- a/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/contrib/llvm/lib/Target/AArch64/AArch64InstructionSelector.cpp
@ -133,16 +133,21 @@ AArch64InstructionSelector::AArch64InstructionSelector(
 // for each class in the bank.
 static const TargetRegisterClass *
 getRegClassForTypeOnBank(LLT Ty, const RegisterBank &RB,
-                         const RegisterBankInfo &RBI) {
+                         const RegisterBankInfo &RBI,
+                         bool GetAllRegSet = false) {
  if (RB.getID() == AArch64::GPRRegBankID) {
    if (Ty.getSizeInBits() <= 32)
-      return &AArch64::GPR32RegClass;
+      return GetAllRegSet ? &AArch64::GPR32allRegClass
+                          : &AArch64::GPR32RegClass;
    if (Ty.getSizeInBits() == 64)
-      return &AArch64::GPR64RegClass;
+      return GetAllRegSet ? &AArch64::GPR64allRegClass
+                          : &AArch64::GPR64RegClass;
    return nullptr;
  }

  if (RB.getID() == AArch64::FPRRegBankID) {
+    if (Ty.getSizeInBits() <= 16)
+      return &AArch64::FPR16RegClass;
    if (Ty.getSizeInBits() == 32)
      return &AArch64::FPR32RegClass;
    if (Ty.getSizeInBits() == 64)
@ -310,19 +315,46 @@ static unsigned selectLoadStoreUIOp(unsigned GenericOpc, unsigned RegBankID,
  return GenericOpc;
 }

+static bool selectFP16CopyFromGPR32(MachineInstr &I, const TargetInstrInfo &TII,
+                                    MachineRegisterInfo &MRI, unsigned SrcReg) {
+  // Copies from gpr32 to fpr16 need to use a sub-register copy.
+  unsigned CopyReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
+  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(AArch64::COPY))
+      .addDef(CopyReg)
+      .addUse(SrcReg);
+  unsigned SubRegCopy = MRI.createVirtualRegister(&AArch64::FPR16RegClass);
+  BuildMI(*I.getParent(), I, I.getDebugLoc(), TII.get(TargetOpcode::COPY))
+      .addDef(SubRegCopy)
+      .addUse(CopyReg, 0, AArch64::hsub);
+
+  MachineOperand &RegOp = I.getOperand(1);
+  RegOp.setReg(SubRegCopy);
+  return true;
+}
+
 static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
                       MachineRegisterInfo &MRI, const TargetRegisterInfo &TRI,
                       const RegisterBankInfo &RBI) {

  unsigned DstReg = I.getOperand(0).getReg();
+  unsigned SrcReg = I.getOperand(1).getReg();
+
  if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
+    if (TRI.getRegClass(AArch64::FPR16RegClassID)->contains(DstReg) &&
+        !TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
+      const RegisterBank &RegBank = *RBI.getRegBank(SrcReg, MRI, TRI);
+      const TargetRegisterClass *SrcRC = getRegClassForTypeOnBank(
+          MRI.getType(SrcReg), RegBank, RBI, /* GetAllRegSet */ true);
+      if (SrcRC == &AArch64::GPR32allRegClass)
+        return selectFP16CopyFromGPR32(I, TII, MRI, SrcReg);
+    }
    assert(I.isCopy() && "Generic operators do not allow physical registers");
    return true;
  }

  const RegisterBank &RegBank = *RBI.getRegBank(DstReg, MRI, TRI);
  const unsigned DstSize = MRI.getType(DstReg).getSizeInBits();
-  unsigned SrcReg = I.getOperand(1).getReg();
+  (void)DstSize;
  const unsigned SrcSize = RBI.getSizeInBits(SrcReg, MRI, TRI);
  (void)SrcSize;
  assert((!TargetRegisterInfo::isPhysicalRegister(SrcReg) || I.isCopy()) &&
@ -340,26 +372,38 @@ static bool selectCopy(MachineInstr &I, const TargetInstrInfo &TII,
      "Copy with different width?!");
  assert((DstSize <= 64 || RegBank.getID() == AArch64::FPRRegBankID) &&
         "GPRs cannot get more than 64-bit width values");
-  const TargetRegisterClass *RC = nullptr;

-  if (RegBank.getID() == AArch64::FPRRegBankID) {
-    if (DstSize <= 16)
-      RC = &AArch64::FPR16RegClass;
-    else if (DstSize <= 32)
-      RC = &AArch64::FPR32RegClass;
-    else if (DstSize <= 64)
-      RC = &AArch64::FPR64RegClass;
-    else if (DstSize <= 128)
-      RC = &AArch64::FPR128RegClass;
-    else {
-      DEBUG(dbgs() << "Unexpected bitcast size " << DstSize << '\n');
-      return false;
+  const TargetRegisterClass *RC = getRegClassForTypeOnBank(
+      MRI.getType(DstReg), RegBank, RBI, /* GetAllRegSet */ true);
+  if (!RC) {
+    DEBUG(dbgs() << "Unexpected bitcast size " << DstSize << '\n');
+    return false;
+  }
+
+  if (!TargetRegisterInfo::isPhysicalRegister(SrcReg)) {
+    const RegClassOrRegBank &RegClassOrBank = MRI.getRegClassOrRegBank(SrcReg);
+    const TargetRegisterClass *SrcRC =
+        RegClassOrBank.dyn_cast<const TargetRegisterClass *>();
+    const RegisterBank *RB = nullptr;
+    if (!SrcRC) {
+      RB = RegClassOrBank.get<const RegisterBank *>();
+      SrcRC = getRegClassForTypeOnBank(MRI.getType(SrcReg), *RB, RBI, true);
+    }
+    // Copies from fpr16 to gpr32 need to use SUBREG_TO_REG.
+    if (RC == &AArch64::GPR32allRegClass && SrcRC == &AArch64::FPR16RegClass) {
+      unsigned PromoteReg = MRI.createVirtualRegister(&AArch64::FPR32RegClass);
+      BuildMI(*I.getParent(), I, I.getDebugLoc(),
+              TII.get(AArch64::SUBREG_TO_REG))
+          .addDef(PromoteReg)
+          .addImm(0)
+          .addUse(SrcReg)
+          .addImm(AArch64::hsub);
+      MachineOperand &RegOp = I.getOperand(1);
+      RegOp.setReg(PromoteReg);
+    } else if (RC == &AArch64::FPR16RegClass &&
+               SrcRC == &AArch64::GPR32allRegClass) {
+      selectFP16CopyFromGPR32(I, TII, MRI, SrcReg);
    }
-  } else {
-    assert(RegBank.getID() == AArch64::GPRRegBankID &&
-           "Bitcast for the flags?");
-    RC =
-        DstSize <= 32 ? &AArch64::GPR32allRegClass : &AArch64::GPR64allRegClass;
  }

  // No need to constrain SrcReg. It will get constrained when
@ -795,15 +839,23 @@ bool AArch64InstructionSelector::select(MachineInstr &I,
  }
  case TargetOpcode::G_EXTRACT: {
    LLT SrcTy = MRI.getType(I.getOperand(1).getReg());
+    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
+    unsigned SrcSize = SrcTy.getSizeInBits();
    // Larger extracts are vectors, same-size extracts should be something else
    // by now (either split up or simplified to a COPY).
    if (SrcTy.getSizeInBits() > 64 || Ty.getSizeInBits() > 32)
      return false;

-    I.setDesc(TII.get(AArch64::UBFMXri));
+    I.setDesc(TII.get(SrcSize == 64 ? AArch64::UBFMXri : AArch64::UBFMWri));
    MachineInstrBuilder(MF, I).addImm(I.getOperand(2).getImm() +
                                      Ty.getSizeInBits() - 1);

+    if (SrcSize < 64) {
+      assert(SrcSize == 32 && DstTy.getSizeInBits() == 16 &&
+             "unexpected G_EXTRACT types");
+      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+    }
+
    unsigned DstReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
    BuildMI(MBB, std::next(I.getIterator()), I.getDebugLoc(),
            TII.get(AArch64::COPY))
@ -818,17 +870,26 @@ bool AArch64InstructionSelector::select(MachineInstr &I,

  case TargetOpcode::G_INSERT: {
    LLT SrcTy = MRI.getType(I.getOperand(2).getReg());
+    LLT DstTy = MRI.getType(I.getOperand(0).getReg());
+    unsigned DstSize = DstTy.getSizeInBits();
+    (void)DstSize;
    // Larger inserts are vectors, same-size ones should be something else by
    // now (split up or turned into COPYs).
    if (Ty.getSizeInBits() > 64 || SrcTy.getSizeInBits() > 32)
      return false;

-    I.setDesc(TII.get(AArch64::BFMXri));
+    I.setDesc(TII.get(DstSize == 64 ? AArch64::BFMXri : AArch64::BFMWri));
    unsigned LSB = I.getOperand(3).getImm();
    unsigned Width = MRI.getType(I.getOperand(2).getReg()).getSizeInBits();
-    I.getOperand(3).setImm((64 - LSB) % 64);
+    I.getOperand(3).setImm((DstSize - LSB) % DstSize);
    MachineInstrBuilder(MF, I).addImm(Width - 1);

+    if (DstSize < 64) {
+      assert(DstSize == 32 && SrcTy.getSizeInBits() == 16 &&
+             "unexpected G_INSERT types");
+      return constrainSelectedInstRegOperands(I, TII, TRI, RBI);
+    }
+
    unsigned SrcReg = MRI.createGenericVirtualRegister(LLT::scalar(64));
    BuildMI(MBB, I.getIterator(), I.getDebugLoc(),
            TII.get(AArch64::SUBREG_TO_REG))
--- a/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
+++ b/contrib/llvm/lib/Target/AMDGPU/SIInstrInfo.cpp
@ -3797,7 +3797,8 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
        }
      }

-      BuildMI(*MBB, Inst, Inst.getDebugLoc(),
+      MachineInstr *NewInstr =
+        BuildMI(*MBB, Inst, Inst.getDebugLoc(),
              get(AMDGPU::BUFFER_LOAD_DWORD_OFFEN), VDst)
        .add(*VAddr) // vaddr
        .add(*getNamedOperand(Inst, AMDGPU::OpName::sbase)) // srsrc
@ -3806,12 +3807,17 @@ void SIInstrInfo::moveToVALU(MachineInstr &TopInst) const {
        .addImm(getNamedOperand(Inst, AMDGPU::OpName::glc)->getImm())
        .addImm(0) // slc
        .addImm(0) // tfe
-        .setMemRefs(Inst.memoperands_begin(), Inst.memoperands_end());
+        .setMemRefs(Inst.memoperands_begin(), Inst.memoperands_end())
+        .getInstr();

      MRI.replaceRegWith(getNamedOperand(Inst, AMDGPU::OpName::sdst)->getReg(),
                         VDst);
      addUsersToMoveToVALUWorklist(VDst, MRI, Worklist);
      Inst.eraseFromParent();
+
+      // Legalize all operands other than the offset. Notably, convert the srsrc
+      // into SGPRs using v_readfirstlane if needed.
+      legalizeOperands(*NewInstr);
      continue;
    }
    }
--- a/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
+++ b/contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp
@ -454,13 +454,16 @@ bool PPCCTRLoops::mightUseCTR(BasicBlock *BB) {
        return true;
    }

+    // FREM is always a call.
+    if (J->getOpcode() == Instruction::FRem)
+      return true;
+
    if (STI->useSoftFloat()) {
      switch(J->getOpcode()) {
      case Instruction::FAdd:
      case Instruction::FSub:
      case Instruction::FMul:
      case Instruction::FDiv:
-      case Instruction::FRem:
      case Instruction::FPTrunc:
      case Instruction::FPExt:
      case Instruction::FPToUI:
--- a/contrib/llvm/lib/Target/X86/X86.td
+++ b/contrib/llvm/lib/Target/X86/X86.td
@ -740,7 +740,13 @@ class SkylakeServerProc<string Name> : ProcModel<Name, SkylakeServerModel,
 def : SkylakeServerProc<"skylake-avx512">;
 def : SkylakeServerProc<"skx">; // Legacy alias.

-def CNLFeatures : ProcessorFeatures<SKXFeatures.Value, [
+def CNLFeatures : ProcessorFeatures<SKLFeatures.Value, [
+  FeatureAVX512,
+  FeatureCDI,
+  FeatureDQI,
+  FeatureBWI,
+  FeatureVLX,
+  FeaturePKU,
  FeatureVBMI,
  FeatureIFMA,
  FeatureSHA
--- a/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/contrib/llvm/lib/Transforms/InstCombine/InstCombineSelect.cpp
@ -1643,11 +1643,25 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
    }
  }

+  auto canMergeSelectThroughBinop = [](BinaryOperator *BO) {
+    // The select might be preventing a division by 0.
+    switch (BO->getOpcode()) {
+    default:
+      return true;
+    case Instruction::SRem:
+    case Instruction::URem:
+    case Instruction::SDiv:
+    case Instruction::UDiv:
+      return false;
+    }
+  };
+
  // Try to simplify a binop sandwiched between 2 selects with the same
  // condition.
  // select(C, binop(select(C, X, Y), W), Z) -> select(C, binop(X, W), Z)
  BinaryOperator *TrueBO;
-  if (match(TrueVal, m_OneUse(m_BinOp(TrueBO)))) {
+  if (match(TrueVal, m_OneUse(m_BinOp(TrueBO))) &&
+      canMergeSelectThroughBinop(TrueBO)) {
    if (auto *TrueBOSI = dyn_cast<SelectInst>(TrueBO->getOperand(0))) {
      if (TrueBOSI->getCondition() == CondVal) {
        TrueBO->setOperand(0, TrueBOSI->getTrueValue());
@ -1666,7 +1680,8 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {

  // select(C, Z, binop(select(C, X, Y), W)) -> select(C, Z, binop(Y, W))
  BinaryOperator *FalseBO;
-  if (match(FalseVal, m_OneUse(m_BinOp(FalseBO)))) {
+  if (match(FalseVal, m_OneUse(m_BinOp(FalseBO))) &&
+      canMergeSelectThroughBinop(FalseBO)) {
    if (auto *FalseBOSI = dyn_cast<SelectInst>(FalseBO->getOperand(0))) {
      if (FalseBOSI->getCondition() == CondVal) {
        FalseBO->setOperand(0, FalseBOSI->getFalseValue());
--- a/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
+++ b/contrib/llvm/lib/Transforms/Scalar/LICM.cpp
@ -97,7 +97,7 @@ static bool hoist(Instruction &I, const DominatorTree *DT, const Loop *CurLoop,
                  const LoopSafetyInfo *SafetyInfo,
                  OptimizationRemarkEmitter *ORE);
 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
-                 const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
+                 const Loop *CurLoop, LoopSafetyInfo *SafetyInfo,
                 OptimizationRemarkEmitter *ORE, bool FreeInLoop);
 static bool isSafeToExecuteUnconditionally(Instruction &Inst,
                                           const DominatorTree *DT,
@ -855,10 +855,16 @@ static Instruction *sinkThroughTriviallyReplacablePHI(
  return New;
 }

-static bool canSplitPredecessors(PHINode *PN) {
+static bool canSplitPredecessors(PHINode *PN, LoopSafetyInfo *SafetyInfo) {
  BasicBlock *BB = PN->getParent();
  if (!BB->canSplitPredecessors())
    return false;
+  // It's not impossible to split EHPad blocks, but if BlockColors already exist
+  // it requires updating BlockColors for all offspring blocks accordingly. By
+  // skipping such a corner case, we can make updating BlockColors after
+  // splitting predecessors fairly simple.
+  if (!SafetyInfo->BlockColors.empty() && BB->getFirstNonPHI()->isEHPad())
+    return false;
  for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
    BasicBlock *BBPred = *PI;
    if (isa<IndirectBrInst>(BBPred->getTerminator()))
@ -868,7 +874,8 @@ static bool canSplitPredecessors(PHINode *PN) {
 }

 static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
-                                        LoopInfo *LI, const Loop *CurLoop) {
+                                        LoopInfo *LI, const Loop *CurLoop,
+                                        LoopSafetyInfo *SafetyInfo) {
 #ifndef NDEBUG
  SmallVector<BasicBlock *, 32> ExitBlocks;
  CurLoop->getUniqueExitBlocks(ExitBlocks);
@ -910,13 +917,21 @@ static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
  // LE:
  //   %p = phi [%p1, %LE.split], [%p2, %LE.split2]
  //
+  auto &BlockColors = SafetyInfo->BlockColors;
  SmallSetVector<BasicBlock *, 8> PredBBs(pred_begin(ExitBB), pred_end(ExitBB));
  while (!PredBBs.empty()) {
    BasicBlock *PredBB = *PredBBs.begin();
    assert(CurLoop->contains(PredBB) &&
           "Expect all predecessors are in the loop");
-    if (PN->getBasicBlockIndex(PredBB) >= 0)
-      SplitBlockPredecessors(ExitBB, PredBB, ".split.loop.exit", DT, LI, true);
+    if (PN->getBasicBlockIndex(PredBB) >= 0) {
+      BasicBlock *NewPred = SplitBlockPredecessors(
+          ExitBB, PredBB, ".split.loop.exit", DT, LI, true);
+      // Since we do not allow splitting EH-block with BlockColors in
+      // canSplitPredecessors(), we can simply assign predecessor's color to
+      // the new block.
+      if (!BlockColors.empty())
+        BlockColors[NewPred] = BlockColors[PredBB];
+    }
    PredBBs.remove(PredBB);
  }
 }
@ -927,7 +942,7 @@ static void splitPredecessorsOfLoopExit(PHINode *PN, DominatorTree *DT,
 /// position, and may either delete it or move it to outside of the loop.
 ///
 static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
-                 const Loop *CurLoop, const LoopSafetyInfo *SafetyInfo,
+                 const Loop *CurLoop, LoopSafetyInfo *SafetyInfo,
                 OptimizationRemarkEmitter *ORE, bool FreeInLoop) {
  DEBUG(dbgs() << "LICM sinking instruction: " << I << "\n");
  ORE->emit([&]() {
@ -975,12 +990,12 @@ static bool sink(Instruction &I, LoopInfo *LI, DominatorTree *DT,
    if (isTriviallyReplacablePHI(*PN, I))
      continue;

-    if (!canSplitPredecessors(PN))
+    if (!canSplitPredecessors(PN, SafetyInfo))
      return Changed;

    // Split predecessors of the PHI so that we can make users trivially
    // replacable.
-    splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop);
+    splitPredecessorsOfLoopExit(PN, DT, LI, CurLoop, SafetyInfo);

    // Should rebuild the iterators, as they may be invalidated by
    // splitPredecessorsOfLoopExit().
--- a/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
+++ b/contrib/llvm/lib/Transforms/Utils/LoopUtils.cpp
@ -23,6 +23,7 @@
 #include "llvm/Analysis/ScalarEvolutionExpander.h"
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Module.h"
@ -30,6 +31,7 @@
 #include "llvm/IR/ValueHandle.h"
 #include "llvm/Pass.h"
 #include "llvm/Support/Debug.h"
+#include "llvm/Support/KnownBits.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"

 using namespace llvm;
@ -77,10 +79,13 @@ bool RecurrenceDescriptor::isArithmeticRecurrenceKind(RecurrenceKind Kind) {
  return false;
 }

-Instruction *
-RecurrenceDescriptor::lookThroughAnd(PHINode *Phi, Type *&RT,
-                                     SmallPtrSetImpl<Instruction *> &Visited,
-                                     SmallPtrSetImpl<Instruction *> &CI) {
+/// Determines if Phi may have been type-promoted. If Phi has a single user
+/// that ANDs the Phi with a type mask, return the user. RT is updated to
+/// account for the narrower bit width represented by the mask, and the AND
+/// instruction is added to CI.
+static Instruction *lookThroughAnd(PHINode *Phi, Type *&RT,
+                                   SmallPtrSetImpl<Instruction *> &Visited,
+                                   SmallPtrSetImpl<Instruction *> &CI) {
  if (!Phi->hasOneUse())
    return Phi;

@ -101,70 +106,92 @@ RecurrenceDescriptor::lookThroughAnd(PHINode *Phi, Type *&RT,
  return Phi;
 }

-bool RecurrenceDescriptor::getSourceExtensionKind(
-    Instruction *Start, Instruction *Exit, Type *RT, bool &IsSigned,
-    SmallPtrSetImpl<Instruction *> &Visited,
-    SmallPtrSetImpl<Instruction *> &CI) {
+/// Compute the minimal bit width needed to represent a reduction whose exit
+/// instruction is given by Exit.
+static std::pair<Type *, bool> computeRecurrenceType(Instruction *Exit,
+                                                     DemandedBits *DB,
+                                                     AssumptionCache *AC,
+                                                     DominatorTree *DT) {
+  bool IsSigned = false;
+  const DataLayout &DL = Exit->getModule()->getDataLayout();
+  uint64_t MaxBitWidth = DL.getTypeSizeInBits(Exit->getType());

-  SmallVector<Instruction *, 8> Worklist;
-  bool FoundOneOperand = false;
-  unsigned DstSize = RT->getPrimitiveSizeInBits();
-  Worklist.push_back(Exit);
+  if (DB) {
+    // Use the demanded bits analysis to determine the bits that are live out
+    // of the exit instruction, rounding up to the nearest power of two. If the
+    // use of demanded bits results in a smaller bit width, we know the value
+    // must be positive (i.e., IsSigned = false), because if this were not the
+    // case, the sign bit would have been demanded.
+    auto Mask = DB->getDemandedBits(Exit);
+    MaxBitWidth = Mask.getBitWidth() - Mask.countLeadingZeros();
+  }

-  // Traverse the instructions in the reduction expression, beginning with the
-  // exit value.
-  while (!Worklist.empty()) {
-    Instruction *I = Worklist.pop_back_val();
-    for (Use &U : I->operands()) {
-
-      // Terminate the traversal if the operand is not an instruction, or we
-      // reach the starting value.
-      Instruction *J = dyn_cast<Instruction>(U.get());
-      if (!J || J == Start)
-        continue;
-
-      // Otherwise, investigate the operation if it is also in the expression.
-      if (Visited.count(J)) {
-        Worklist.push_back(J);
-        continue;
-      }
-
-      // If the operand is not in Visited, it is not a reduction operation, but
-      // it does feed into one. Make sure it is either a single-use sign- or
-      // zero-extend instruction.
-      CastInst *Cast = dyn_cast<CastInst>(J);
-      bool IsSExtInst = isa<SExtInst>(J);
-      if (!Cast || !Cast->hasOneUse() || !(isa<ZExtInst>(J) || IsSExtInst))
-        return false;
-
-      // Ensure the source type of the extend is no larger than the reduction
-      // type. It is not necessary for the types to be identical.
-      unsigned SrcSize = Cast->getSrcTy()->getPrimitiveSizeInBits();
-      if (SrcSize > DstSize)
-        return false;
-
-      // Furthermore, ensure that all such extends are of the same kind.
-      if (FoundOneOperand) {
-        if (IsSigned != IsSExtInst)
-          return false;
-      } else {
-        FoundOneOperand = true;
-        IsSigned = IsSExtInst;
-      }
-
-      // Lastly, if the source type of the extend matches the reduction type,
-      // add the extend to CI so that we can avoid accounting for it in the
-      // cost model.
-      if (SrcSize == DstSize)
-        CI.insert(Cast);
+  if (MaxBitWidth == DL.getTypeSizeInBits(Exit->getType()) && AC && DT) {
+    // If demanded bits wasn't able to limit the bit width, we can try to use
+    // value tracking instead. This can be the case, for example, if the value
+    // may be negative.
+    auto NumSignBits = ComputeNumSignBits(Exit, DL, 0, AC, nullptr, DT);
+    auto NumTypeBits = DL.getTypeSizeInBits(Exit->getType());
+    MaxBitWidth = NumTypeBits - NumSignBits;
+    KnownBits Bits = computeKnownBits(Exit, DL);
+    if (!Bits.isNonNegative()) {
+      // If the value is not known to be non-negative, we set IsSigned to true,
+      // meaning that we will use sext instructions instead of zext
+      // instructions to restore the original type.
+      IsSigned = true;
+      if (!Bits.isNegative())
+        // If the value is not known to be negative, we don't know what the
+        // upper bit is, and therefore, we don't know what kind of extend we
+        // will need. In this case, just increase the bit width by one bit and
+        // use sext.
+        ++MaxBitWidth;
    }
  }
-  return true;
+  if (!isPowerOf2_64(MaxBitWidth))
+    MaxBitWidth = NextPowerOf2(MaxBitWidth);
+
+  return std::make_pair(Type::getIntNTy(Exit->getContext(), MaxBitWidth),
+                        IsSigned);
+}
+
+/// Collect cast instructions that can be ignored in the vectorizer's cost
+/// model, given a reduction exit value and the minimal type in which the
+/// reduction can be represented.
+static void collectCastsToIgnore(Loop *TheLoop, Instruction *Exit,
+                                 Type *RecurrenceType,
+                                 SmallPtrSetImpl<Instruction *> &Casts) {
+
+  SmallVector<Instruction *, 8> Worklist;
+  SmallPtrSet<Instruction *, 8> Visited;
+  Worklist.push_back(Exit);
+
+  while (!Worklist.empty()) {
+    Instruction *Val = Worklist.pop_back_val();
+    Visited.insert(Val);
+    if (auto *Cast = dyn_cast<CastInst>(Val))
+      if (Cast->getSrcTy() == RecurrenceType) {
+        // If the source type of a cast instruction is equal to the recurrence
+        // type, it will be eliminated, and should be ignored in the vectorizer
+        // cost model.
+        Casts.insert(Cast);
+        continue;
+      }
+
+    // Add all operands to the work list if they are loop-varying values that
+    // we haven't yet visited.
+    for (Value *O : cast<User>(Val)->operands())
+      if (auto *I = dyn_cast<Instruction>(O))
+        if (TheLoop->contains(I) && !Visited.count(I))
+          Worklist.push_back(I);
+  }
 }

 bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
                                           Loop *TheLoop, bool HasFunNoNaNAttr,
-                                           RecurrenceDescriptor &RedDes) {
+                                           RecurrenceDescriptor &RedDes,
+                                           DemandedBits *DB,
+                                           AssumptionCache *AC,
+                                           DominatorTree *DT) {
  if (Phi->getNumIncomingValues() != 2)
    return false;

@ -353,14 +380,49 @@ bool RecurrenceDescriptor::AddReductionVar(PHINode *Phi, RecurrenceKind Kind,
  if (!FoundStartPHI || !FoundReduxOp || !ExitInstruction)
    return false;

-  // If we think Phi may have been type-promoted, we also need to ensure that
-  // all source operands of the reduction are either SExtInsts or ZEstInsts. If
-  // so, we will be able to evaluate the reduction in the narrower bit width.
-  if (Start != Phi)
-    if (!getSourceExtensionKind(Start, ExitInstruction, RecurrenceType,
-                                IsSigned, VisitedInsts, CastInsts))
+  if (Start != Phi) {
+    // If the starting value is not the same as the phi node, we speculatively
+    // looked through an 'and' instruction when evaluating a potential
+    // arithmetic reduction to determine if it may have been type-promoted.
+    //
+    // We now compute the minimal bit width that is required to represent the
+    // reduction. If this is the same width that was indicated by the 'and', we
+    // can represent the reduction in the smaller type. The 'and' instruction
+    // will be eliminated since it will essentially be a cast instruction that
+    // can be ignored in the cost model. If we compute a different type than we
+    // did when evaluating the 'and', the 'and' will not be eliminated, and we
+    // will end up with different kinds of operations in the recurrence
+    // expression (e.g., RK_IntegerAND, RK_IntegerADD). We give up if this is
+    // the case.
+    //
+    // The vectorizer relies on InstCombine to perform the actual
+    // type-shrinking. It does this by inserting instructions to truncate the
+    // exit value of the reduction to the width indicated by RecurrenceType and
+    // then extend this value back to the original width. If IsSigned is false,
+    // a 'zext' instruction will be generated; otherwise, a 'sext' will be
+    // used.
+    //
+    // TODO: We should not rely on InstCombine to rewrite the reduction in the
+    //       smaller type. We should just generate a correctly typed expression
+    //       to begin with.
+    Type *ComputedType;
+    std::tie(ComputedType, IsSigned) =
+        computeRecurrenceType(ExitInstruction, DB, AC, DT);
+    if (ComputedType != RecurrenceType)
      return false;

+    // The recurrence expression will be represented in a narrower type. If
+    // there are any cast instructions that will be unnecessary, collect them
+    // in CastInsts. Note that the 'and' instruction was already included in
+    // this list.
+    //
+    // TODO: A better way to represent this may be to tag in some way all the
+    //       instructions that are a part of the reduction. The vectorizer cost
+    //       model could then apply the recurrence type to these instructions,
+    //       without needing a white list of instructions to ignore.
+    collectCastsToIgnore(TheLoop, ExitInstruction, RecurrenceType, CastInsts);
+  }
+
  // We found a reduction var if we have reached the original phi node and we
  // only have a single instruction with out-of-loop users.

@ -480,47 +542,57 @@ bool RecurrenceDescriptor::hasMultipleUsesOf(
  return false;
 }
 bool RecurrenceDescriptor::isReductionPHI(PHINode *Phi, Loop *TheLoop,
-                                          RecurrenceDescriptor &RedDes) {
+                                          RecurrenceDescriptor &RedDes,
+                                          DemandedBits *DB, AssumptionCache *AC,
+                                          DominatorTree *DT) {

  BasicBlock *Header = TheLoop->getHeader();
  Function &F = *Header->getParent();
  bool HasFunNoNaNAttr =
      F.getFnAttribute("no-nans-fp-math").getValueAsString() == "true";

-  if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes)) {
+  if (AddReductionVar(Phi, RK_IntegerAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
+                      AC, DT)) {
    DEBUG(dbgs() << "Found an ADD reduction PHI." << *Phi << "\n");
    return true;
  }
-  if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes)) {
+  if (AddReductionVar(Phi, RK_IntegerMult, TheLoop, HasFunNoNaNAttr, RedDes, DB,
+                      AC, DT)) {
    DEBUG(dbgs() << "Found a MUL reduction PHI." << *Phi << "\n");
    return true;
  }
-  if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes)) {
+  if (AddReductionVar(Phi, RK_IntegerOr, TheLoop, HasFunNoNaNAttr, RedDes, DB,
+                      AC, DT)) {
    DEBUG(dbgs() << "Found an OR reduction PHI." << *Phi << "\n");
    return true;
  }
-  if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes)) {
+  if (AddReductionVar(Phi, RK_IntegerAnd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
+                      AC, DT)) {
    DEBUG(dbgs() << "Found an AND reduction PHI." << *Phi << "\n");
    return true;
  }
-  if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes)) {
+  if (AddReductionVar(Phi, RK_IntegerXor, TheLoop, HasFunNoNaNAttr, RedDes, DB,
+                      AC, DT)) {
    DEBUG(dbgs() << "Found a XOR reduction PHI." << *Phi << "\n");
    return true;
  }
-  if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr,
-                      RedDes)) {
+  if (AddReductionVar(Phi, RK_IntegerMinMax, TheLoop, HasFunNoNaNAttr, RedDes,
+                      DB, AC, DT)) {
    DEBUG(dbgs() << "Found a MINMAX reduction PHI." << *Phi << "\n");
    return true;
  }
-  if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes)) {
+  if (AddReductionVar(Phi, RK_FloatMult, TheLoop, HasFunNoNaNAttr, RedDes, DB,
+                      AC, DT)) {
    DEBUG(dbgs() << "Found an FMult reduction PHI." << *Phi << "\n");
    return true;
  }
-  if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes)) {
+  if (AddReductionVar(Phi, RK_FloatAdd, TheLoop, HasFunNoNaNAttr, RedDes, DB,
+                      AC, DT)) {
    DEBUG(dbgs() << "Found an FAdd reduction PHI." << *Phi << "\n");
    return true;
  }
-  if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes)) {
+  if (AddReductionVar(Phi, RK_FloatMinMax, TheLoop, HasFunNoNaNAttr, RedDes, DB,
+                      AC, DT)) {
    DEBUG(dbgs() << "Found an float MINMAX reduction PHI." << *Phi << "\n");
    return true;
  }
--- a/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
+++ b/contrib/llvm/lib/Transforms/Vectorize/LoopVectorize.cpp
@ -1542,9 +1542,10 @@ class LoopVectorizationLegality {
      const TargetTransformInfo *TTI,
      std::function<const LoopAccessInfo &(Loop &)> *GetLAA, LoopInfo *LI,
      OptimizationRemarkEmitter *ORE, LoopVectorizationRequirements *R,
-      LoopVectorizeHints *H)
+      LoopVectorizeHints *H, DemandedBits *DB, AssumptionCache *AC)
      : TheLoop(L), PSE(PSE), TLI(TLI), TTI(TTI), DT(DT), GetLAA(GetLAA),
-        ORE(ORE), InterleaveInfo(PSE, L, DT, LI), Requirements(R), Hints(H) {}
+        ORE(ORE), InterleaveInfo(PSE, L, DT, LI), Requirements(R), Hints(H),
+        DB(DB), AC(AC) {}

  /// ReductionList contains the reduction descriptors for all
  /// of the reductions that were found in the loop.
@ -1833,6 +1834,14 @@ class LoopVectorizationLegality {
  /// Used to emit an analysis of any legality issues.
  LoopVectorizeHints *Hints;

+  /// The demanded bits analysis is used to compute the minimum type size in
+  /// which a reduction can be computed.
+  DemandedBits *DB;
+
+  /// The assumption cache analysis is used to compute the minimum type size in
+  /// which a reduction can be computed.
+  AssumptionCache *AC;
+
  /// While vectorizing these instructions we have to generate a
  /// call to the appropriate masked intrinsic
  SmallPtrSet<const Instruction *, 8> MaskedOp;
@ -5300,7 +5309,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() {
        }

        RecurrenceDescriptor RedDes;
-        if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes)) {
+        if (RecurrenceDescriptor::isReductionPHI(Phi, TheLoop, RedDes, DB, AC,
+                                                 DT)) {
          if (RedDes.hasUnsafeAlgebra())
            Requirements->addUnsafeAlgebraInst(RedDes.getUnsafeAlgebraInst());
          AllowedExit.insert(RedDes.getLoopExitInstr());
@ -8514,7 +8524,7 @@ bool LoopVectorizePass::processLoop(Loop *L) {
  // Check if it is legal to vectorize the loop.
  LoopVectorizationRequirements Requirements(*ORE);
  LoopVectorizationLegality LVL(L, PSE, DT, TLI, AA, F, TTI, GetLAA, LI, ORE,
-                                &Requirements, &Hints);
+                                &Requirements, &Hints, DB, AC);
  if (!LVL.canVectorize()) {
    DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n");
    emitMissedWarning(F, L, Hints, ORE);
--- a/contrib/llvm/tools/clang/lib/Basic/Targets/X86.cpp
+++ b/contrib/llvm/tools/clang/lib/Basic/Targets/X86.cpp
@ -152,7 +152,8 @@ bool X86TargetInfo::initFeatureMap(
    setFeatureEnabledImpl(Features, "avx512bw", true);
    setFeatureEnabledImpl(Features, "avx512vl", true);
    setFeatureEnabledImpl(Features, "pku", true);
-    setFeatureEnabledImpl(Features, "clwb", true);
+    if (Kind != CK_Cannonlake) // CNL inherits all SKX features, except CLWB
+      setFeatureEnabledImpl(Features, "clwb", true);
    LLVM_FALLTHROUGH;
  case CK_SkylakeClient:
    setFeatureEnabledImpl(Features, "xsavec", true);
--- a/contrib/llvm/tools/clang/lib/Sema/SemaExpr.cpp
+++ b/contrib/llvm/tools/clang/lib/Sema/SemaExpr.cpp
@ -14926,7 +14926,8 @@ static void DoMarkVarDeclReferenced(Sema &SemaRef, SourceLocation Loc,
    if (RefersToEnclosingScope) {
      LambdaScopeInfo *const LSI =
          SemaRef.getCurLambda(/*IgnoreNonLambdaCapturingScope=*/true);
-      if (LSI && !LSI->CallOperator->Encloses(Var->getDeclContext())) {
+      if (LSI && (!LSI->CallOperator ||
+                  !LSI->CallOperator->Encloses(Var->getDeclContext()))) {
        // If a variable could potentially be odr-used, defer marking it so
        // until we finish analyzing the full expression for any
        // lvalue-to-rvalue
--- a/contrib/llvm/tools/lld/ELF/Driver.cpp
+++ b/contrib/llvm/tools/lld/ELF/Driver.cpp
@ -638,7 +638,7 @@ void LinkerDriver::readConfigs(opt::InputArgList &Args) {
  Config->Optimize = args::getInteger(Args, OPT_O, 1);
  Config->OrphanHandling = getOrphanHandling(Args);
  Config->OutputFile = Args.getLastArgValue(OPT_o);
-  Config->Pie = Args.hasFlag(OPT_pie, OPT_no_pie, false);
+  Config->Pie = Args.hasFlag(OPT_pie, OPT_nopie, false);
  Config->PrintGcSections =
      Args.hasFlag(OPT_print_gc_sections, OPT_no_print_gc_sections, false);
  Config->Rpath = getRpath(Args);
@ -1061,7 +1061,12 @@ template <class ELFT> void LinkerDriver::link(opt::InputArgList &Args) {
    addReservedSymbols();

  // Apply version scripts.
-  Symtab->scanVersionScript();
+  //
+  // For a relocatable output, version scripts don't make sense, and
+  // parsing a symbol version string (e.g. dropping "@ver1" from a symbol
+  // name "foo@ver1") rather does harm, so we don't call this if -r is given.
+  if (!Config->Relocatable)
+    Symtab->scanVersionScript();

  // Create wrapped symbols for -wrap option.
  for (auto *Arg : Args.filtered(OPT_wrap))
--- a/contrib/llvm/tools/lld/ELF/Options.td
+++ b/contrib/llvm/tools/lld/ELF/Options.td
@ -202,8 +202,6 @@ def no_gnu_unique: F<"no-gnu-unique">,
 def no_merge_exidx_entries: F<"no-merge-exidx-entries">,
  HelpText<"Disable merging .ARM.exidx entries">;

-def no_pie: F<"no-pie">, HelpText<"Do not create a position independent executable">;
-
 def no_threads: F<"no-threads">,
  HelpText<"Do not run the linker multi-threaded">;

@ -213,6 +211,8 @@ def no_whole_archive: F<"no-whole-archive">,
 def noinhibit_exec: F<"noinhibit-exec">,
  HelpText<"Retain the executable output file whenever it is still usable">;

+def nopie: F<"nopie">, HelpText<"Do not create a position independent executable">;
+
 def no_omagic: Flag<["--"], "no-omagic">, MetaVarName<"<magic>">,
  HelpText<"Do not set the text data sections to be writable">;

--- a/lib/clang/include/clang/Basic/Version.inc
+++ b/lib/clang/include/clang/Basic/Version.inc
@ -8,4 +8,4 @@

 #define	CLANG_VENDOR			"FreeBSD "

-#define	SVN_REVISION			"325330"
+#define	SVN_REVISION			"325932"
--- a/lib/clang/include/lld/Common/Version.inc
+++ b/lib/clang/include/lld/Common/Version.inc
@ -4,5 +4,5 @@
 #define LLD_VERSION_STRING "6.0.0"
 #define LLD_VERSION_MAJOR 6
 #define LLD_VERSION_MINOR 0
-#define LLD_REVISION_STRING "325330"
+#define LLD_REVISION_STRING "325932"
 #define LLD_REPOSITORY_STRING "FreeBSD"
--- a/lib/clang/include/llvm/Support/VCSRevision.h
+++ b/lib/clang/include/llvm/Support/VCSRevision.h
@ -1,2 +1,2 @@
 /* $FreeBSD$ */
-#define LLVM_REVISION "svn-r325330"
+#define LLVM_REVISION "svn-r325932"