Update prebuilt Clang to r365631c1 from Android.
The version we had was segfaulting.
Bug: 132420445
Change-Id: Icb45a6fe0b4e2166f7895e669df1157cec9fb4e0
diff --git a/linux-x64/clang/include/llvm/Analysis/AliasAnalysis.h b/linux-x64/clang/include/llvm/Analysis/AliasAnalysis.h
index 4e55f01..9483415 100644
--- a/linux-x64/clang/include/llvm/Analysis/AliasAnalysis.h
+++ b/linux-x64/clang/include/llvm/Analysis/AliasAnalysis.h
@@ -37,6 +37,7 @@
#ifndef LLVM_ANALYSIS_ALIASANALYSIS_H
#define LLVM_ANALYSIS_ALIASANALYSIS_H
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/None.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/SmallVector.h"
@@ -285,6 +286,28 @@
return ModRefInfo(FMRB & static_cast<int>(ModRefInfo::ModRef));
}
+/// This class stores info we want to provide to or retain within an alias
+/// query. By default, the root query is stateless and starts with a freshly
+/// constructed info object. Specific alias analyses can use this query info to
+/// store per-query state that is important for recursive or nested queries to
+/// avoid recomputing. To enable preserving this state across multiple queries
+/// where safe (due to the IR not changing), use a `BatchAAResults` wrapper.
+/// The information stored in an `AAQueryInfo` is currently limited to the
+/// caches used by BasicAA, but can further be extended to fit other AA needs.
+class AAQueryInfo {
+public:
+ using LocPair = std::pair<MemoryLocation, MemoryLocation>;
+ using AliasCacheT = SmallDenseMap<LocPair, AliasResult, 8>;
+ AliasCacheT AliasCache;
+
+ using IsCapturedCacheT = SmallDenseMap<const Value *, bool, 8>;
+ IsCapturedCacheT IsCapturedCache;
+
+ AAQueryInfo() : AliasCache(), IsCapturedCache() {}
+};
+
+class BatchAAResults;
+
class AAResults {
public:
// Make these results default constructable and movable. We have to spell
@@ -599,32 +622,8 @@
/// helpers above.
ModRefInfo getModRefInfo(const Instruction *I,
const Optional<MemoryLocation> &OptLoc) {
- if (OptLoc == None) {
- if (const auto *Call = dyn_cast<CallBase>(I)) {
- return createModRefInfo(getModRefBehavior(Call));
- }
- }
-
- const MemoryLocation &Loc = OptLoc.getValueOr(MemoryLocation());
-
- switch (I->getOpcode()) {
- case Instruction::VAArg: return getModRefInfo((const VAArgInst*)I, Loc);
- case Instruction::Load: return getModRefInfo((const LoadInst*)I, Loc);
- case Instruction::Store: return getModRefInfo((const StoreInst*)I, Loc);
- case Instruction::Fence: return getModRefInfo((const FenceInst*)I, Loc);
- case Instruction::AtomicCmpXchg:
- return getModRefInfo((const AtomicCmpXchgInst*)I, Loc);
- case Instruction::AtomicRMW:
- return getModRefInfo((const AtomicRMWInst*)I, Loc);
- case Instruction::Call: return getModRefInfo((const CallInst*)I, Loc);
- case Instruction::Invoke: return getModRefInfo((const InvokeInst*)I,Loc);
- case Instruction::CatchPad:
- return getModRefInfo((const CatchPadInst *)I, Loc);
- case Instruction::CatchRet:
- return getModRefInfo((const CatchReturnInst *)I, Loc);
- default:
- return ModRefInfo::NoModRef;
- }
+ AAQueryInfo AAQIP;
+ return getModRefInfo(I, OptLoc, AAQIP);
}
/// A convenience wrapper for constructing the memory location.
@@ -691,6 +690,69 @@
}
private:
+ AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
+ AAQueryInfo &AAQI);
+ bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI,
+ bool OrLocal = false);
+ ModRefInfo getModRefInfo(Instruction *I, const CallBase *Call2,
+ AAQueryInfo &AAQIP);
+ ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc,
+ AAQueryInfo &AAQI);
+ ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
+ AAQueryInfo &AAQI);
+ ModRefInfo getModRefInfo(const VAArgInst *V, const MemoryLocation &Loc,
+ AAQueryInfo &AAQI);
+ ModRefInfo getModRefInfo(const LoadInst *L, const MemoryLocation &Loc,
+ AAQueryInfo &AAQI);
+ ModRefInfo getModRefInfo(const StoreInst *S, const MemoryLocation &Loc,
+ AAQueryInfo &AAQI);
+ ModRefInfo getModRefInfo(const FenceInst *S, const MemoryLocation &Loc,
+ AAQueryInfo &AAQI);
+ ModRefInfo getModRefInfo(const AtomicCmpXchgInst *CX,
+ const MemoryLocation &Loc, AAQueryInfo &AAQI);
+ ModRefInfo getModRefInfo(const AtomicRMWInst *RMW, const MemoryLocation &Loc,
+ AAQueryInfo &AAQI);
+ ModRefInfo getModRefInfo(const CatchPadInst *I, const MemoryLocation &Loc,
+ AAQueryInfo &AAQI);
+ ModRefInfo getModRefInfo(const CatchReturnInst *I, const MemoryLocation &Loc,
+ AAQueryInfo &AAQI);
+ ModRefInfo getModRefInfo(const Instruction *I,
+ const Optional<MemoryLocation> &OptLoc,
+ AAQueryInfo &AAQIP) {
+ if (OptLoc == None) {
+ if (const auto *Call = dyn_cast<CallBase>(I)) {
+ return createModRefInfo(getModRefBehavior(Call));
+ }
+ }
+
+ const MemoryLocation &Loc = OptLoc.getValueOr(MemoryLocation());
+
+ switch (I->getOpcode()) {
+ case Instruction::VAArg:
+ return getModRefInfo((const VAArgInst *)I, Loc, AAQIP);
+ case Instruction::Load:
+ return getModRefInfo((const LoadInst *)I, Loc, AAQIP);
+ case Instruction::Store:
+ return getModRefInfo((const StoreInst *)I, Loc, AAQIP);
+ case Instruction::Fence:
+ return getModRefInfo((const FenceInst *)I, Loc, AAQIP);
+ case Instruction::AtomicCmpXchg:
+ return getModRefInfo((const AtomicCmpXchgInst *)I, Loc, AAQIP);
+ case Instruction::AtomicRMW:
+ return getModRefInfo((const AtomicRMWInst *)I, Loc, AAQIP);
+ case Instruction::Call:
+ return getModRefInfo((const CallInst *)I, Loc, AAQIP);
+ case Instruction::Invoke:
+ return getModRefInfo((const InvokeInst *)I, Loc, AAQIP);
+ case Instruction::CatchPad:
+ return getModRefInfo((const CatchPadInst *)I, Loc, AAQIP);
+ case Instruction::CatchRet:
+ return getModRefInfo((const CatchReturnInst *)I, Loc, AAQIP);
+ default:
+ return ModRefInfo::NoModRef;
+ }
+ }
+
class Concept;
template <typename T> class Model;
@@ -702,6 +764,47 @@
std::vector<std::unique_ptr<Concept>> AAs;
std::vector<AnalysisKey *> AADeps;
+
+ friend class BatchAAResults;
+};
+
+/// This class is a wrapper over an AAResults, and it is intended to be used
+/// only when there are no IR changes in between queries. BatchAAResults is
+/// reusing the same `AAQueryInfo` to preserve the state across queries,
+/// essentially making AA work in "batch mode". The internal state cannot be
+/// cleared, so to go "out-of-batch-mode", the user must either use AAResults,
+/// or create a new BatchAAResults.
+class BatchAAResults {
+ AAResults &AA;
+ AAQueryInfo AAQI;
+
+public:
+ BatchAAResults(AAResults &AAR) : AA(AAR), AAQI() {}
+ AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB) {
+ return AA.alias(LocA, LocB, AAQI);
+ }
+ bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal = false) {
+ return AA.pointsToConstantMemory(Loc, AAQI, OrLocal);
+ }
+ ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc) {
+ return AA.getModRefInfo(Call, Loc, AAQI);
+ }
+ ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2) {
+ return AA.getModRefInfo(Call1, Call2, AAQI);
+ }
+ ModRefInfo getModRefInfo(const Instruction *I,
+ const Optional<MemoryLocation> &OptLoc) {
+ return AA.getModRefInfo(I, OptLoc, AAQI);
+ }
+ ModRefInfo getModRefInfo(Instruction *I, const CallBase *Call2) {
+ return AA.getModRefInfo(I, Call2, AAQI);
+ }
+ ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) {
+ return AA.getArgModRefInfo(Call, ArgIdx);
+ }
+ FunctionModRefBehavior getModRefBehavior(const CallBase *Call) {
+ return AA.getModRefBehavior(Call);
+ }
};
/// Temporary typedef for legacy code that uses a generic \c AliasAnalysis
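Aside (not part of the diff): a minimal usage sketch of the new BatchAAResults wrapper, assuming `AA`, `L1`, `L2`, and `S` come from a surrounding pass and the IR does not change between the queries.

    // Sketch only: the names are placeholders, not code from this change.
    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    static void runBatchedQueries(AAResults &AA, LoadInst *L1, LoadInst *L2,
                                  StoreInst *S) {
      BatchAAResults BatchAA(AA);                 // one AAQueryInfo shared by all queries
      MemoryLocation LocA = MemoryLocation::get(L1);
      MemoryLocation LocB = MemoryLocation::get(L2);
      AliasResult AR = BatchAA.alias(LocA, LocB);      // cached in the shared AAQueryInfo
      ModRefInfo MRI = BatchAA.getModRefInfo(S, LocB); // reuses the same caches
      (void)AR;
      (void)MRI;
    }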
@@ -734,12 +837,12 @@
/// each other. This is the interface that must be implemented by specific
/// alias analysis implementations.
virtual AliasResult alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) = 0;
+ const MemoryLocation &LocB, AAQueryInfo &AAQI) = 0;
/// Checks whether the given location points to constant memory, or if
/// \p OrLocal is true whether it points to a local alloca.
virtual bool pointsToConstantMemory(const MemoryLocation &Loc,
- bool OrLocal) = 0;
+ AAQueryInfo &AAQI, bool OrLocal) = 0;
/// @}
//===--------------------------------------------------------------------===//
@@ -763,13 +866,14 @@
/// getModRefInfo (for call sites) - Return information about whether
/// a particular call site modifies or reads the specified memory location.
virtual ModRefInfo getModRefInfo(const CallBase *Call,
- const MemoryLocation &Loc) = 0;
+ const MemoryLocation &Loc,
+ AAQueryInfo &AAQI) = 0;
/// Return information about whether two call sites may refer to the same set
/// of memory locations. See the AA documentation for details:
/// http://llvm.org/docs/AliasAnalysis.html#ModRefInfo
- virtual ModRefInfo getModRefInfo(const CallBase *Call1,
- const CallBase *Call2) = 0;
+ virtual ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
+ AAQueryInfo &AAQI) = 0;
/// @}
};
@@ -791,14 +895,14 @@
void setAAResults(AAResults *NewAAR) override { Result.setAAResults(NewAAR); }
- AliasResult alias(const MemoryLocation &LocA,
- const MemoryLocation &LocB) override {
- return Result.alias(LocA, LocB);
+ AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
+ AAQueryInfo &AAQI) override {
+ return Result.alias(LocA, LocB, AAQI);
}
- bool pointsToConstantMemory(const MemoryLocation &Loc,
+ bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI,
bool OrLocal) override {
- return Result.pointsToConstantMemory(Loc, OrLocal);
+ return Result.pointsToConstantMemory(Loc, AAQI, OrLocal);
}
ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) override {
@@ -813,14 +917,14 @@
return Result.getModRefBehavior(F);
}
- ModRefInfo getModRefInfo(const CallBase *Call,
- const MemoryLocation &Loc) override {
- return Result.getModRefInfo(Call, Loc);
+ ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc,
+ AAQueryInfo &AAQI) override {
+ return Result.getModRefInfo(Call, Loc, AAQI);
}
- ModRefInfo getModRefInfo(const CallBase *Call1,
- const CallBase *Call2) override {
- return Result.getModRefInfo(Call1, Call2);
+ ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
+ AAQueryInfo &AAQI) override {
+ return Result.getModRefInfo(Call1, Call2, AAQI);
}
};
@@ -866,13 +970,16 @@
AAResultsProxy(AAResults *AAR, DerivedT &CurrentResult)
: AAR(AAR), CurrentResult(CurrentResult) {}
- AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB) {
- return AAR ? AAR->alias(LocA, LocB) : CurrentResult.alias(LocA, LocB);
+ AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
+ AAQueryInfo &AAQI) {
+ return AAR ? AAR->alias(LocA, LocB, AAQI)
+ : CurrentResult.alias(LocA, LocB, AAQI);
}
- bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal) {
- return AAR ? AAR->pointsToConstantMemory(Loc, OrLocal)
- : CurrentResult.pointsToConstantMemory(Loc, OrLocal);
+ bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI,
+ bool OrLocal) {
+ return AAR ? AAR->pointsToConstantMemory(Loc, AAQI, OrLocal)
+ : CurrentResult.pointsToConstantMemory(Loc, AAQI, OrLocal);
}
ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx) {
@@ -889,14 +996,16 @@
return AAR ? AAR->getModRefBehavior(F) : CurrentResult.getModRefBehavior(F);
}
- ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc) {
- return AAR ? AAR->getModRefInfo(Call, Loc)
- : CurrentResult.getModRefInfo(Call, Loc);
+ ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc,
+ AAQueryInfo &AAQI) {
+ return AAR ? AAR->getModRefInfo(Call, Loc, AAQI)
+ : CurrentResult.getModRefInfo(Call, Loc, AAQI);
}
- ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2) {
- return AAR ? AAR->getModRefInfo(Call1, Call2)
- : CurrentResult.getModRefInfo(Call1, Call2);
+ ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
+ AAQueryInfo &AAQI) {
+ return AAR ? AAR->getModRefInfo(Call1, Call2, AAQI)
+ : CurrentResult.getModRefInfo(Call1, Call2, AAQI);
}
};
@@ -920,11 +1029,13 @@
AAResultsProxy getBestAAResults() { return AAResultsProxy(AAR, derived()); }
public:
- AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB) {
+ AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
+ AAQueryInfo &AAQI) {
return MayAlias;
}
- bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal) {
+ bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI,
+ bool OrLocal) {
return false;
}
@@ -940,11 +1051,13 @@
return FMRB_UnknownModRefBehavior;
}
- ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc) {
+ ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc,
+ AAQueryInfo &AAQI) {
return ModRefInfo::ModRef;
}
- ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2) {
+ ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
+ AAQueryInfo &AAQI) {
return ModRefInfo::ModRef;
}
};
@@ -983,6 +1096,11 @@
/// This manager effectively wraps the AnalysisManager for registering alias
/// analyses. When you register your alias analysis with this manager, it will
/// ensure the analysis itself is registered with its AnalysisManager.
+///
+/// The result of this analysis is only invalidated if one of the particular
+/// aggregated AA results ends up being invalidated. This removes the need to
+/// explicitly preserve the results of `AAManager`. Note that analyses should no
+/// longer be registered once the `AAManager` is run.
class AAManager : public AnalysisInfoMixin<AAManager> {
public:
using Result = AAResults;
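Aside (a hypothetical sketch, not from this change): with the new pass manager, the aggregation is typically set up by registering the individual AA passes on an AAManager before it runs, along these lines.

    #include "llvm/Analysis/AliasAnalysis.h"
    #include "llvm/Analysis/BasicAliasAnalysis.h"
    #include "llvm/Analysis/GlobalsModRef.h"
    using namespace llvm;

    static void registerAliasAnalyses(FunctionAnalysisManager &FAM) {
      AAManager AA;
      AA.registerFunctionAnalysis<BasicAA>(); // per-function AA
      AA.registerModuleAnalysis<GlobalsAA>(); // module-level AA consulted per query
      // Register before the AAManager is run; late registration is not supported.
      FAM.registerPass([&] { return std::move(AA); });
    }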
diff --git a/linux-x64/clang/include/llvm/Analysis/BasicAliasAnalysis.h b/linux-x64/clang/include/llvm/Analysis/BasicAliasAnalysis.h
index 29f584c..22e8c4b 100644
--- a/linux-x64/clang/include/llvm/Analysis/BasicAliasAnalysis.h
+++ b/linux-x64/clang/include/llvm/Analysis/BasicAliasAnalysis.h
@@ -81,14 +81,18 @@
bool invalidate(Function &Fn, const PreservedAnalyses &PA,
FunctionAnalysisManager::Invalidator &Inv);
- AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
+ AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
+ AAQueryInfo &AAQI);
- ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc);
+ ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc,
+ AAQueryInfo &AAQI);
- ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2);
+ ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
+ AAQueryInfo &AAQI);
/// Chases pointers until we find a (constant global) or not.
- bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal);
+ bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI,
+ bool OrLocal);
/// Get the location associated with a pointer argument of a callsite.
ModRefInfo getArgModRefInfo(const CallBase *Call, unsigned ArgIdx);
@@ -140,13 +144,6 @@
SmallVector<VariableGEPIndex, 4> VarIndices;
};
- /// Track alias queries to guard against recursion.
- using LocPair = std::pair<MemoryLocation, MemoryLocation>;
- using AliasCacheTy = SmallDenseMap<LocPair, AliasResult, 8>;
- AliasCacheTy AliasCache;
- using IsCapturedCacheTy = SmallDenseMap<const Value *, bool, 8>;
- IsCapturedCacheTy IsCapturedCache;
-
/// Tracks phi nodes we have visited.
///
/// When interpret "Value" pointer equality as value equality we need to make
@@ -201,22 +198,24 @@
AliasResult aliasGEP(const GEPOperator *V1, LocationSize V1Size,
const AAMDNodes &V1AAInfo, const Value *V2,
LocationSize V2Size, const AAMDNodes &V2AAInfo,
- const Value *UnderlyingV1, const Value *UnderlyingV2);
+ const Value *UnderlyingV1, const Value *UnderlyingV2,
+ AAQueryInfo &AAQI);
AliasResult aliasPHI(const PHINode *PN, LocationSize PNSize,
const AAMDNodes &PNAAInfo, const Value *V2,
LocationSize V2Size, const AAMDNodes &V2AAInfo,
- const Value *UnderV2);
+ const Value *UnderV2, AAQueryInfo &AAQI);
AliasResult aliasSelect(const SelectInst *SI, LocationSize SISize,
const AAMDNodes &SIAAInfo, const Value *V2,
LocationSize V2Size, const AAMDNodes &V2AAInfo,
- const Value *UnderV2);
+ const Value *UnderV2, AAQueryInfo &AAQI);
AliasResult aliasCheck(const Value *V1, LocationSize V1Size,
AAMDNodes V1AATag, const Value *V2,
LocationSize V2Size, AAMDNodes V2AATag,
- const Value *O1 = nullptr, const Value *O2 = nullptr);
+ AAQueryInfo &AAQI, const Value *O1 = nullptr,
+ const Value *O2 = nullptr);
};
/// Analysis pass providing a never-invalidated alias analysis result.
diff --git a/linux-x64/clang/include/llvm/Analysis/BlockFrequencyInfo.h b/linux-x64/clang/include/llvm/Analysis/BlockFrequencyInfo.h
index c73c7fa..8bcfd7f 100644
--- a/linux-x64/clang/include/llvm/Analysis/BlockFrequencyInfo.h
+++ b/linux-x64/clang/include/llvm/Analysis/BlockFrequencyInfo.h
@@ -67,7 +67,8 @@
/// Returns the estimated profile count of \p BB.
/// This computes the relative block frequency of \p BB and multiplies it by
/// the enclosing function's count (if available) and returns the value.
- Optional<uint64_t> getBlockProfileCount(const BasicBlock *BB) const;
+ Optional<uint64_t> getBlockProfileCount(const BasicBlock *BB,
+ bool AllowSynthetic = false) const;
/// Returns the estimated profile count of \p Freq.
/// This uses the frequency \p Freq and multiplies it by
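Aside (a hedged sketch; `BFI`, `BB`, and `Total` are placeholders): the new flag appears to let callers accept counts derived from synthetic entry counts in addition to real profile data.

    // AllowSynthetic=false (the default) keeps the old behaviour of only
    // returning counts backed by a real profile.
    if (Optional<uint64_t> Count =
            BFI.getBlockProfileCount(&BB, /*AllowSynthetic=*/true))
      Total += *Count;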
diff --git a/linux-x64/clang/include/llvm/Analysis/BlockFrequencyInfoImpl.h b/linux-x64/clang/include/llvm/Analysis/BlockFrequencyInfoImpl.h
index 813bad4..bfe4fb1 100644
--- a/linux-x64/clang/include/llvm/Analysis/BlockFrequencyInfoImpl.h
+++ b/linux-x64/clang/include/llvm/Analysis/BlockFrequencyInfoImpl.h
@@ -520,9 +520,11 @@
BlockFrequency getBlockFreq(const BlockNode &Node) const;
Optional<uint64_t> getBlockProfileCount(const Function &F,
- const BlockNode &Node) const;
+ const BlockNode &Node,
+ bool AllowSynthetic = false) const;
Optional<uint64_t> getProfileCountFromFreq(const Function &F,
- uint64_t Freq) const;
+ uint64_t Freq,
+ bool AllowSynthetic = false) const;
bool isIrrLoopHeader(const BlockNode &Node);
void setBlockFreq(const BlockNode &Node, uint64_t Freq);
@@ -968,13 +970,17 @@
}
Optional<uint64_t> getBlockProfileCount(const Function &F,
- const BlockT *BB) const {
- return BlockFrequencyInfoImplBase::getBlockProfileCount(F, getNode(BB));
+ const BlockT *BB,
+ bool AllowSynthetic = false) const {
+ return BlockFrequencyInfoImplBase::getBlockProfileCount(F, getNode(BB),
+ AllowSynthetic);
}
Optional<uint64_t> getProfileCountFromFreq(const Function &F,
- uint64_t Freq) const {
- return BlockFrequencyInfoImplBase::getProfileCountFromFreq(F, Freq);
+ uint64_t Freq,
+ bool AllowSynthetic = false) const {
+ return BlockFrequencyInfoImplBase::getProfileCountFromFreq(F, Freq,
+ AllowSynthetic);
}
bool isIrrLoopHeader(const BlockT *BB) {
diff --git a/linux-x64/clang/include/llvm/Analysis/CFG.h b/linux-x64/clang/include/llvm/Analysis/CFG.h
index bcff4fb..bb55e76 100644
--- a/linux-x64/clang/include/llvm/Analysis/CFG.h
+++ b/linux-x64/clang/include/llvm/Analysis/CFG.h
@@ -47,8 +47,8 @@
bool isCriticalEdge(const Instruction *TI, unsigned SuccNum,
bool AllowIdenticalEdges = false);
-/// Determine whether instruction 'To' is reachable from 'From',
-/// returning true if uncertain.
+/// Determine whether instruction 'To' is reachable from 'From' without passing
+/// through any blocks in ExclusionSet, returning true if uncertain.
///
/// Determine whether there is a path from From to To within a single function.
/// Returns false only if we can prove that once 'From' has been executed then
@@ -62,9 +62,10 @@
/// we find a block that dominates the block containing 'To'. DT is most useful
/// on branchy code but not loops, and LI is most useful on code with loops but
/// does not help on branchy code outside loops.
-bool isPotentiallyReachable(const Instruction *From, const Instruction *To,
- const DominatorTree *DT = nullptr,
- const LoopInfo *LI = nullptr);
+bool isPotentiallyReachable(
+ const Instruction *From, const Instruction *To,
+ const SmallPtrSetImpl<BasicBlock *> *ExclusionSet = nullptr,
+ const DominatorTree *DT = nullptr, const LoopInfo *LI = nullptr);
/// Determine whether block 'To' is reachable from 'From', returning
/// true if uncertain.
@@ -88,6 +89,20 @@
const DominatorTree *DT = nullptr,
const LoopInfo *LI = nullptr);
+/// Determine whether there is at least one path from a block in
+/// 'Worklist' to 'StopBB' without passing through any blocks in
+/// 'ExclusionSet', returning true if uncertain.
+///
+/// Determine whether there is a path from at least one block in Worklist to
+/// StopBB within a single function without passing through any of the blocks
+/// in 'ExclusionSet'. Returns false only if we can prove that once any block
+/// in 'Worklist' has been reached then 'StopBB' cannot be executed.
+/// Conservatively returns true.
+bool isPotentiallyReachableFromMany(
+ SmallVectorImpl<BasicBlock *> &Worklist, BasicBlock *StopBB,
+ const SmallPtrSetImpl<BasicBlock *> *ExclusionSet,
+ const DominatorTree *DT = nullptr, const LoopInfo *LI = nullptr);
+
/// Return true if the control flow in \p RPOTraversal is irreducible.
///
/// This is a generic implementation to detect CFG irreducibility based on loop
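Aside (a sketch with placeholder names `FromInst`, `ToInst`, `SkipBB`, and `DT`): the new ExclusionSet parameter restricts the reachability query to paths that avoid the given blocks.

    SmallPtrSet<BasicBlock *, 4> ExclusionSet;
    ExclusionSet.insert(SkipBB); // paths through SkipBB are not considered
    bool MayReach = isPotentiallyReachable(FromInst, ToInst, &ExclusionSet,
                                           DT, /*LI=*/nullptr);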
diff --git a/linux-x64/clang/include/llvm/Analysis/CFLAndersAliasAnalysis.h b/linux-x64/clang/include/llvm/Analysis/CFLAndersAliasAnalysis.h
index 696aaeb..7c8b42b 100644
--- a/linux-x64/clang/include/llvm/Analysis/CFLAndersAliasAnalysis.h
+++ b/linux-x64/clang/include/llvm/Analysis/CFLAndersAliasAnalysis.h
@@ -60,7 +60,8 @@
const cflaa::AliasSummary *getAliasSummary(const Function &);
AliasResult query(const MemoryLocation &, const MemoryLocation &);
- AliasResult alias(const MemoryLocation &, const MemoryLocation &);
+ AliasResult alias(const MemoryLocation &, const MemoryLocation &,
+ AAQueryInfo &);
private:
/// Ensures that the given function is available in the cache.
diff --git a/linux-x64/clang/include/llvm/Analysis/CFLSteensAliasAnalysis.h b/linux-x64/clang/include/llvm/Analysis/CFLSteensAliasAnalysis.h
index 2d3b43c..cc7a47c 100644
--- a/linux-x64/clang/include/llvm/Analysis/CFLSteensAliasAnalysis.h
+++ b/linux-x64/clang/include/llvm/Analysis/CFLSteensAliasAnalysis.h
@@ -69,7 +69,8 @@
AliasResult query(const MemoryLocation &LocA, const MemoryLocation &LocB);
- AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB) {
+ AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
+ AAQueryInfo &AAQI) {
if (LocA.Ptr == LocB.Ptr)
return MustAlias;
@@ -79,11 +80,11 @@
// ConstantExpr, but every query needs to have at least one Value tied to a
// Function, and neither GlobalValues nor ConstantExprs are.
if (isa<Constant>(LocA.Ptr) && isa<Constant>(LocB.Ptr))
- return AAResultBase::alias(LocA, LocB);
+ return AAResultBase::alias(LocA, LocB, AAQI);
AliasResult QueryResult = query(LocA, LocB);
if (QueryResult == MayAlias)
- return AAResultBase::alias(LocA, LocB);
+ return AAResultBase::alias(LocA, LocB, AAQI);
return QueryResult;
}
diff --git a/linux-x64/clang/include/llvm/Analysis/CGSCCPassManager.h b/linux-x64/clang/include/llvm/Analysis/CGSCCPassManager.h
index 6d26954..8af5fb8 100644
--- a/linux-x64/clang/include/llvm/Analysis/CGSCCPassManager.h
+++ b/linux-x64/clang/include/llvm/Analysis/CGSCCPassManager.h
@@ -291,6 +291,21 @@
/// post-order walk.
LazyCallGraph::SCC *UpdatedC;
+ /// Preserved analyses across SCCs.
+ ///
+ /// We specifically want to allow CGSCC passes to mutate ancestor IR
+ /// (changing both the CG structure and the function IR itself). However,
+ /// this means we need to take special care to correctly mark what analyses
+ /// are preserved *across* SCCs. We have to track this out-of-band here
+ /// because within the main `PassManager` infrastructure we need to mark
+ /// everything within an SCC as preserved in order to avoid repeatedly
+ /// invalidating the same analyses as we unnest pass managers and adaptors.
+ /// So we track the cross-SCC version of the preserved analyses here from any
+ /// code that does direct invalidation of SCC analyses, and then use it
+ /// whenever we move forward in the post-order walk of SCCs before running
+ /// passes over the new SCC.
+ PreservedAnalyses CrossSCCPA;
+
/// A hacky area where the inliner can retain history about inlining
/// decisions that mutated the call graph's SCC structure in order to avoid
/// infinite inlining. See the comments in the inliner's CG update logic.
@@ -338,175 +353,7 @@
}
/// Runs the CGSCC pass across every SCC in the module.
- PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) {
- // Setup the CGSCC analysis manager from its proxy.
- CGSCCAnalysisManager &CGAM =
- AM.getResult<CGSCCAnalysisManagerModuleProxy>(M).getManager();
-
- // Get the call graph for this module.
- LazyCallGraph &CG = AM.getResult<LazyCallGraphAnalysis>(M);
-
- // We keep worklists to allow us to push more work onto the pass manager as
- // the passes are run.
- SmallPriorityWorklist<LazyCallGraph::RefSCC *, 1> RCWorklist;
- SmallPriorityWorklist<LazyCallGraph::SCC *, 1> CWorklist;
-
- // Keep sets for invalidated SCCs and RefSCCs that should be skipped when
- // iterating off the worklists.
- SmallPtrSet<LazyCallGraph::RefSCC *, 4> InvalidRefSCCSet;
- SmallPtrSet<LazyCallGraph::SCC *, 4> InvalidSCCSet;
-
- SmallDenseSet<std::pair<LazyCallGraph::Node *, LazyCallGraph::SCC *>, 4>
- InlinedInternalEdges;
-
- CGSCCUpdateResult UR = {RCWorklist, CWorklist, InvalidRefSCCSet,
- InvalidSCCSet, nullptr, nullptr,
- InlinedInternalEdges};
-
- // Request PassInstrumentation from analysis manager, will use it to run
- // instrumenting callbacks for the passes later.
- PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(M);
-
- PreservedAnalyses PA = PreservedAnalyses::all();
- CG.buildRefSCCs();
- for (auto RCI = CG.postorder_ref_scc_begin(),
- RCE = CG.postorder_ref_scc_end();
- RCI != RCE;) {
- assert(RCWorklist.empty() &&
- "Should always start with an empty RefSCC worklist");
- // The postorder_ref_sccs range we are walking is lazily constructed, so
- // we only push the first one onto the worklist. The worklist allows us
- // to capture *new* RefSCCs created during transformations.
- //
- // We really want to form RefSCCs lazily because that makes them cheaper
- // to update as the program is simplified and allows us to have greater
- // cache locality as forming a RefSCC touches all the parts of all the
- // functions within that RefSCC.
- //
- // We also eagerly increment the iterator to the next position because
- // the CGSCC passes below may delete the current RefSCC.
- RCWorklist.insert(&*RCI++);
-
- do {
- LazyCallGraph::RefSCC *RC = RCWorklist.pop_back_val();
- if (InvalidRefSCCSet.count(RC)) {
- LLVM_DEBUG(dbgs() << "Skipping an invalid RefSCC...\n");
- continue;
- }
-
- assert(CWorklist.empty() &&
- "Should always start with an empty SCC worklist");
-
- LLVM_DEBUG(dbgs() << "Running an SCC pass across the RefSCC: " << *RC
- << "\n");
-
- // Push the initial SCCs in reverse post-order as we'll pop off the
- // back and so see this in post-order.
- for (LazyCallGraph::SCC &C : llvm::reverse(*RC))
- CWorklist.insert(&C);
-
- do {
- LazyCallGraph::SCC *C = CWorklist.pop_back_val();
- // Due to call graph mutations, we may have invalid SCCs or SCCs from
- // other RefSCCs in the worklist. The invalid ones are dead and the
- // other RefSCCs should be queued above, so we just need to skip both
- // scenarios here.
- if (InvalidSCCSet.count(C)) {
- LLVM_DEBUG(dbgs() << "Skipping an invalid SCC...\n");
- continue;
- }
- if (&C->getOuterRefSCC() != RC) {
- LLVM_DEBUG(dbgs()
- << "Skipping an SCC that is now part of some other "
- "RefSCC...\n");
- continue;
- }
-
- do {
- // Check that we didn't miss any update scenario.
- assert(!InvalidSCCSet.count(C) && "Processing an invalid SCC!");
- assert(C->begin() != C->end() && "Cannot have an empty SCC!");
- assert(&C->getOuterRefSCC() == RC &&
- "Processing an SCC in a different RefSCC!");
-
- UR.UpdatedRC = nullptr;
- UR.UpdatedC = nullptr;
-
- // Check the PassInstrumentation's BeforePass callbacks before
- // running the pass, skip its execution completely if asked to
- // (callback returns false).
- if (!PI.runBeforePass<LazyCallGraph::SCC>(Pass, *C))
- continue;
-
- PreservedAnalyses PassPA = Pass.run(*C, CGAM, CG, UR);
-
- if (UR.InvalidatedSCCs.count(C))
- PI.runAfterPassInvalidated<LazyCallGraph::SCC>(Pass);
- else
- PI.runAfterPass<LazyCallGraph::SCC>(Pass, *C);
-
- // Update the SCC and RefSCC if necessary.
- C = UR.UpdatedC ? UR.UpdatedC : C;
- RC = UR.UpdatedRC ? UR.UpdatedRC : RC;
-
- // If the CGSCC pass wasn't able to provide a valid updated SCC,
- // the current SCC may simply need to be skipped if invalid.
- if (UR.InvalidatedSCCs.count(C)) {
- LLVM_DEBUG(dbgs()
- << "Skipping invalidated root or island SCC!\n");
- break;
- }
- // Check that we didn't miss any update scenario.
- assert(C->begin() != C->end() && "Cannot have an empty SCC!");
-
- // We handle invalidating the CGSCC analysis manager's information
- // for the (potentially updated) SCC here. Note that any other SCCs
- // whose structure has changed should have been invalidated by
- // whatever was updating the call graph. This SCC gets invalidated
- // late as it contains the nodes that were actively being
- // processed.
- CGAM.invalidate(*C, PassPA);
-
- // Then intersect the preserved set so that invalidation of module
- // analyses will eventually occur when the module pass completes.
- PA.intersect(std::move(PassPA));
-
- // The pass may have restructured the call graph and refined the
- // current SCC and/or RefSCC. We need to update our current SCC and
- // RefSCC pointers to follow these. Also, when the current SCC is
- // refined, re-run the SCC pass over the newly refined SCC in order
- // to observe the most precise SCC model available. This inherently
- // cannot cycle excessively as it only happens when we split SCCs
- // apart, at most converging on a DAG of single nodes.
- // FIXME: If we ever start having RefSCC passes, we'll want to
- // iterate there too.
- if (UR.UpdatedC)
- LLVM_DEBUG(dbgs()
- << "Re-running SCC passes after a refinement of the "
- "current SCC: "
- << *UR.UpdatedC << "\n");
-
- // Note that both `C` and `RC` may at this point refer to deleted,
- // invalid SCC and RefSCCs respectively. But we will short circuit
- // the processing when we check them in the loop above.
- } while (UR.UpdatedC);
- } while (!CWorklist.empty());
-
- // We only need to keep internal inlined edge information within
- // a RefSCC, clear it to save on space and let the next time we visit
- // any of these functions have a fresh start.
- InlinedInternalEdges.clear();
- } while (!RCWorklist.empty());
- }
-
- // By definition we preserve the call garph, all SCC analyses, and the
- // analysis proxies by handling them above and in any nested pass managers.
- PA.preserveSet<AllAnalysesOn<LazyCallGraph::SCC>>();
- PA.preserve<LazyCallGraphAnalysis>();
- PA.preserve<CGSCCAnalysisManagerModuleProxy>();
- PA.preserve<FunctionAnalysisManagerModuleProxy>();
- return PA;
- }
+ PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM);
private:
CGSCCPassT Pass;
@@ -872,6 +719,210 @@
return DevirtSCCRepeatedPass<PassT>(std::move(Pass), MaxIterations);
}
+// Out-of-line implementation details for templates below this point.
+
+template <typename CGSCCPassT>
+PreservedAnalyses
+ModuleToPostOrderCGSCCPassAdaptor<CGSCCPassT>::run(Module &M,
+ ModuleAnalysisManager &AM) {
+ // Setup the CGSCC analysis manager from its proxy.
+ CGSCCAnalysisManager &CGAM =
+ AM.getResult<CGSCCAnalysisManagerModuleProxy>(M).getManager();
+
+ // Get the call graph for this module.
+ LazyCallGraph &CG = AM.getResult<LazyCallGraphAnalysis>(M);
+
+ // We keep worklists to allow us to push more work onto the pass manager as
+ // the passes are run.
+ SmallPriorityWorklist<LazyCallGraph::RefSCC *, 1> RCWorklist;
+ SmallPriorityWorklist<LazyCallGraph::SCC *, 1> CWorklist;
+
+ // Keep sets for invalidated SCCs and RefSCCs that should be skipped when
+ // iterating off the worklists.
+ SmallPtrSet<LazyCallGraph::RefSCC *, 4> InvalidRefSCCSet;
+ SmallPtrSet<LazyCallGraph::SCC *, 4> InvalidSCCSet;
+
+ SmallDenseSet<std::pair<LazyCallGraph::Node *, LazyCallGraph::SCC *>, 4>
+ InlinedInternalEdges;
+
+ CGSCCUpdateResult UR = {
+ RCWorklist, CWorklist, InvalidRefSCCSet, InvalidSCCSet,
+ nullptr, nullptr, PreservedAnalyses::all(), InlinedInternalEdges};
+
+ // Request PassInstrumentation from analysis manager, will use it to run
+ // instrumenting callbacks for the passes later.
+ PassInstrumentation PI = AM.getResult<PassInstrumentationAnalysis>(M);
+
+ PreservedAnalyses PA = PreservedAnalyses::all();
+ CG.buildRefSCCs();
+ for (auto RCI = CG.postorder_ref_scc_begin(),
+ RCE = CG.postorder_ref_scc_end();
+ RCI != RCE;) {
+ assert(RCWorklist.empty() &&
+ "Should always start with an empty RefSCC worklist");
+ // The postorder_ref_sccs range we are walking is lazily constructed, so
+ // we only push the first one onto the worklist. The worklist allows us
+ // to capture *new* RefSCCs created during transformations.
+ //
+ // We really want to form RefSCCs lazily because that makes them cheaper
+ // to update as the program is simplified and allows us to have greater
+ // cache locality as forming a RefSCC touches all the parts of all the
+ // functions within that RefSCC.
+ //
+ // We also eagerly increment the iterator to the next position because
+ // the CGSCC passes below may delete the current RefSCC.
+ RCWorklist.insert(&*RCI++);
+
+ do {
+ LazyCallGraph::RefSCC *RC = RCWorklist.pop_back_val();
+ if (InvalidRefSCCSet.count(RC)) {
+ LLVM_DEBUG(dbgs() << "Skipping an invalid RefSCC...\n");
+ continue;
+ }
+
+ assert(CWorklist.empty() &&
+ "Should always start with an empty SCC worklist");
+
+ LLVM_DEBUG(dbgs() << "Running an SCC pass across the RefSCC: " << *RC
+ << "\n");
+
+ // Push the initial SCCs in reverse post-order as we'll pop off the
+ // back and so see this in post-order.
+ for (LazyCallGraph::SCC &C : llvm::reverse(*RC))
+ CWorklist.insert(&C);
+
+ do {
+ LazyCallGraph::SCC *C = CWorklist.pop_back_val();
+ // Due to call graph mutations, we may have invalid SCCs or SCCs from
+ // other RefSCCs in the worklist. The invalid ones are dead and the
+ // other RefSCCs should be queued above, so we just need to skip both
+ // scenarios here.
+ if (InvalidSCCSet.count(C)) {
+ LLVM_DEBUG(dbgs() << "Skipping an invalid SCC...\n");
+ continue;
+ }
+ if (&C->getOuterRefSCC() != RC) {
+ LLVM_DEBUG(dbgs() << "Skipping an SCC that is now part of some other "
+ "RefSCC...\n");
+ continue;
+ }
+
+ // Ensure we can proxy analysis updates from the CGSCC analysis
+ // manager into the Function analysis manager by getting a proxy here.
+ // FIXME: This seems like a bit of a hack. We should find a cleaner
+ // or more constructive way to ensure this happens.
+ (void)CGAM.getResult<FunctionAnalysisManagerCGSCCProxy>(*C, CG);
+
+ // Each time we visit a new SCC pulled off the worklist,
+ // a transformation of a child SCC may have also modified this parent
+ // and invalidated analyses. So we invalidate using the update record's
+ // cross-SCC preserved set. This preserved set is intersected by any
+ // CGSCC pass that handles invalidation (primarily pass managers) prior
+ // to marking its SCC as preserved. That lets us track everything that
+ // might need invalidation across SCCs without excessive invalidations
+ // on a single SCC.
+ //
+ // This essentially allows SCC passes to freely invalidate analyses
+ // of any ancestor SCC. If this becomes detrimental to successfully
+ // caching analyses, we could force each SCC pass to manually
+ // invalidate the analyses for any SCCs other than themselves which
+ // are mutated. However, that seems to lose the robustness of the
+ // pass-manager driven invalidation scheme.
+ //
+ // FIXME: This is redundant in one case -- the top of the worklist may
+ // *also* be the same SCC we just ran over (and invalidated for). In
+ // that case, we'll end up doing a redundant invalidation here as
+ // a consequence.
+ CGAM.invalidate(*C, UR.CrossSCCPA);
+
+ do {
+ // Check that we didn't miss any update scenario.
+ assert(!InvalidSCCSet.count(C) && "Processing an invalid SCC!");
+ assert(C->begin() != C->end() && "Cannot have an empty SCC!");
+ assert(&C->getOuterRefSCC() == RC &&
+ "Processing an SCC in a different RefSCC!");
+
+ UR.UpdatedRC = nullptr;
+ UR.UpdatedC = nullptr;
+
+ // Check the PassInstrumentation's BeforePass callbacks before
+ // running the pass, skip its execution completely if asked to
+ // (callback returns false).
+ if (!PI.runBeforePass<LazyCallGraph::SCC>(Pass, *C))
+ continue;
+
+ PreservedAnalyses PassPA = Pass.run(*C, CGAM, CG, UR);
+
+ if (UR.InvalidatedSCCs.count(C))
+ PI.runAfterPassInvalidated<LazyCallGraph::SCC>(Pass);
+ else
+ PI.runAfterPass<LazyCallGraph::SCC>(Pass, *C);
+
+ // Update the SCC and RefSCC if necessary.
+ C = UR.UpdatedC ? UR.UpdatedC : C;
+ RC = UR.UpdatedRC ? UR.UpdatedRC : RC;
+
+ // If the CGSCC pass wasn't able to provide a valid updated SCC,
+ // the current SCC may simply need to be skipped if invalid.
+ if (UR.InvalidatedSCCs.count(C)) {
+ LLVM_DEBUG(dbgs() << "Skipping invalidated root or island SCC!\n");
+ break;
+ }
+ // Check that we didn't miss any update scenario.
+ assert(C->begin() != C->end() && "Cannot have an empty SCC!");
+
+ // We handle invalidating the CGSCC analysis manager's information
+ // for the (potentially updated) SCC here. Note that any other SCCs
+ // whose structure has changed should have been invalidated by
+ // whatever was updating the call graph. This SCC gets invalidated
+ // late as it contains the nodes that were actively being
+ // processed.
+ CGAM.invalidate(*C, PassPA);
+
+ // Then intersect the preserved set so that invalidation of module
+ // analyses will eventually occur when the module pass completes.
+ // Also intersect with the cross-SCC preserved set to capture any
+ // cross-SCC invalidation.
+ UR.CrossSCCPA.intersect(PassPA);
+ PA.intersect(std::move(PassPA));
+
+ // The pass may have restructured the call graph and refined the
+ // current SCC and/or RefSCC. We need to update our current SCC and
+ // RefSCC pointers to follow these. Also, when the current SCC is
+ // refined, re-run the SCC pass over the newly refined SCC in order
+ // to observe the most precise SCC model available. This inherently
+ // cannot cycle excessively as it only happens when we split SCCs
+ // apart, at most converging on a DAG of single nodes.
+ // FIXME: If we ever start having RefSCC passes, we'll want to
+ // iterate there too.
+ if (UR.UpdatedC)
+ LLVM_DEBUG(dbgs()
+ << "Re-running SCC passes after a refinement of the "
+ "current SCC: "
+ << *UR.UpdatedC << "\n");
+
+ // Note that both `C` and `RC` may at this point refer to deleted,
+ // invalid SCC and RefSCCs respectively. But we will short circuit
+ // the processing when we check them in the loop above.
+ } while (UR.UpdatedC);
+ } while (!CWorklist.empty());
+
+ // We only need to keep internal inlined edge information within
+ // a RefSCC, clear it to save on space and let the next time we visit
+ // any of these functions have a fresh start.
+ InlinedInternalEdges.clear();
+ } while (!RCWorklist.empty());
+ }
+
+ // By definition we preserve the call graph, all SCC analyses, and the
+ // analysis proxies by handling them above and in any nested pass managers.
+ PA.preserveSet<AllAnalysesOn<LazyCallGraph::SCC>>();
+ PA.preserve<LazyCallGraphAnalysis>();
+ PA.preserve<CGSCCAnalysisManagerModuleProxy>();
+ PA.preserve<FunctionAnalysisManagerModuleProxy>();
+ return PA;
+}
+
// Clear out the debug logging macro.
#undef DEBUG_TYPE
diff --git a/linux-x64/clang/include/llvm/Analysis/CallGraph.h b/linux-x64/clang/include/llvm/Analysis/CallGraph.h
index a743cbc..7a10183 100644
--- a/linux-x64/clang/include/llvm/Analysis/CallGraph.h
+++ b/linux-x64/clang/include/llvm/Analysis/CallGraph.h
@@ -47,8 +47,8 @@
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/STLExtras.h"
-#include "llvm/IR/CallSite.h"
#include "llvm/IR/Function.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/PassManager.h"
#include "llvm/IR/ValueHandle.h"
@@ -229,11 +229,11 @@
}
/// Adds a function to the list of functions called by this one.
- void addCalledFunction(CallSite CS, CallGraphNode *M) {
- assert(!CS.getInstruction() || !CS.getCalledFunction() ||
- !CS.getCalledFunction()->isIntrinsic() ||
- !Intrinsic::isLeaf(CS.getCalledFunction()->getIntrinsicID()));
- CalledFunctions.emplace_back(CS.getInstruction(), M);
+ void addCalledFunction(CallBase *Call, CallGraphNode *M) {
+ assert(!Call || !Call->getCalledFunction() ||
+ !Call->getCalledFunction()->isIntrinsic() ||
+ !Intrinsic::isLeaf(Call->getCalledFunction()->getIntrinsicID()));
+ CalledFunctions.emplace_back(Call, M);
M->AddRef();
}
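Aside (a sketch; `CG`, `Caller`, and `CB` are placeholders): callers now pass the CallBase directly instead of wrapping it in a CallSite.

    CallGraphNode *CallerNode = CG[&Caller];       // CG is a CallGraph
    if (Function *Callee = CB.getCalledFunction()) // CB is a CallBase inside Caller
      CallerNode->addCalledFunction(&CB, CG[Callee]);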
@@ -246,7 +246,7 @@
/// Removes the edge in the node for the specified call site.
///
/// Note that this method takes linear time, so it should be used sparingly.
- void removeCallEdgeFor(CallSite CS);
+ void removeCallEdgeFor(CallBase &Call);
/// Removes all call edges from this node to the specified callee
/// function.
@@ -263,7 +263,8 @@
/// new one.
///
/// Note that this method takes linear time, so it should be used sparingly.
- void replaceCallEdge(CallSite CS, CallSite NewCS, CallGraphNode *NewNode);
+ void replaceCallEdge(CallBase &Call, CallBase &NewCall,
+ CallGraphNode *NewNode);
private:
friend class CallGraph;
diff --git a/linux-x64/clang/include/llvm/Analysis/ConstantFolding.h b/linux-x64/clang/include/llvm/Analysis/ConstantFolding.h
index 43a2df0..2385b6f 100644
--- a/linux-x64/clang/include/llvm/Analysis/ConstantFolding.h
+++ b/linux-x64/clang/include/llvm/Analysis/ConstantFolding.h
@@ -71,6 +71,12 @@
Constant *RHS, const DataLayout &DL,
const TargetLibraryInfo *TLI = nullptr);
+/// Attempt to constant fold a unary operation with the specified
+/// operand. If it fails, it returns a constant expression of the specified
+/// operands.
+Constant *ConstantFoldUnaryOpOperand(unsigned Opcode, Constant *Op,
+ const DataLayout &DL);
+
/// Attempt to constant fold a binary operation with the specified
/// operands. If it fails, it returns a constant expression of the specified
/// operands.
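Aside (a minimal sketch, assuming `Ctx` and `DL` are the module's LLVMContext and DataLayout): the new unary entry point mirrors the binary one, e.g. for an fneg.

    Constant *Op = ConstantFP::get(Type::getDoubleTy(Ctx), 2.0);
    // Folds fneg double 2.0 to double -2.0; on failure it would return a
    // constant expression of the operand instead.
    Constant *Folded = ConstantFoldUnaryOpOperand(Instruction::FNeg, Op, DL);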
diff --git a/linux-x64/clang/include/llvm/Analysis/DomTreeUpdater.h b/linux-x64/clang/include/llvm/Analysis/DomTreeUpdater.h
index fcfd3c1..5ccce2e 100644
--- a/linux-x64/clang/include/llvm/Analysis/DomTreeUpdater.h
+++ b/linux-x64/clang/include/llvm/Analysis/DomTreeUpdater.h
@@ -82,61 +82,114 @@
/// Returns false under Eager UpdateStrategy or PDT is nullptr.
bool hasPendingPostDomTreeUpdates() const;
- /// Apply updates on all available trees. Under Eager UpdateStrategy with
- /// ForceRemoveDuplicates enabled or under Lazy UpdateStrategy, it will
- /// discard duplicated updates and self-dominance updates. If both DT and PDT
- /// are nullptrs, this function discards all updates. The Eager Strategy
- /// applies the updates immediately while the Lazy Strategy queues the
- /// updates. It is required for the state of the LLVM IR to be updated
- /// *before* applying the Updates because the internal update routine will
- /// analyze the current state of the relationship between a pair of (From, To)
- /// BasicBlocks to determine whether a single update needs to be discarded.
- void applyUpdates(ArrayRef<DominatorTree::UpdateType> Updates,
- bool ForceRemoveDuplicates = false);
+ ///@{
+ /// \name Mutation APIs
+ ///
+ /// These methods provide APIs for submitting updates to the DominatorTree and
+ /// the PostDominatorTree.
+ ///
+ /// Note: There are two strategies to update the DominatorTree and the
+ /// PostDominatorTree:
+ /// 1. Eager UpdateStrategy: Updates are submitted and then flushed
+ /// immediately.
+ /// 2. Lazy UpdateStrategy: Updates are submitted but only flushed when you
+ /// explicitly call Flush APIs. It is recommended to use this update strategy
+ /// when you submit a bunch of updates multiple times which can then
+ /// add up to a large number of updates between two queries on the
+ /// DominatorTree. The incremental updater can reschedule the updates or
+ /// decide to recalculate the dominator tree in order to speedup the updating
+ /// process depending on the number of updates.
+ ///
+ /// Although GenericDomTree provides several update primitives,
+ /// using those APIs directly is discouraged.
- /// Notify all available trees on an edge insertion. If both DT and PDT are
- /// nullptrs, this function discards the update. Under either Strategy,
- /// self-dominance update will be removed. The Eager Strategy applies
- /// the update immediately while the Lazy Strategy queues the update.
- /// It is recommended to only use this method when you have exactly one
- /// insertion (and no deletions). It is recommended to use applyUpdates() in
- /// all other cases. This function has to be called *after* making the update
- /// on the actual CFG. An internal functions checks if the edge exists in the
+ /// Submit updates to all available trees.
+ /// The Eager Strategy flushes updates immediately while the Lazy Strategy
+ /// queues the updates.
+ ///
+ /// Note: The "existence" of an edge in a CFG refers to the CFG which DTU is
+ /// in sync with + all updates before that single update.
+ ///
+ /// CAUTION!
+ /// 1. It is required for the state of the LLVM IR to be updated
+ /// *before* submitting the updates because the internal update routine will
+ /// analyze the current state of the CFG to determine whether an update
+ /// is valid.
+ /// 2. It is illegal to submit any update that has already been submitted,
+ /// i.e., you must not insert an already existing edge or delete a
+ /// nonexistent edge.
+ void applyUpdates(ArrayRef<DominatorTree::UpdateType> Updates);
+
+ /// Submit updates to all available trees. It will also
+ /// 1. discard duplicated updates,
+ /// 2. remove invalid updates. (An invalid update means deletion of an edge that
+ /// still exists or insertion of an edge that does not exist.)
+ /// The Eager Strategy flushes updates immediately while the Lazy Strategy
+ /// queues the updates.
+ ///
+ /// Note: The "existence" of an edge in a CFG refers to the CFG which DTU is
+ /// in sync with + all updates before that single update.
+ ///
+ /// CAUTION!
+ /// 1. It is required for the state of the LLVM IR to be updated
+ /// *before* submitting the updates because the internal update routine will
+ /// analyze the current state of the CFG to determine whether an update
+ /// is valid.
+ /// 2. It is illegal to submit any update that has already been submitted,
+ /// i.e., you must not insert an already existing edge or delete a
+ /// nonexistent edge.
+ /// 3. It is only legal to submit updates to an edge in the order CFG changes
+ /// are made. The order you submit updates on different edges is not
+ /// restricted.
+ void applyUpdatesPermissive(ArrayRef<DominatorTree::UpdateType> Updates);
+
+ /// Notify DTU that the entry block was replaced.
+ /// Recalculate all available trees and flush all BasicBlocks
+ /// awaiting deletion immediately.
+ void recalculate(Function &F);
+
+ /// \deprecated { Submit an edge insertion to all available trees. The Eager
+ /// Strategy flushes this update immediately while the Lazy Strategy queues
+ /// the update. An internal function checks if the edge exists in the CFG in
+ /// DEBUG mode. CAUTION! This function has to be called *after* making the
+ /// update on the actual CFG. It is illegal to submit any update that has
+ /// already been applied. }
+ LLVM_ATTRIBUTE_DEPRECATED(void insertEdge(BasicBlock *From, BasicBlock *To),
+ "Use applyUpdates() instead.");
+
+ /// \deprecated {Submit an edge insertion to all available trees.
+ /// Under either Strategy, an invalid update will be discarded silently.
+ /// Invalid update means inserting an edge that does not exist in the CFG.
+ /// The Eager Strategy flushes this update immediately while the Lazy Strategy
+ /// queues the update. It is only recommended to use this method when you
+ /// want to discard an invalid update.
+ /// CAUTION! It is illegal to submit any update that has already been
+ /// submitted. }
+ LLVM_ATTRIBUTE_DEPRECATED(void insertEdgeRelaxed(BasicBlock *From,
+ BasicBlock *To),
+ "Use applyUpdatesPermissive() instead.");
+
+ /// \deprecated { Submit an edge deletion to all available trees. The Eager
+ /// Strategy flushes this update immediately while the Lazy Strategy queues
+ /// the update. An internal function checks if the edge doesn't exist in the
/// CFG in DEBUG mode.
- void insertEdge(BasicBlock *From, BasicBlock *To);
+ /// CAUTION! This function has to be called *after* making the update on the
+ /// actual CFG. It is illegal to submit any update that has already been
+ /// submitted. }
+ LLVM_ATTRIBUTE_DEPRECATED(void deleteEdge(BasicBlock *From, BasicBlock *To),
+ "Use applyUpdates() instead.");
- /// Notify all available trees on an edge insertion.
- /// Under either Strategy, the following updates will be discard silently
- /// 1. Invalid - Inserting an edge that does not exist in the CFG.
- /// 2. Self-dominance update.
- /// 3. Both DT and PDT are nullptrs.
- /// The Eager Strategy applies the update immediately while the Lazy Strategy
- /// queues the update. It is recommended to only use this method when you have
- /// exactly one insertion (and no deletions) and want to discard an invalid
- /// update.
- void insertEdgeRelaxed(BasicBlock *From, BasicBlock *To);
-
- /// Notify all available trees on an edge deletion. If both DT and PDT are
- /// nullptrs, this function discards the update. Under either Strategy,
- /// self-dominance update will be removed. The Eager Strategy applies
- /// the update immediately while the Lazy Strategy queues the update.
- /// It is recommended to only use this method when you have exactly one
- /// deletion (and no insertions). It is recommended to use applyUpdates() in
- /// all other cases. This function has to be called *after* making the update
- /// on the actual CFG. An internal functions checks if the edge doesn't exist
- /// in the CFG in DEBUG mode.
- void deleteEdge(BasicBlock *From, BasicBlock *To);
-
- /// Notify all available trees on an edge deletion.
- /// Under either Strategy, the following updates will be discard silently
- /// 1. Invalid - Deleting an edge that still exists in the CFG.
- /// 2. Self-dominance update.
- /// 3. Both DT and PDT are nullptrs.
- /// The Eager Strategy applies the update immediately while the Lazy Strategy
- /// queues the update. It is recommended to only use this method when you have
- /// exactly one deletion (and no insertions) and want to discard an invalid
- /// update.
- void deleteEdgeRelaxed(BasicBlock *From, BasicBlock *To);
+ /// \deprecated { Submit an edge deletion to all available trees.
+ /// Under either Strategy, an invalid update will be discarded silently.
+ /// Invalid update means deleting an edge that exists in the CFG.
+ /// The Eager Strategy flushes this update immediately while the Lazy Strategy
+ /// queues the update. It is only recommended to use this method when you
+ /// want to discard an invalid update.
+ /// CAUTION! It is illegal to submit any update that has already been
+ /// submitted. }
+ LLVM_ATTRIBUTE_DEPRECATED(void deleteEdgeRelaxed(BasicBlock *From,
+ BasicBlock *To),
+ "Use applyUpdatesPermissive() instead.");
/// Delete DelBB. DelBB will be removed from its Parent and
/// erased from available trees if it exists and finally get deleted.
@@ -158,27 +211,32 @@
void callbackDeleteBB(BasicBlock *DelBB,
std::function<void(BasicBlock *)> Callback);
- /// Recalculate all available trees and flush all BasicBlocks
- /// awaiting deletion immediately.
- void recalculate(Function &F);
+ ///@}
+
+ ///@{
+ /// \name Flush APIs
+ ///
+ /// CAUTION! By the time these flush APIs are called, the current CFG needs
+ /// to be the same as the CFG that DTU is in sync with, plus all updates
+ /// submitted.
/// Flush DomTree updates and return DomTree.
- /// It also flush out of date updates applied by all available trees
- /// and flush Deleted BBs if both trees are up-to-date.
+ /// It flushes Deleted BBs if both trees are up-to-date.
/// It must only be called when it has a DomTree.
DominatorTree &getDomTree();
/// Flush PostDomTree updates and return PostDomTree.
- /// It also flush out of date updates applied by all available trees
- /// and flush Deleted BBs if both trees are up-to-date.
+ /// It flushes Deleted BBs if both trees are up-to-date.
/// It must only be called when it has a PostDomTree.
PostDominatorTree &getPostDomTree();
/// Apply all pending updates to available trees and flush all BasicBlocks
/// awaiting deletion.
- /// Does nothing under Eager UpdateStrategy.
+
void flush();
+ ///@}
+
/// Debug method to help view the internal state of this class.
LLVM_DUMP_METHOD void dump() const;
@@ -220,11 +278,6 @@
/// Returns true if at least one BasicBlock is deleted.
bool forceFlushDeletedBB();
- /// Deduplicate and remove unnecessary updates (no-ops) when using Lazy
- /// UpdateStrategy. Returns true if the update is queued for update.
- bool applyLazyUpdate(DominatorTree::UpdateKind Kind, BasicBlock *From,
- BasicBlock *To);
-
/// Helper function to apply all pending DomTree updates.
void applyDomTreeUpdates();
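Aside (a hypothetical usage sketch; `DT`, `PDT`, and the block pointers are placeholders): the intended flow under the Lazy strategy is to submit updates after mutating the IR and let the flush APIs bring the trees up to date on demand.

    DomTreeUpdater DTU(DT, PDT, DomTreeUpdater::UpdateStrategy::Lazy);

    // The IR edges must already have been changed, and no update may be
    // submitted twice.
    DTU.applyUpdates({{DominatorTree::Insert, PredBB, NewSucc},
                      {DominatorTree::Delete, PredBB, OldSucc}});

    // Queries flush the pending updates.
    DominatorTree &FreshDT = DTU.getDomTree();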
diff --git a/linux-x64/clang/include/llvm/Analysis/GlobalsModRef.h b/linux-x64/clang/include/llvm/Analysis/GlobalsModRef.h
index 14b2097..d3fcfc2 100644
--- a/linux-x64/clang/include/llvm/Analysis/GlobalsModRef.h
+++ b/linux-x64/clang/include/llvm/Analysis/GlobalsModRef.h
@@ -84,10 +84,12 @@
//------------------------------------------------
// Implement the AliasAnalysis API
//
- AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
+ AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
+ AAQueryInfo &AAQI);
using AAResultBase::getModRefInfo;
- ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc);
+ ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc,
+ AAQueryInfo &AAQI);
/// getModRefBehavior - Return the behavior of the specified function if
/// called from the specified call site. The call site may be null in which
@@ -113,7 +115,7 @@
bool isNonEscapingGlobalNoAlias(const GlobalValue *GV, const Value *V);
ModRefInfo getModRefInfoForArgument(const CallBase *Call,
- const GlobalValue *GV);
+ const GlobalValue *GV, AAQueryInfo &AAQI);
};
/// Analysis pass providing a never-invalidated alias analysis result.
diff --git a/linux-x64/clang/include/llvm/Analysis/IVDescriptors.h b/linux-x64/clang/include/llvm/Analysis/IVDescriptors.h
index 254cabf..7be1fd3 100644
--- a/linux-x64/clang/include/llvm/Analysis/IVDescriptors.h
+++ b/linux-x64/clang/include/llvm/Analysis/IVDescriptors.h
@@ -89,10 +89,12 @@
RecurrenceDescriptor() = default;
RecurrenceDescriptor(Value *Start, Instruction *Exit, RecurrenceKind K,
- MinMaxRecurrenceKind MK, Instruction *UAI, Type *RT,
- bool Signed, SmallPtrSetImpl<Instruction *> &CI)
- : StartValue(Start), LoopExitInstr(Exit), Kind(K), MinMaxKind(MK),
- UnsafeAlgebraInst(UAI), RecurrenceType(RT), IsSigned(Signed) {
+ FastMathFlags FMF, MinMaxRecurrenceKind MK,
+ Instruction *UAI, Type *RT, bool Signed,
+ SmallPtrSetImpl<Instruction *> &CI)
+ : StartValue(Start), LoopExitInstr(Exit), Kind(K), FMF(FMF),
+ MinMaxKind(MK), UnsafeAlgebraInst(UAI), RecurrenceType(RT),
+ IsSigned(Signed) {
CastInsts.insert(CI.begin(), CI.end());
}
@@ -198,6 +200,8 @@
MinMaxRecurrenceKind getMinMaxRecurrenceKind() { return MinMaxKind; }
+ FastMathFlags getFastMathFlags() { return FMF; }
+
TrackingVH<Value> getRecurrenceStartValue() { return StartValue; }
Instruction *getLoopExitInstr() { return LoopExitInstr; }
@@ -237,6 +241,9 @@
Instruction *LoopExitInstr = nullptr;
// The kind of the recurrence.
RecurrenceKind Kind = RK_NoRecurrence;
+ // The fast-math flags on the recurrent instructions. We propagate these
+ // fast-math flags into the vectorized FP instructions we generate.
+ FastMathFlags FMF;
// If this a min/max recurrence the kind of recurrence.
MinMaxRecurrenceKind MinMaxKind = MRK_Invalid;
  // First occurrence of unsafe algebra in the PHI's use-chain.
@@ -308,12 +315,16 @@
/// not have the "fast-math" property. Such operation requires a relaxed FP
/// mode.
bool hasUnsafeAlgebra() {
- return InductionBinOp && !cast<FPMathOperator>(InductionBinOp)->isFast();
+ return (IK == IK_FpInduction) && InductionBinOp &&
+ !cast<FPMathOperator>(InductionBinOp)->isFast();
}
/// Returns induction operator that does not have "fast-math" property
/// and requires FP unsafe mode.
Instruction *getUnsafeAlgebraInst() {
+ if (IK != IK_FpInduction)
+ return nullptr;
+
if (!InductionBinOp || cast<FPMathOperator>(InductionBinOp)->isFast())
return nullptr;
return InductionBinOp;
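For reference, a minimal sketch of how a client might consume the now-gated accessors above (illustrative only, not part of the patch; the helper name is hypothetical):

    #include "llvm/Analysis/IVDescriptors.h"
    using namespace llvm;

    // Illustrative only: reject FP inductions that still carry non-fast math.
    static bool fpInductionIsVectorizable(InductionDescriptor &ID) {
      if (ID.hasUnsafeAlgebra())
        return false; // a relaxed FP mode would be required
      // With safe algebra there is no offending instruction to report.
      return ID.getUnsafeAlgebraInst() == nullptr;
    }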
diff --git a/linux-x64/clang/include/llvm/Analysis/InlineCost.h b/linux-x64/clang/include/llvm/Analysis/InlineCost.h
index e386093..611c9de 100644
--- a/linux-x64/clang/include/llvm/Analysis/InlineCost.h
+++ b/linux-x64/clang/include/llvm/Analysis/InlineCost.h
@@ -22,7 +22,7 @@
namespace llvm {
class AssumptionCacheTracker;
class BlockFrequencyInfo;
-class CallSite;
+class CallBase;
class DataLayout;
class Function;
class ProfileSummaryInfo;
@@ -67,10 +67,10 @@
};
/// The estimated cost of inlining this callsite.
- const int Cost;
+ int Cost;
/// The adjusted threshold against which this cost was computed.
- const int Threshold;
+ int Threshold;
/// Must be set for Always and Never instances.
const char *Reason = nullptr;
@@ -199,7 +199,7 @@
/// Return the cost associated with a callsite, including parameter passing
/// and the call/return instruction.
-int getCallsiteCost(CallSite CS, const DataLayout &DL);
+int getCallsiteCost(CallBase &Call, const DataLayout &DL);
/// Get an InlineCost object representing the cost of inlining this
/// callsite.
@@ -213,7 +213,7 @@
/// Also note that calling this function *dynamically* computes the cost of
/// inlining the callsite. It is an expensive, heavyweight call.
InlineCost getInlineCost(
- CallSite CS, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
+ CallBase &Call, const InlineParams &Params, TargetTransformInfo &CalleeTTI,
std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
ProfileSummaryInfo *PSI, OptimizationRemarkEmitter *ORE = nullptr);
@@ -224,7 +224,7 @@
/// parameter in all other respects.
//
InlineCost
-getInlineCost(CallSite CS, Function *Callee, const InlineParams &Params,
+getInlineCost(CallBase &Call, Function *Callee, const InlineParams &Params,
TargetTransformInfo &CalleeTTI,
std::function<AssumptionCache &(Function &)> &GetAssumptionCache,
Optional<function_ref<BlockFrequencyInfo &(Function &)>> GetBFI,
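The CallSite-to-CallBase migration above changes every caller; a minimal sketch of the updated calling convention (illustrative only, not part of the patch; the wrapper name is hypothetical):

    #include "llvm/Analysis/InlineCost.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/InstrTypes.h"
    using namespace llvm;

    // Illustrative only: pass the CallBase directly instead of wrapping it
    // in a CallSite.
    static int costOfCallSite(Instruction &I, const DataLayout &DL) {
      if (auto *CB = dyn_cast<CallBase>(&I))
        return getCallsiteCost(*CB, DL); // was getCallsiteCost(CallSite(&I), DL)
      return 0;
    }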
diff --git a/linux-x64/clang/include/llvm/Analysis/InstructionPrecedenceTracking.h b/linux-x64/clang/include/llvm/Analysis/InstructionPrecedenceTracking.h
index eb72eac..3c39810 100644
--- a/linux-x64/clang/include/llvm/Analysis/InstructionPrecedenceTracking.h
+++ b/linux-x64/clang/include/llvm/Analysis/InstructionPrecedenceTracking.h
@@ -92,7 +92,7 @@
/// example, throwing calls and guards do not always do this. If we need to know
/// for sure that some instruction is guaranteed to execute if the given block
/// is reached, then we need to make sure that there is no implicit control flow
-/// instruction (ICFI) preceeding it. For example, this check is required if we
+/// instruction (ICFI) preceding it. For example, this check is required if we
/// perform PRE that moves a non-speculatable instruction to another place.
class ImplicitControlFlowTracking : public InstructionPrecedenceTracking {
public:
diff --git a/linux-x64/clang/include/llvm/Analysis/InstructionSimplify.h b/linux-x64/clang/include/llvm/Analysis/InstructionSimplify.h
index a904043..531000b 100644
--- a/linux-x64/clang/include/llvm/Analysis/InstructionSimplify.h
+++ b/linux-x64/clang/include/llvm/Analysis/InstructionSimplify.h
@@ -117,6 +117,10 @@
// deprecated.
// Please use the SimplifyQuery versions in new code.
+/// Given operand for an FNeg, fold the result or return null.
+Value *SimplifyFNegInst(Value *Op, FastMathFlags FMF,
+ const SimplifyQuery &Q);
+
/// Given operands for an Add, fold the result or return null.
Value *SimplifyAddInst(Value *LHS, Value *RHS, bool isNSW, bool isNUW,
const SimplifyQuery &Q);
@@ -227,6 +231,15 @@
Value *SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
const SimplifyQuery &Q);
+/// Given operand for a UnaryOperator, fold the result or return null.
+Value *SimplifyUnOp(unsigned Opcode, Value *Op, const SimplifyQuery &Q);
+
+/// Given operand for an FP UnaryOperator, fold the result or return null.
+/// In contrast to SimplifyUnOp, try to use FastMathFlag when folding the
+/// result. In case we don't need FastMathFlags, simply fall back to SimplifyUnOp.
+Value *SimplifyFPUnOp(unsigned Opcode, Value *Op, FastMathFlags FMF,
+ const SimplifyQuery &Q);
+
/// Given operands for a BinaryOperator, fold the result or return null.
Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
const SimplifyQuery &Q);
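A minimal sketch of how a pass might use the new unary entry points (illustrative only, not part of the patch; the helper name is hypothetical and the SimplifyQuery is assumed to be set up by the caller):

    #include "llvm/Analysis/InstructionSimplify.h"
    #include "llvm/IR/InstrTypes.h"
    #include "llvm/IR/Operator.h"
    using namespace llvm;

    // Illustrative only: try to fold an fneg, honoring its fast-math flags.
    static Value *tryFoldFNeg(UnaryOperator *UO, const SimplifyQuery &Q) {
      assert(UO->getOpcode() == Instruction::FNeg && "expected fneg");
      FastMathFlags FMF = UO->getFastMathFlags();
      // SimplifyFPUnOp consults FMF; SimplifyUnOp would ignore it.
      return SimplifyFPUnOp(Instruction::FNeg, UO->getOperand(0), FMF, Q);
    }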
diff --git a/linux-x64/clang/include/llvm/Analysis/IteratedDominanceFrontier.h b/linux-x64/clang/include/llvm/Analysis/IteratedDominanceFrontier.h
index e7d19d1..1fa1db5 100644
--- a/linux-x64/clang/include/llvm/Analysis/IteratedDominanceFrontier.h
+++ b/linux-x64/clang/include/llvm/Analysis/IteratedDominanceFrontier.h
@@ -5,96 +5,86 @@
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
-/// \file
-/// Compute iterated dominance frontiers using a linear time algorithm.
-///
-/// The algorithm used here is based on:
-///
-/// Sreedhar and Gao. A linear time algorithm for placing phi-nodes.
-/// In Proceedings of the 22nd ACM SIGPLAN-SIGACT Symposium on Principles of
-/// Programming Languages
-/// POPL '95. ACM, New York, NY, 62-73.
-///
-/// It has been modified to not explicitly use the DJ graph data structure and
-/// to directly compute pruned SSA using per-variable liveness information.
-//
-//===----------------------------------------------------------------------===//
#ifndef LLVM_ANALYSIS_IDF_H
#define LLVM_ANALYSIS_IDF_H
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/SmallPtrSet.h"
-#include "llvm/ADT/SmallVector.h"
-#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/CFGDiff.h"
-#include "llvm/IR/Dominators.h"
+#include "llvm/Support/GenericIteratedDominanceFrontier.h"
namespace llvm {
-/// Determine the iterated dominance frontier, given a set of defining
-/// blocks, and optionally, a set of live-in blocks.
-///
-/// In turn, the results can be used to place phi nodes.
-///
-/// This algorithm is a linear time computation of Iterated Dominance Frontiers,
-/// pruned using the live-in set.
-/// By default, liveness is not used to prune the IDF computation.
-/// The template parameters should be either BasicBlock* or Inverse<BasicBlock
-/// *>, depending on if you want the forward or reverse IDF.
-template <class NodeTy, bool IsPostDom>
-class IDFCalculator {
- public:
- IDFCalculator(DominatorTreeBase<BasicBlock, IsPostDom> &DT)
- : DT(DT), GD(nullptr), useLiveIn(false) {}
+class BasicBlock;
- IDFCalculator(DominatorTreeBase<BasicBlock, IsPostDom> &DT,
- const GraphDiff<BasicBlock *, IsPostDom> *GD)
- : DT(DT), GD(GD), useLiveIn(false) {}
+namespace IDFCalculatorDetail {
- /// Give the IDF calculator the set of blocks in which the value is
- /// defined. This is equivalent to the set of starting blocks it should be
- /// calculating the IDF for (though later gets pruned based on liveness).
- ///
- /// Note: This set *must* live for the entire lifetime of the IDF calculator.
- void setDefiningBlocks(const SmallPtrSetImpl<BasicBlock *> &Blocks) {
- DefBlocks = &Blocks;
- }
+/// Specialization for BasicBlock for the optional use of GraphDiff.
+template <bool IsPostDom> struct ChildrenGetterTy<BasicBlock, IsPostDom> {
+ using NodeRef = BasicBlock *;
+ using ChildrenTy = SmallVector<BasicBlock *, 8>;
- /// Give the IDF calculator the set of blocks in which the value is
- /// live on entry to the block. This is used to prune the IDF calculation to
- /// not include blocks where any phi insertion would be dead.
- ///
- /// Note: This set *must* live for the entire lifetime of the IDF calculator.
-
- void setLiveInBlocks(const SmallPtrSetImpl<BasicBlock *> &Blocks) {
- LiveInBlocks = &Blocks;
- useLiveIn = true;
+ ChildrenGetterTy() = default;
+ ChildrenGetterTy(const GraphDiff<BasicBlock *, IsPostDom> *GD) : GD(GD) {
+ assert(GD);
}
- /// Reset the live-in block set to be empty, and tell the IDF
- /// calculator to not use liveness anymore.
- void resetLiveInBlocks() {
- LiveInBlocks = nullptr;
- useLiveIn = false;
- }
+ ChildrenTy get(const NodeRef &N);
- /// Calculate iterated dominance frontiers
- ///
- /// This uses the linear-time phi algorithm based on DJ-graphs mentioned in
- /// the file-level comment. It performs DF->IDF pruning using the live-in
- /// set, to avoid computing the IDF for blocks where an inserted PHI node
- /// would be dead.
- void calculate(SmallVectorImpl<BasicBlock *> &IDFBlocks);
-
-private:
- DominatorTreeBase<BasicBlock, IsPostDom> &DT;
- const GraphDiff<BasicBlock *, IsPostDom> *GD;
- bool useLiveIn;
- const SmallPtrSetImpl<BasicBlock *> *LiveInBlocks;
- const SmallPtrSetImpl<BasicBlock *> *DefBlocks;
+ const GraphDiff<BasicBlock *, IsPostDom> *GD = nullptr;
};
-typedef IDFCalculator<BasicBlock *, false> ForwardIDFCalculator;
-typedef IDFCalculator<Inverse<BasicBlock *>, true> ReverseIDFCalculator;
+
+} // end of namespace IDFCalculatorDetail
+
+template <bool IsPostDom>
+class IDFCalculator final : public IDFCalculatorBase<BasicBlock, IsPostDom> {
+public:
+ using IDFCalculatorBase =
+ typename llvm::IDFCalculatorBase<BasicBlock, IsPostDom>;
+ using ChildrenGetterTy = typename IDFCalculatorBase::ChildrenGetterTy;
+
+ IDFCalculator(DominatorTreeBase<BasicBlock, IsPostDom> &DT)
+ : IDFCalculatorBase(DT) {}
+
+ IDFCalculator(DominatorTreeBase<BasicBlock, IsPostDom> &DT,
+ const GraphDiff<BasicBlock *, IsPostDom> *GD)
+ : IDFCalculatorBase(DT, ChildrenGetterTy(GD)) {
+ assert(GD);
+ }
+};
+
+using ForwardIDFCalculator = IDFCalculator<false>;
+using ReverseIDFCalculator = IDFCalculator<true>;
+
+//===----------------------------------------------------------------------===//
+// Implementation.
+//===----------------------------------------------------------------------===//
+
+namespace IDFCalculatorDetail {
+
+template <bool IsPostDom>
+typename ChildrenGetterTy<BasicBlock, IsPostDom>::ChildrenTy
+ChildrenGetterTy<BasicBlock, IsPostDom>::get(
+ const ChildrenGetterTy<BasicBlock, IsPostDom>::NodeRef &N) {
+
+ using OrderedNodeTy =
+ typename IDFCalculatorBase<BasicBlock, IsPostDom>::OrderedNodeTy;
+
+ if (!GD) {
+ auto Children = children<OrderedNodeTy>(N);
+ return {Children.begin(), Children.end()};
+ }
+
+ using SnapShotBBPairTy =
+ std::pair<const GraphDiff<BasicBlock *, IsPostDom> *, OrderedNodeTy>;
+
+ ChildrenTy Ret;
+ for (const auto &SnapShotBBPair : children<SnapShotBBPairTy>({GD, N}))
+ Ret.emplace_back(SnapShotBBPair.second);
+ return Ret;
}
+
+} // end of namespace IDFCalculatorDetail
+
+} // end of namespace llvm
+
#endif
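The PHI-placement interface now comes from IDFCalculatorBase; assuming the defining-blocks/calculate API carries over unchanged from the generic base, a typical use looks like this (illustrative only, not part of the patch; the function name is hypothetical):

    #include "llvm/ADT/SmallPtrSet.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Analysis/IteratedDominanceFrontier.h"
    #include "llvm/IR/Dominators.h"
    using namespace llvm;

    // Illustrative only: compute the blocks that need a PHI for a value
    // defined in DefBlocks.
    static void computePHIBlocks(DominatorTree &DT,
                                 const SmallPtrSetImpl<BasicBlock *> &DefBlocks,
                                 SmallVectorImpl<BasicBlock *> &PHIBlocks) {
      ForwardIDFCalculator IDF(DT);     // IDFCalculatorBase<BasicBlock, false>
      IDF.setDefiningBlocks(DefBlocks); // inherited from the generic base
      IDF.calculate(PHIBlocks);
    }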
diff --git a/linux-x64/clang/include/llvm/Analysis/LazyCallGraph.h b/linux-x64/clang/include/llvm/Analysis/LazyCallGraph.h
index 3286547..2d83929 100644
--- a/linux-x64/clang/include/llvm/Analysis/LazyCallGraph.h
+++ b/linux-x64/clang/include/llvm/Analysis/LazyCallGraph.h
@@ -38,6 +38,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/Optional.h"
#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
@@ -1082,12 +1083,26 @@
continue;
}
+ // The blockaddress constant expression is a weird special case; we can't
+ // generically walk its operands the way we do for all other constants.
if (BlockAddress *BA = dyn_cast<BlockAddress>(C)) {
- // The blockaddress constant expression is a weird special case, we
- // can't generically walk its operands the way we do for all other
- // constants.
- if (Visited.insert(BA->getFunction()).second)
- Worklist.push_back(BA->getFunction());
+ // If we've already visited the function referred to by the block
+ // address, we don't need to revisit it.
+ if (Visited.count(BA->getFunction()))
+ continue;
+
+ // If all of the blockaddress' users are instructions within the
+ // referred to function, we don't need to insert a cycle.
+ if (llvm::all_of(BA->users(), [&](User *U) {
+ if (Instruction *I = dyn_cast<Instruction>(U))
+ return I->getFunction() == BA->getFunction();
+ return false;
+ }))
+ continue;
+
+ // Otherwise we should go visit the referred to function.
+ Visited.insert(BA->getFunction());
+ Worklist.push_back(BA->getFunction());
continue;
}
diff --git a/linux-x64/clang/include/llvm/Analysis/Loads.h b/linux-x64/clang/include/llvm/Analysis/Loads.h
index 0440116..5df6bb0 100644
--- a/linux-x64/clang/include/llvm/Analysis/Loads.h
+++ b/linux-x64/clang/include/llvm/Analysis/Loads.h
@@ -25,7 +25,8 @@
/// Return true if this is always a dereferenceable pointer. If the context
/// instruction is specified perform context-sensitive analysis and return true
/// if the pointer is dereferenceable at the specified instruction.
-bool isDereferenceablePointer(const Value *V, const DataLayout &DL,
+bool isDereferenceablePointer(const Value *V, Type *Ty,
+ const DataLayout &DL,
const Instruction *CtxI = nullptr,
const DominatorTree *DT = nullptr);
@@ -33,8 +34,8 @@
/// greater or equal than requested. If the context instruction is specified
/// performs context-sensitive analysis and returns true if the pointer is
/// dereferenceable at the specified instruction.
-bool isDereferenceableAndAlignedPointer(const Value *V, unsigned Align,
- const DataLayout &DL,
+bool isDereferenceableAndAlignedPointer(const Value *V, Type *Ty,
+ unsigned Align, const DataLayout &DL,
const Instruction *CtxI = nullptr,
const DominatorTree *DT = nullptr);
@@ -55,7 +56,20 @@
/// If it is not obviously safe to load from the specified pointer, we do a
/// quick local scan of the basic block containing ScanFrom, to determine if
/// the address is already accessed.
-bool isSafeToLoadUnconditionally(Value *V, unsigned Align,
+bool isSafeToLoadUnconditionally(Value *V, unsigned Align, APInt &Size,
+ const DataLayout &DL,
+ Instruction *ScanFrom = nullptr,
+ const DominatorTree *DT = nullptr);
+
+/// Return true if we know that executing a load from this value cannot trap.
+///
+/// If DT and ScanFrom are specified this method performs context-sensitive
+/// analysis and returns true if it is safe to load immediately before ScanFrom.
+///
+/// If it is not obviously safe to load from the specified pointer, we do a
+/// quick local scan of the basic block containing ScanFrom, to determine if
+/// the address is already accessed.
+bool isSafeToLoadUnconditionally(Value *V, Type *Ty, unsigned Align,
const DataLayout &DL,
Instruction *ScanFrom = nullptr,
const DominatorTree *DT = nullptr);
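Callers of the dereferenceability helpers now pass the loaded type explicitly; a minimal sketch (illustrative only, not part of the patch; the helper name is hypothetical):

    #include "llvm/Analysis/Loads.h"
    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/Instructions.h"
    using namespace llvm;

    // Illustrative only: check whether a load can be speculated at its
    // current position, spelling out the type being loaded.
    static bool loadIsSafeToSpeculate(LoadInst &LI, const DataLayout &DL,
                                      const DominatorTree *DT) {
      return isSafeToLoadUnconditionally(LI.getPointerOperand(), LI.getType(),
                                         LI.getAlignment(), DL, &LI, DT);
    }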
diff --git a/linux-x64/clang/include/llvm/Analysis/LoopAccessAnalysis.h b/linux-x64/clang/include/llvm/Analysis/LoopAccessAnalysis.h
index fa34afa..9e9aaa3 100644
--- a/linux-x64/clang/include/llvm/Analysis/LoopAccessAnalysis.h
+++ b/linux-x64/clang/include/llvm/Analysis/LoopAccessAnalysis.h
@@ -522,6 +522,11 @@
/// no memory dependence cycles.
bool canVectorizeMemory() const { return CanVecMem; }
+ /// Return true if there is a convergent operation in the loop. There may
+ /// still be reported runtime pointer checks that would be required, but it is
+ /// not legal to insert them.
+ bool hasConvergentOp() const { return HasConvergentOp; }
+
const RuntimePointerChecking *getRuntimePointerChecking() const {
return PtrRtChecking.get();
}
@@ -642,6 +647,7 @@
/// Cache the result of analyzeLoop.
bool CanVecMem;
+ bool HasConvergentOp;
/// Indicator that there are non vectorizable stores to a uniform address.
bool HasDependenceInvolvingLoopInvariantAddress;
diff --git a/linux-x64/clang/include/llvm/Analysis/LoopAnalysisManager.h b/linux-x64/clang/include/llvm/Analysis/LoopAnalysisManager.h
index b87d981..368a810 100644
--- a/linux-x64/clang/include/llvm/Analysis/LoopAnalysisManager.h
+++ b/linux-x64/clang/include/llvm/Analysis/LoopAnalysisManager.h
@@ -61,9 +61,6 @@
MemorySSA *MSSA;
};
-/// Enables memory ssa as a dependency for loop passes.
-extern cl::opt<bool> EnableMSSALoopDependency;
-
/// Extern template declaration for the analysis set for this IR unit.
extern template class AllAnalysesOn<Loop>;
diff --git a/linux-x64/clang/include/llvm/Analysis/LoopInfo.h b/linux-x64/clang/include/llvm/Analysis/LoopInfo.h
index 0899630..98b3129 100644
--- a/linux-x64/clang/include/llvm/Analysis/LoopInfo.h
+++ b/linux-x64/clang/include/llvm/Analysis/LoopInfo.h
@@ -54,8 +54,11 @@
class DominatorTree;
class LoopInfo;
class Loop;
+class InductionDescriptor;
class MDNode;
+class MemorySSAUpdater;
class PHINode;
+class ScalarEvolution;
class raw_ostream;
template <class N, bool IsPostDom> class DominatorTreeBase;
template <class N, class M> class LoopInfoBase;
@@ -198,9 +201,10 @@
}
/// True if terminator in the block can branch to another block that is
- /// outside of the current loop.
+ /// outside of the current loop. \p BB must be inside the loop.
bool isLoopExiting(const BlockT *BB) const {
assert(!isInvalid() && "Loop not in a valid state!");
+ assert(contains(BB) && "Exiting block must be part of the loop");
for (const auto &Succ : children<const BlockT *>(BB)) {
if (!contains(Succ))
return true;
@@ -275,7 +279,7 @@
BlockT *getUniqueExitBlock() const;
/// Edge type.
- typedef std::pair<const BlockT *, const BlockT *> Edge;
+ typedef std::pair<BlockT *, BlockT *> Edge;
/// Return all pairs of (_inside_block_,_outside_block_).
void getExitEdges(SmallVectorImpl<Edge> &ExitEdges) const;
@@ -308,6 +312,40 @@
LoopLatches.push_back(Pred);
}
+ /// Return all inner loops in the loop nest rooted by the loop in preorder,
+ /// with siblings in forward program order.
+ template <class Type>
+ static void getInnerLoopsInPreorder(const LoopT &L,
+ SmallVectorImpl<Type> &PreOrderLoops) {
+ SmallVector<LoopT *, 4> PreOrderWorklist;
+ PreOrderWorklist.append(L.rbegin(), L.rend());
+
+ while (!PreOrderWorklist.empty()) {
+ LoopT *L = PreOrderWorklist.pop_back_val();
+ // Sub-loops are stored in forward program order, but we will process the
+ // worklist backwards, so append them in reverse order.
+ PreOrderWorklist.append(L->rbegin(), L->rend());
+ PreOrderLoops.push_back(L);
+ }
+ }
+
+ /// Return all loops in the loop nest rooted by the loop in preorder, with
+ /// siblings in forward program order.
+ SmallVector<const LoopT *, 4> getLoopsInPreorder() const {
+ SmallVector<const LoopT *, 4> PreOrderLoops;
+ const LoopT *CurLoop = static_cast<const LoopT *>(this);
+ PreOrderLoops.push_back(CurLoop);
+ getInnerLoopsInPreorder(*CurLoop, PreOrderLoops);
+ return PreOrderLoops;
+ }
+ SmallVector<LoopT *, 4> getLoopsInPreorder() {
+ SmallVector<LoopT *, 4> PreOrderLoops;
+ LoopT *CurLoop = static_cast<LoopT *>(this);
+ PreOrderLoops.push_back(CurLoop);
+ getInnerLoopsInPreorder(*CurLoop, PreOrderLoops);
+ return PreOrderLoops;
+ }
+
//===--------------------------------------------------------------------===//
// APIs for updating loop information after changing the CFG
//
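A minimal sketch of the new per-loop preorder traversal (illustrative only, not part of the patch; the counter is hypothetical):

    #include "llvm/Analysis/LoopInfo.h"
    using namespace llvm;

    // Illustrative only: visit the nest rooted at L, outermost loop first,
    // siblings in forward program order. L itself is the first element.
    static unsigned countLoopsInNest(Loop &L) {
      unsigned N = 0;
      for (const Loop *Sub : L.getLoopsInPreorder()) {
        (void)Sub;
        ++N;
      }
      return N;
    }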
@@ -470,7 +508,7 @@
public:
LocRange() {}
- LocRange(DebugLoc Start) : Start(std::move(Start)), End(std::move(Start)) {}
+ LocRange(DebugLoc Start) : Start(Start), End(Start) {}
LocRange(DebugLoc Start, DebugLoc End)
: Start(std::move(Start)), End(std::move(End)) {}
@@ -498,7 +536,8 @@
/// If InsertPt is specified, it is the point to hoist instructions to.
/// If null, the terminator of the loop preheader is used.
bool makeLoopInvariant(Value *V, bool &Changed,
- Instruction *InsertPt = nullptr) const;
+ Instruction *InsertPt = nullptr,
+ MemorySSAUpdater *MSSAU = nullptr) const;
/// If the given instruction is inside of the loop and it can be hoisted, do
/// so to make it trivially loop-invariant.
@@ -510,7 +549,8 @@
/// If null, the terminator of the loop preheader is used.
///
bool makeLoopInvariant(Instruction *I, bool &Changed,
- Instruction *InsertPt = nullptr) const;
+ Instruction *InsertPt = nullptr,
+ MemorySSAUpdater *MSSAU = nullptr) const;
/// Check to see if the loop has a canonical induction variable: an integer
/// recurrence that starts at 0 and increments by one each time through the
@@ -521,6 +561,170 @@
///
PHINode *getCanonicalInductionVariable() const;
+ /// Obtain the unique incoming and back edge. Return false if they are
+ /// non-unique or the loop is dead; otherwise, return true.
+ bool getIncomingAndBackEdge(BasicBlock *&Incoming,
+ BasicBlock *&Backedge) const;
+
+ /// Below are some utilities to get loop bounds and induction variable, and
+ /// check if a given phinode is an auxiliary induction variable, as well as
+ /// checking if the loop is canonical.
+ ///
+ /// Here is an example:
+ /// \code
+ /// for (int i = lb; i < ub; i+=step)
+ /// <loop body>
+ /// --- pseudo LLVMIR ---
+ /// beforeloop:
+ /// guardcmp = (lb < ub)
+ /// if (guardcmp) goto preheader; else goto afterloop
+ /// preheader:
+ /// loop:
+ /// i_1 = phi[{lb, preheader}, {i_2, latch}]
+ /// <loop body>
+ /// i_2 = i_1 + step
+ /// latch:
+ /// cmp = (i_2 < ub)
+ /// if (cmp) goto loop
+ /// exit:
+ /// afterloop:
+ /// \endcode
+ ///
+ /// - getBounds
+ /// - getInitialIVValue --> lb
+ /// - getStepInst --> i_2 = i_1 + step
+ /// - getStepValue --> step
+ /// - getFinalIVValue --> ub
+ /// - getCanonicalPredicate --> '<'
+ /// - getDirection --> Increasing
+ ///
+ /// - getInductionVariable --> i_1
+ /// - isAuxiliaryInductionVariable(x) --> true if x == i_1
+ /// - isCanonical --> false
+ struct LoopBounds {
+ /// Return the LoopBounds object if
+ /// - the given \p IndVar is an induction variable
+ /// - the initial value of the induction variable can be found
+ /// - the step instruction of the induction variable can be found
+ /// - the final value of the induction variable can be found
+ ///
+ /// Else None.
+ static Optional<Loop::LoopBounds> getBounds(const Loop &L, PHINode &IndVar,
+ ScalarEvolution &SE);
+
+ /// Get the initial value of the loop induction variable.
+ Value &getInitialIVValue() const { return InitialIVValue; }
+
+ /// Get the instruction that updates the loop induction variable.
+ Instruction &getStepInst() const { return StepInst; }
+
+ /// Get the step that the loop induction variable gets updated by in each
+ /// loop iteration. Return nullptr if not found.
+ Value *getStepValue() const { return StepValue; }
+
+ /// Get the final value of the loop induction variable.
+ Value &getFinalIVValue() const { return FinalIVValue; }
+
+ /// Return the canonical predicate for the latch compare instruction, if
+ /// able to be calculated. Else BAD_ICMP_PREDICATE.
+ ///
+ /// A predicate is considered as canonical if requirements below are all
+ /// satisfied:
+ /// 1. The first successor of the latch branch is the loop header
+ /// If not, inverse the predicate.
+ /// 2. One of the operands of the latch comparison is StepInst
+ /// If not, and
+ /// - if the current calculated predicate is not ne or eq, flip the
+ /// predicate.
+ /// - else if the loop is increasing, return slt
+ /// (notice that it is safe to change from ne or eq to sign compare)
+ /// - else if the loop is decreasing, return sgt
+ /// (notice that it is safe to change from ne or eq to sign compare)
+ ///
+ /// Here is an example when both (1) and (2) are not satisfied:
+ /// \code
+ /// loop.header:
+ /// %iv = phi [%initialiv, %loop.preheader], [%inc, %loop.header]
+ /// %inc = add %iv, %step
+ /// %cmp = slt %iv, %finaliv
+ /// br %cmp, %loop.exit, %loop.header
+ /// loop.exit:
+ /// \endcode
+ /// - The second successor of the latch branch is the loop header instead
+ /// of the first successor (slt -> sge)
+ /// - The first operand of the latch comparison (%cmp) is the IndVar (%iv)
+ /// instead of the StepInst (%inc) (sge -> sgt)
+ ///
+ /// The predicate would be sgt if both (1) and (2) are satisfied.
+ /// getCanonicalPredicate() returns sgt for this example.
+ /// Note: The IR is not changed.
+ ICmpInst::Predicate getCanonicalPredicate() const;
+
+ /// An enum for the direction of the loop
+ /// - for (int i = 0; i < ub; ++i) --> Increasing
+ /// - for (int i = ub; i > 0; --i) --> Decreasing
+ /// - for (int i = x; i != y; i+=z) --> Unknown
+ enum class Direction { Increasing, Decreasing, Unknown };
+
+ /// Get the direction of the loop.
+ Direction getDirection() const;
+
+ private:
+ LoopBounds(const Loop &Loop, Value &I, Instruction &SI, Value *SV, Value &F,
+ ScalarEvolution &SE)
+ : L(Loop), InitialIVValue(I), StepInst(SI), StepValue(SV),
+ FinalIVValue(F), SE(SE) {}
+
+ const Loop &L;
+
+ // The initial value of the loop induction variable
+ Value &InitialIVValue;
+
+ // The instruction that updates the loop induction variable
+ Instruction &StepInst;
+
+ // The value that the loop induction variable gets updated by in each loop
+ // iteration
+ Value *StepValue;
+
+ // The final value of the loop induction variable
+ Value &FinalIVValue;
+
+ ScalarEvolution &SE;
+ };
+
+ /// Return the struct LoopBounds collected if all struct members are found,
+ /// else None.
+ Optional<LoopBounds> getBounds(ScalarEvolution &SE) const;
+
+ /// Return the loop induction variable if found, else return nullptr.
+ /// An instruction is considered as the loop induction variable if
+ /// - it is an induction variable of the loop; and
+ /// - it is used to determine the condition of the branch in the loop latch
+ ///
+ /// Note: the induction variable doesn't need to be canonical, i.e. starts at
+ /// zero and increments by one each time through the loop (but it can be).
+ PHINode *getInductionVariable(ScalarEvolution &SE) const;
+
+ /// Get the loop induction descriptor for the loop induction variable. Return
+ /// true if the loop induction variable is found.
+ bool getInductionDescriptor(ScalarEvolution &SE,
+ InductionDescriptor &IndDesc) const;
+
+ /// Return true if the given PHINode \p AuxIndVar is
+ /// - in the loop header
+ /// - not used outside of the loop
+ /// - incremented by a loop invariant step for each loop iteration
+ /// - step instruction opcode should be add or sub
+ /// Note: an auxiliary induction variable is not required to be used in the
+ /// conditional branch in the loop latch (but it can be).
+ bool isAuxiliaryInductionVariable(PHINode &AuxIndVar,
+ ScalarEvolution &SE) const;
+
+ /// Return true if the loop induction variable starts at zero and increments
+ /// by one each time through the loop.
+ bool isCanonical(ScalarEvolution &SE) const;
+
/// Return true if the Loop is in LCSSA form.
bool isLCSSAForm(DominatorTree &DT) const;
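A minimal sketch of querying the new loop-bound helpers (illustrative only, not part of the patch; the predicate name is hypothetical and ScalarEvolution is assumed to be available):

    #include "llvm/Analysis/LoopInfo.h"
    #include "llvm/Analysis/ScalarEvolution.h"
    using namespace llvm;

    // Illustrative only: true if the loop counts upward and its step value
    // could be identified.
    static bool isCountedUpLoop(const Loop &L, ScalarEvolution &SE) {
      Optional<Loop::LoopBounds> Bounds = L.getBounds(SE);
      if (!Bounds)
        return false; // induction variable or bounds not found
      return Bounds->getDirection() == Loop::LoopBounds::Direction::Increasing &&
             Bounds->getStepValue() != nullptr;
    }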
diff --git a/linux-x64/clang/include/llvm/Analysis/LoopInfoImpl.h b/linux-x64/clang/include/llvm/Analysis/LoopInfoImpl.h
index ad42508..6ff4837 100644
--- a/linux-x64/clang/include/llvm/Analysis/LoopInfoImpl.h
+++ b/linux-x64/clang/include/llvm/Analysis/LoopInfoImpl.h
@@ -587,16 +587,9 @@
// FIXME: If we change the order of LoopInfo we will want to remove the
// reverse here.
for (LoopT *RootL : reverse(*this)) {
- assert(PreOrderWorklist.empty() &&
- "Must start with an empty preorder walk worklist.");
- PreOrderWorklist.push_back(RootL);
- do {
- LoopT *L = PreOrderWorklist.pop_back_val();
- // Sub-loops are stored in forward program order, but will process the
- // worklist backwards so append them in reverse order.
- PreOrderWorklist.append(L->rbegin(), L->rend());
- PreOrderLoops.push_back(L);
- } while (!PreOrderWorklist.empty());
+ auto PreOrderLoopsInRootL = RootL->getLoopsInPreorder();
+ PreOrderLoops.append(PreOrderLoopsInRootL.begin(),
+ PreOrderLoopsInRootL.end());
}
return PreOrderLoops;
diff --git a/linux-x64/clang/include/llvm/Analysis/MemoryBuiltins.h b/linux-x64/clang/include/llvm/Analysis/MemoryBuiltins.h
index 49b87a4..49f9e58 100644
--- a/linux-x64/clang/include/llvm/Analysis/MemoryBuiltins.h
+++ b/linux-x64/clang/include/llvm/Analysis/MemoryBuiltins.h
@@ -18,6 +18,7 @@
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/TargetFolder.h"
+#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InstVisitor.h"
@@ -83,6 +84,15 @@
bool isAllocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
bool LookThroughBitCast = false);
+/// Tests if a value is a call or invoke to a library function that
+/// reallocates memory (e.g., realloc).
+bool isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI,
+ bool LookThroughBitCast = false);
+
+/// Tests if a function is a call or invoke to a library function that
+/// reallocates memory (e.g., realloc).
+bool isReallocLikeFn(const Function *F, const TargetLibraryInfo *TLI);
+
//===----------------------------------------------------------------------===//
// malloc Call Utility Functions.
//
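A minimal sketch of the new realloc predicate (illustrative only, not part of the patch; the wrapper name is hypothetical):

    #include "llvm/Analysis/MemoryBuiltins.h"
    using namespace llvm;

    // Illustrative only: realloc-like calls both free the old buffer and
    // allocate a new one, so they need the combined treatment.
    static bool callMayReallocate(const Value *V, const TargetLibraryInfo *TLI) {
      return isReallocLikeFn(V, TLI, /*LookThroughBitCast=*/false);
    }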
@@ -134,6 +144,9 @@
// free Call Utility Functions.
//
+/// isLibFreeFunction - Returns true if the function is a builtin free()
+bool isLibFreeFunction(const Function *F, const LibFunc TLIFn);
+
/// isFreeCall - Returns non-null if the value is a call to the builtin free()
const CallInst *isFreeCall(const Value *I, const TargetLibraryInfo *TLI);
@@ -250,7 +263,7 @@
/// May create code to compute the result at run-time.
class ObjectSizeOffsetEvaluator
: public InstVisitor<ObjectSizeOffsetEvaluator, SizeOffsetEvalType> {
- using BuilderTy = IRBuilder<TargetFolder>;
+ using BuilderTy = IRBuilder<TargetFolder, IRBuilderCallbackInserter>;
using WeakEvalType = std::pair<WeakTrackingVH, WeakTrackingVH>;
using CacheMapTy = DenseMap<const Value *, WeakEvalType>;
using PtrSetTy = SmallPtrSet<const Value *, 8>;
@@ -264,6 +277,7 @@
CacheMapTy CacheMap;
PtrSetTy SeenVals;
ObjectSizeOpts EvalOpts;
+ SmallPtrSet<Instruction *, 8> InsertedInstructions;
SizeOffsetEvalType compute_(Value *V);
diff --git a/linux-x64/clang/include/llvm/Analysis/MemoryDependenceAnalysis.h b/linux-x64/clang/include/llvm/Analysis/MemoryDependenceAnalysis.h
index de57499..e2669c2 100644
--- a/linux-x64/clang/include/llvm/Analysis/MemoryDependenceAnalysis.h
+++ b/linux-x64/clang/include/llvm/Analysis/MemoryDependenceAnalysis.h
@@ -381,7 +381,8 @@
///
/// See the class comment for more details. It is illegal to call this on
/// non-memory instructions.
- MemDepResult getDependency(Instruction *QueryInst);
+ MemDepResult getDependency(Instruction *QueryInst,
+ OrderedBasicBlock *OBB = nullptr);
/// Perform a full dependency query for the specified call, returning the set
/// of blocks that the value is potentially live across.
@@ -447,14 +448,14 @@
BasicBlock::iterator ScanIt,
BasicBlock *BB,
Instruction *QueryInst = nullptr,
- unsigned *Limit = nullptr);
+ unsigned *Limit = nullptr,
+ OrderedBasicBlock *OBB = nullptr);
- MemDepResult getSimplePointerDependencyFrom(const MemoryLocation &MemLoc,
- bool isLoad,
- BasicBlock::iterator ScanIt,
- BasicBlock *BB,
- Instruction *QueryInst,
- unsigned *Limit = nullptr);
+ MemDepResult
+ getSimplePointerDependencyFrom(const MemoryLocation &MemLoc, bool isLoad,
+ BasicBlock::iterator ScanIt, BasicBlock *BB,
+ Instruction *QueryInst, unsigned *Limit,
+ OrderedBasicBlock *OBB);
/// This analysis looks for other loads and stores with invariant.group
/// metadata and the same pointer operand. Returns Unknown if it does not
diff --git a/linux-x64/clang/include/llvm/Analysis/MemorySSA.h b/linux-x64/clang/include/llvm/Analysis/MemorySSA.h
index fa92fd3..b7730be 100644
--- a/linux-x64/clang/include/llvm/Analysis/MemorySSA.h
+++ b/linux-x64/clang/include/llvm/Analysis/MemorySSA.h
@@ -104,6 +104,9 @@
namespace llvm {
+/// Enables memory ssa as a dependency for loop passes.
+extern cl::opt<bool> EnableMSSALoopDependency;
+
class Function;
class Instruction;
class MemoryAccess;
@@ -700,6 +703,11 @@
class MemorySSA {
public:
MemorySSA(Function &, AliasAnalysis *, DominatorTree *);
+
+ // MemorySSA must remain where it's constructed; Walkers it creates store
+ // pointers to it.
+ MemorySSA(MemorySSA &&) = delete;
+
~MemorySSA();
MemorySSAWalker *getWalker();
@@ -825,13 +833,13 @@
const MemoryUseOrDef *Template = nullptr);
private:
- class ClobberWalkerBase;
- class CachingWalker;
- class SkipSelfWalker;
+ template <class AliasAnalysisType> class ClobberWalkerBase;
+ template <class AliasAnalysisType> class CachingWalker;
+ template <class AliasAnalysisType> class SkipSelfWalker;
class OptimizeUses;
- CachingWalker *getWalkerImpl();
- void buildMemorySSA();
+ CachingWalker<AliasAnalysis> *getWalkerImpl();
+ void buildMemorySSA(BatchAAResults &BAA);
void optimizeUses();
void prepareForMoveTo(MemoryAccess *, BasicBlock *);
@@ -845,7 +853,8 @@
void markUnreachableAsLiveOnEntry(BasicBlock *BB);
bool dominatesUse(const MemoryAccess *, const MemoryAccess *) const;
MemoryPhi *createMemoryPhi(BasicBlock *BB);
- MemoryUseOrDef *createNewAccess(Instruction *,
+ template <typename AliasAnalysisType>
+ MemoryUseOrDef *createNewAccess(Instruction *, AliasAnalysisType *,
const MemoryUseOrDef *Template = nullptr);
MemoryAccess *findDominatingDef(BasicBlock *, enum InsertionPlace);
void placePHINodes(const SmallPtrSetImpl<BasicBlock *> &);
@@ -881,9 +890,9 @@
mutable DenseMap<const MemoryAccess *, unsigned long> BlockNumbering;
// Memory SSA building info
- std::unique_ptr<ClobberWalkerBase> WalkerBase;
- std::unique_ptr<CachingWalker> Walker;
- std::unique_ptr<SkipSelfWalker> SkipWalker;
+ std::unique_ptr<ClobberWalkerBase<AliasAnalysis>> WalkerBase;
+ std::unique_ptr<CachingWalker<AliasAnalysis>> Walker;
+ std::unique_ptr<SkipSelfWalker<AliasAnalysis>> SkipWalker;
unsigned NextID;
};
@@ -927,6 +936,9 @@
MemorySSA &getMSSA() { return *MSSA.get(); }
std::unique_ptr<MemorySSA> MSSA;
+
+ bool invalidate(Function &F, const PreservedAnalyses &PA,
+ FunctionAnalysisManager::Invalidator &Inv);
};
Result run(Function &F, FunctionAnalysisManager &AM);
@@ -1039,8 +1051,6 @@
/// the walker it uses or returns.
virtual void invalidateInfo(MemoryAccess *) {}
- virtual void verify(const MemorySSA *MSSA) { assert(MSSA == this->MSSA); }
-
protected:
friend class MemorySSA; // For updating MSSA pointer in MemorySSA move
// constructor.
@@ -1096,15 +1106,15 @@
assert(Access && "Tried to access past the end of our iterator");
// Go to the first argument for phis, and the defining access for everything
// else.
- if (MemoryPhi *MP = dyn_cast<MemoryPhi>(Access))
+ if (const MemoryPhi *MP = dyn_cast<MemoryPhi>(Access))
return MP->getIncomingValue(ArgNo);
return cast<MemoryUseOrDef>(Access)->getDefiningAccess();
}
using BaseT::operator++;
- memoryaccess_def_iterator &operator++() {
+ memoryaccess_def_iterator_base &operator++() {
assert(Access && "Hit end of iterator");
- if (MemoryPhi *MP = dyn_cast<MemoryPhi>(Access)) {
+ if (const MemoryPhi *MP = dyn_cast<MemoryPhi>(Access)) {
if (++ArgNo >= MP->getNumIncomingValues()) {
ArgNo = 0;
Access = nullptr;
diff --git a/linux-x64/clang/include/llvm/Analysis/MemorySSAUpdater.h b/linux-x64/clang/include/llvm/Analysis/MemorySSAUpdater.h
index 58cf1cc..6467d41 100644
--- a/linux-x64/clang/include/llvm/Analysis/MemorySSAUpdater.h
+++ b/linux-x64/clang/include/llvm/Analysis/MemorySSAUpdater.h
@@ -105,7 +105,12 @@
/// Update the MemoryPhi in `To` to have a single incoming edge from `From`,
/// following a CFG change that replaced multiple edges (switch) with a direct
/// branch.
- void removeDuplicatePhiEdgesBetween(BasicBlock *From, BasicBlock *To);
+ void removeDuplicatePhiEdgesBetween(const BasicBlock *From,
+ const BasicBlock *To);
+ /// Update MemorySSA when inserting a unique backedge block for a loop.
+ void updatePhisWhenInsertingUniqueBackedgeBlock(BasicBlock *LoopHeader,
+ BasicBlock *LoopPreheader,
+ BasicBlock *BackedgeBlock);
/// Update MemorySSA after a loop was cloned, given the blocks in RPO order,
/// the exit blocks and a 1:1 mapping of all blocks and instructions
/// cloned. This involves duplicating all defs and uses in the cloned blocks
@@ -240,6 +245,16 @@
/// deleted after this call.
void removeBlocks(const SmallPtrSetImpl<BasicBlock *> &DeadBlocks);
+ /// Instruction I will be changed to an unreachable. Remove all accesses in
+ /// I's block that follow I (inclusive), and update the Phis in the block's
+ /// successors.
+ void changeToUnreachable(const Instruction *I);
+
+ /// Conditional branch BI is changed or replaced with an unconditional branch
+ /// to `To`. Update Phis in BI's successors to remove BI's BB.
+ void changeCondBranchToUnconditionalTo(const BranchInst *BI,
+ const BasicBlock *To);
+
/// Get handle on MemorySSA.
MemorySSA* getMemorySSA() const { return MSSA; }
@@ -261,6 +276,7 @@
MemoryAccess *recursePhi(MemoryAccess *Phi);
template <class RangeType>
MemoryAccess *tryRemoveTrivialPhi(MemoryPhi *Phi, RangeType &Operands);
+ void tryRemoveTrivialPhis(ArrayRef<WeakVH> UpdatedPHIs);
void fixupDefs(const SmallVectorImpl<WeakVH> &);
// Clone all uses and defs from BB to NewBB given a 1:1 map of all
// instructions and blocks cloned, and a map of MemoryPhi : Definition
@@ -271,8 +287,14 @@
// not necessarily be MemoryPhis themselves, they may be MemoryDefs. As such,
// the map is between MemoryPhis and MemoryAccesses, where the MemoryAccesses
// may be MemoryPhis or MemoryDefs and not MemoryUses.
+ // If CloneWasSimplified = false, the clone was exact. Otherwise, assume that
+ // the clone involved simplifications that may have: (1) turned a MemoryUse
+ // into an instruction that MemorySSA has no representation for, or (2) turned
+ // a MemoryDef into a MemoryUse or an instruction that MemorySSA has no
+ // representation for. No other cases are supported.
void cloneUsesAndDefs(BasicBlock *BB, BasicBlock *NewBB,
- const ValueToValueMapTy &VMap, PhiToDefMap &MPhiMap);
+ const ValueToValueMapTy &VMap, PhiToDefMap &MPhiMap,
+ bool CloneWasSimplified = false);
template <typename Iter>
void privateUpdateExitBlocksForClonedLoop(ArrayRef<BasicBlock *> ExitBlocks,
Iter ValuesBegin, Iter ValuesEnd,
diff --git a/linux-x64/clang/include/llvm/Analysis/ObjCARCAliasAnalysis.h b/linux-x64/clang/include/llvm/Analysis/ObjCARCAliasAnalysis.h
index ed15472..b4f4e5f 100644
--- a/linux-x64/clang/include/llvm/Analysis/ObjCARCAliasAnalysis.h
+++ b/linux-x64/clang/include/llvm/Analysis/ObjCARCAliasAnalysis.h
@@ -52,14 +52,17 @@
return false;
}
- AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
- bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal);
+ AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
+ AAQueryInfo &AAQI);
+ bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI,
+ bool OrLocal);
using AAResultBase::getModRefBehavior;
FunctionModRefBehavior getModRefBehavior(const Function *F);
using AAResultBase::getModRefInfo;
- ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc);
+ ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc,
+ AAQueryInfo &AAQI);
};
/// Analysis pass providing a never-invalidated alias analysis result.
diff --git a/linux-x64/clang/include/llvm/Analysis/ObjCARCInstKind.h b/linux-x64/clang/include/llvm/Analysis/ObjCARCInstKind.h
index 9890d5f..dc6093a 100644
--- a/linux-x64/clang/include/llvm/Analysis/ObjCARCInstKind.h
+++ b/linux-x64/clang/include/llvm/Analysis/ObjCARCInstKind.h
@@ -74,6 +74,10 @@
/// passed a null pointer.
bool IsNoopOnNull(ARCInstKind Class);
+/// Test if the given class represents instructions which do nothing if
+/// passed a global variable.
+bool IsNoopOnGlobal(ARCInstKind Class);
+
/// Test if the given class represents instructions which are always safe
/// to mark with the "tail" keyword.
bool IsAlwaysTail(ARCInstKind Class);
diff --git a/linux-x64/clang/include/llvm/Analysis/OptimizationRemarkEmitter.h b/linux-x64/clang/include/llvm/Analysis/OptimizationRemarkEmitter.h
index a2b2955..7b84044 100644
--- a/linux-x64/clang/include/llvm/Analysis/OptimizationRemarkEmitter.h
+++ b/linux-x64/clang/include/llvm/Analysis/OptimizationRemarkEmitter.h
@@ -77,7 +77,7 @@
// remarks enabled. We can't currently check whether remarks are requested
// for the calling pass since that requires actually building the remark.
- if (F->getContext().getDiagnosticsOutputFile() ||
+ if (F->getContext().getRemarkStreamer() ||
F->getContext().getDiagHandlerPtr()->isAnyRemarkEnabled()) {
auto R = RemarkBuilder();
emit((DiagnosticInfoOptimizationBase &)R);
@@ -92,7 +92,7 @@
/// provide more context so that non-trivial false positives can be quickly
/// detected by the user.
bool allowExtraAnalysis(StringRef PassName) const {
- return (F->getContext().getDiagnosticsOutputFile() ||
+ return (F->getContext().getRemarkStreamer() ||
F->getContext().getDiagHandlerPtr()->isAnyRemarkEnabled(PassName));
}
diff --git a/linux-x64/clang/include/llvm/Analysis/OrderedBasicBlock.h b/linux-x64/clang/include/llvm/Analysis/OrderedBasicBlock.h
index 6823f68..ae64c01 100644
--- a/linux-x64/clang/include/llvm/Analysis/OrderedBasicBlock.h
+++ b/linux-x64/clang/include/llvm/Analysis/OrderedBasicBlock.h
@@ -59,6 +59,14 @@
/// only relevant to compare relative instruction positions inside \p BB.
/// Returns false for A == B.
bool dominates(const Instruction *A, const Instruction *B);
+
+ /// Remove \p I from the ordering, if it is present.
+ void eraseInstruction(const Instruction *I);
+
+ /// Replace \p Old with \p New in the ordering. \p New is assigned the
+ /// numbering of \p Old, so it must be inserted at the same position in the
+ /// IR.
+ void replaceInstruction(const Instruction *Old, const Instruction *New);
};
} // End llvm namespace
diff --git a/linux-x64/clang/include/llvm/Analysis/ProfileSummaryInfo.h b/linux-x64/clang/include/llvm/Analysis/ProfileSummaryInfo.h
index 636b5d6..f309d34 100644
--- a/linux-x64/clang/include/llvm/Analysis/ProfileSummaryInfo.h
+++ b/linux-x64/clang/include/llvm/Analysis/ProfileSummaryInfo.h
@@ -73,6 +73,12 @@
Summary->getKind() == ProfileSummary::PSK_Instr;
}
+ /// Returns true if module \c M has context sensitive instrumentation profile.
+ bool hasCSInstrumentationProfile() {
+ return hasProfileSummary() &&
+ Summary->getKind() == ProfileSummary::PSK_CSInstr;
+ }
+
/// Handle the invalidation of this information.
///
/// When used as a result of \c ProfileSummaryAnalysis this method will be
@@ -86,7 +92,8 @@
/// Returns the profile count for \p CallInst.
Optional<uint64_t> getProfileCount(const Instruction *CallInst,
- BlockFrequencyInfo *BFI);
+ BlockFrequencyInfo *BFI,
+ bool AllowSynthetic = false);
/// Returns true if the working set size of the code is considered huge.
bool hasHugeWorkingSetSize();
/// Returns true if \p F has hot function entry.
diff --git a/linux-x64/clang/include/llvm/Analysis/PtrUseVisitor.h b/linux-x64/clang/include/llvm/Analysis/PtrUseVisitor.h
index dca8718..fbf04c8 100644
--- a/linux-x64/clang/include/llvm/Analysis/PtrUseVisitor.h
+++ b/linux-x64/clang/include/llvm/Analysis/PtrUseVisitor.h
@@ -256,6 +256,10 @@
enqueueUsers(BC);
}
+ void visitAddrSpaceCastInst(AddrSpaceCastInst &ASC) {
+ enqueueUsers(ASC);
+ }
+
void visitPtrToIntInst(PtrToIntInst &I) {
PI.setEscaped(&I);
}
diff --git a/linux-x64/clang/include/llvm/Analysis/ScalarEvolution.h b/linux-x64/clang/include/llvm/Analysis/ScalarEvolution.h
index f3a0351..0bd98ef 100644
--- a/linux-x64/clang/include/llvm/Analysis/ScalarEvolution.h
+++ b/linux-x64/clang/include/llvm/Analysis/ScalarEvolution.h
@@ -537,7 +537,7 @@
const SCEV *getConstant(ConstantInt *V);
const SCEV *getConstant(const APInt &Val);
const SCEV *getConstant(Type *Ty, uint64_t V, bool isSigned = false);
- const SCEV *getTruncateExpr(const SCEV *Op, Type *Ty);
+ const SCEV *getTruncateExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0);
const SCEV *getZeroExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0);
const SCEV *getSignExtendExpr(const SCEV *Op, Type *Ty, unsigned Depth = 0);
const SCEV *getAnyExtendExpr(const SCEV *Op, Type *Ty);
@@ -598,6 +598,8 @@
/// \p IndexExprs The expressions for the indices.
const SCEV *getGEPExpr(GEPOperator *GEP,
const SmallVectorImpl<const SCEV *> &IndexExprs);
+ const SCEV *getMinMaxExpr(unsigned Kind,
+ SmallVectorImpl<const SCEV *> &Operands);
const SCEV *getSMaxExpr(const SCEV *LHS, const SCEV *RHS);
const SCEV *getSMaxExpr(SmallVectorImpl<const SCEV *> &Operands);
const SCEV *getUMaxExpr(const SCEV *LHS, const SCEV *RHS);
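A minimal sketch of combining the existing min/max builders (illustrative only, not part of the patch; the clamp helper is hypothetical):

    #include "llvm/Analysis/ScalarEvolution.h"
    using namespace llvm;

    // Illustrative only: clamp an unsigned trip-count expression to [1, Limit].
    static const SCEV *clampTripCount(ScalarEvolution &SE, const SCEV *TC,
                                      const SCEV *Limit) {
      const SCEV *One = SE.getOne(TC->getType());
      const SCEV *AtLeastOne = SE.getUMaxExpr(TC, One); // lower clamp
      return SE.getUMinExpr(AtLeastOne, Limit);         // upper clamp
    }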
@@ -635,11 +637,13 @@
/// Return a SCEV corresponding to a conversion of the input value to the
/// specified type. If the type must be extended, it is zero extended.
- const SCEV *getTruncateOrZeroExtend(const SCEV *V, Type *Ty);
+ const SCEV *getTruncateOrZeroExtend(const SCEV *V, Type *Ty,
+ unsigned Depth = 0);
/// Return a SCEV corresponding to a conversion of the input value to the
/// specified type. If the type must be extended, it is sign extended.
- const SCEV *getTruncateOrSignExtend(const SCEV *V, Type *Ty);
+ const SCEV *getTruncateOrSignExtend(const SCEV *V, Type *Ty,
+ unsigned Depth = 0);
/// Return a SCEV corresponding to a conversion of the input value to the
/// specified type. If the type must be extended, it is zero extended. The
@@ -742,9 +746,12 @@
unsigned getSmallConstantTripMultiple(const Loop *L,
BasicBlock *ExitingBlock);
- /// Get the expression for the number of loop iterations for which this loop
- /// is guaranteed not to exit via ExitingBlock. Otherwise return
- /// SCEVCouldNotCompute.
+ /// Return the number of times the backedge executes before the given exit
+ /// would be taken; if not exactly computable, return SCEVCouldNotCompute.
+ /// For a single exit loop, this value is equivalent to the result of
+ /// getBackedgeTakenCount. The loop is guaranteed to exit (via *some* exit)
+ /// before the backedge is executed (ExitCount + 1) times. Note that there
+ /// is no guarantee about *which* exit is taken on the exiting iteration.
const SCEV *getExitCount(const Loop *L, BasicBlock *ExitingBlock);
/// If the specified loop has a predictable backedge-taken count, return it,
@@ -780,6 +787,13 @@
/// backedge-taken count.
bool hasLoopInvariantBackedgeTakenCount(const Loop *L);
+ // This method should be called by the client when it made any change that
+ // would invalidate SCEV's answers, and the client wants to remove all loop
+ // information held internally by ScalarEvolution. This is intended to be used
+ // when the alternative to forget a loop is too expensive (i.e. large loop
+ // bodies).
+ void forgetAllLoops();
+
/// This method should be called by the client when it has changed a loop in
/// a way that may affect ScalarEvolution's ability to compute a trip count,
/// or if the loop is deleted. This call is potentially expensive for large
@@ -1869,6 +1883,16 @@
/// Assign A and B to LHS and RHS, respectively.
bool matchURem(const SCEV *Expr, const SCEV *&LHS, const SCEV *&RHS);
+ /// Look for a SCEV expression with type `SCEVType` and operands `Ops` in
+ /// `UniqueSCEVs`.
+ ///
+ /// The first component of the returned tuple is the SCEV if found and null
+ /// otherwise. The second component is the `FoldingSetNodeID` that was
+ /// constructed to look up the SCEV and the third component is the insertion
+ /// point.
+ std::tuple<const SCEV *, FoldingSetNodeID, void *>
+ findExistingSCEVInCache(int SCEVType, ArrayRef<const SCEV *> Ops);
+
FoldingSet<SCEV> UniqueSCEVs;
FoldingSet<SCEVPredicate> UniquePreds;
BumpPtrAllocator SCEVAllocator;
diff --git a/linux-x64/clang/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h b/linux-x64/clang/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h
index d417850..98d5323 100644
--- a/linux-x64/clang/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h
+++ b/linux-x64/clang/include/llvm/Analysis/ScalarEvolutionAliasAnalysis.h
@@ -30,7 +30,8 @@
explicit SCEVAAResult(ScalarEvolution &SE) : AAResultBase(), SE(SE) {}
SCEVAAResult(SCEVAAResult &&Arg) : AAResultBase(std::move(Arg)), SE(Arg.SE) {}
- AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
+ AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
+ AAQueryInfo &AAQI);
private:
Value *GetBaseValue(const SCEV *S);
diff --git a/linux-x64/clang/include/llvm/Analysis/ScalarEvolutionExpander.h b/linux-x64/clang/include/llvm/Analysis/ScalarEvolutionExpander.h
index a2c1258..a519f93 100644
--- a/linux-x64/clang/include/llvm/Analysis/ScalarEvolutionExpander.h
+++ b/linux-x64/clang/include/llvm/Analysis/ScalarEvolutionExpander.h
@@ -315,8 +315,10 @@
SmallPtrSetImpl<const SCEV *> &Processed);
/// Insert the specified binary operator, doing a small amount of work to
- /// avoid inserting an obviously redundant operation.
- Value *InsertBinop(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS);
+ /// avoid inserting an obviously redundant operation, and hoisting to an
+ /// outer loop when the opportunity is there and it is safe.
+ Value *InsertBinop(Instruction::BinaryOps Opcode, Value *LHS, Value *RHS,
+ SCEV::NoWrapFlags Flags, bool IsSafeToHoist);
/// Arrange for there to be a cast of V to Ty at IP, reusing an existing
/// cast if a suitable one exists, moving an existing cast if a suitable one
@@ -367,6 +369,10 @@
Value *visitUMaxExpr(const SCEVUMaxExpr *S);
+ Value *visitSMinExpr(const SCEVSMinExpr *S);
+
+ Value *visitUMinExpr(const SCEVUMinExpr *S);
+
Value *visitUnknown(const SCEVUnknown *S) {
return S->getValue();
}
diff --git a/linux-x64/clang/include/llvm/Analysis/ScalarEvolutionExpressions.h b/linux-x64/clang/include/llvm/Analysis/ScalarEvolutionExpressions.h
index e187a96..d008af7 100644
--- a/linux-x64/clang/include/llvm/Analysis/ScalarEvolutionExpressions.h
+++ b/linux-x64/clang/include/llvm/Analysis/ScalarEvolutionExpressions.h
@@ -39,7 +39,7 @@
// These should be ordered in terms of increasing complexity to make the
// folders simpler.
scConstant, scTruncate, scZeroExtend, scSignExtend, scAddExpr, scMulExpr,
- scUDivExpr, scAddRecExpr, scUMaxExpr, scSMaxExpr,
+ scUDivExpr, scAddRecExpr, scUMaxExpr, scSMaxExpr, scUMinExpr, scSMinExpr,
scUnknown, scCouldNotCompute
};
@@ -190,10 +190,9 @@
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const SCEV *S) {
- return S->getSCEVType() == scAddExpr ||
- S->getSCEVType() == scMulExpr ||
- S->getSCEVType() == scSMaxExpr ||
- S->getSCEVType() == scUMaxExpr ||
+ return S->getSCEVType() == scAddExpr || S->getSCEVType() == scMulExpr ||
+ S->getSCEVType() == scSMaxExpr || S->getSCEVType() == scUMaxExpr ||
+ S->getSCEVType() == scSMinExpr || S->getSCEVType() == scUMinExpr ||
S->getSCEVType() == scAddRecExpr;
}
};
@@ -208,10 +207,9 @@
public:
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const SCEV *S) {
- return S->getSCEVType() == scAddExpr ||
- S->getSCEVType() == scMulExpr ||
- S->getSCEVType() == scSMaxExpr ||
- S->getSCEVType() == scUMaxExpr;
+ return S->getSCEVType() == scAddExpr || S->getSCEVType() == scMulExpr ||
+ S->getSCEVType() == scSMaxExpr || S->getSCEVType() == scUMaxExpr ||
+ S->getSCEVType() == scSMinExpr || S->getSCEVType() == scUMinExpr;
}
/// Set flags for a non-recurrence without clearing previously set flags.
@@ -366,18 +364,54 @@
}
};
- /// This class represents a signed maximum selection.
- class SCEVSMaxExpr : public SCEVCommutativeExpr {
+ /// This node is the base class for min/max selections.
+ class SCEVMinMaxExpr : public SCEVCommutativeExpr {
friend class ScalarEvolution;
- SCEVSMaxExpr(const FoldingSetNodeIDRef ID,
- const SCEV *const *O, size_t N)
- : SCEVCommutativeExpr(ID, scSMaxExpr, O, N) {
- // Max never overflows.
+ static bool isMinMaxType(enum SCEVTypes T) {
+ return T == scSMaxExpr || T == scUMaxExpr || T == scSMinExpr ||
+ T == scUMinExpr;
+ }
+
+ protected:
+ /// Note: Constructing subclasses via this constructor is allowed.
+ SCEVMinMaxExpr(const FoldingSetNodeIDRef ID, enum SCEVTypes T,
+ const SCEV *const *O, size_t N)
+ : SCEVCommutativeExpr(ID, T, O, N) {
+ assert(isMinMaxType(T));
+ // Min and max never overflow
setNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW));
}
public:
+ static bool classof(const SCEV *S) {
+ return isMinMaxType(static_cast<SCEVTypes>(S->getSCEVType()));
+ }
+
+ static enum SCEVTypes negate(enum SCEVTypes T) {
+ switch (T) {
+ case scSMaxExpr:
+ return scSMinExpr;
+ case scSMinExpr:
+ return scSMaxExpr;
+ case scUMaxExpr:
+ return scUMinExpr;
+ case scUMinExpr:
+ return scUMaxExpr;
+ default:
+ llvm_unreachable("Not a min or max SCEV type!");
+ }
+ }
+ };
+
+ /// This class represents a signed maximum selection.
+ class SCEVSMaxExpr : public SCEVMinMaxExpr {
+ friend class ScalarEvolution;
+
+ SCEVSMaxExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N)
+ : SCEVMinMaxExpr(ID, scSMaxExpr, O, N) {}
+
+ public:
/// Methods for support type inquiry through isa, cast, and dyn_cast:
static bool classof(const SCEV *S) {
return S->getSCEVType() == scSMaxExpr;
@@ -385,15 +419,11 @@
};
/// This class represents an unsigned maximum selection.
- class SCEVUMaxExpr : public SCEVCommutativeExpr {
+ class SCEVUMaxExpr : public SCEVMinMaxExpr {
friend class ScalarEvolution;
- SCEVUMaxExpr(const FoldingSetNodeIDRef ID,
- const SCEV *const *O, size_t N)
- : SCEVCommutativeExpr(ID, scUMaxExpr, O, N) {
- // Max never overflows.
- setNoWrapFlags((NoWrapFlags)(FlagNUW | FlagNSW));
- }
+ SCEVUMaxExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N)
+ : SCEVMinMaxExpr(ID, scUMaxExpr, O, N) {}
public:
/// Methods for support type inquiry through isa, cast, and dyn_cast:
@@ -402,6 +432,34 @@
}
};
+ /// This class represents a signed minimum selection.
+ class SCEVSMinExpr : public SCEVMinMaxExpr {
+ friend class ScalarEvolution;
+
+ SCEVSMinExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N)
+ : SCEVMinMaxExpr(ID, scSMinExpr, O, N) {}
+
+ public:
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const SCEV *S) {
+ return S->getSCEVType() == scSMinExpr;
+ }
+ };
+
+ /// This class represents an unsigned minimum selection.
+ class SCEVUMinExpr : public SCEVMinMaxExpr {
+ friend class ScalarEvolution;
+
+ SCEVUMinExpr(const FoldingSetNodeIDRef ID, const SCEV *const *O, size_t N)
+ : SCEVMinMaxExpr(ID, scUMinExpr, O, N) {}
+
+ public:
+ /// Methods for support type inquiry through isa, cast, and dyn_cast:
+ static bool classof(const SCEV *S) {
+ return S->getSCEVType() == scUMinExpr;
+ }
+ };
+
/// This means that we are dealing with an entirely unknown SCEV
/// value, and only represent it as its LLVM Value. This is the
/// "bottom" value for the analysis.
@@ -474,6 +532,10 @@
return ((SC*)this)->visitSMaxExpr((const SCEVSMaxExpr*)S);
case scUMaxExpr:
return ((SC*)this)->visitUMaxExpr((const SCEVUMaxExpr*)S);
+ case scSMinExpr:
+ return ((SC *)this)->visitSMinExpr((const SCEVSMinExpr *)S);
+ case scUMinExpr:
+ return ((SC *)this)->visitUMinExpr((const SCEVUMinExpr *)S);
case scUnknown:
return ((SC*)this)->visitUnknown((const SCEVUnknown*)S);
case scCouldNotCompute:
@@ -527,6 +589,8 @@
case scMulExpr:
case scSMaxExpr:
case scUMaxExpr:
+ case scSMinExpr:
+ case scUMinExpr:
case scAddRecExpr:
for (const auto *Op : cast<SCEVNAryExpr>(S)->operands())
push(Op);
@@ -689,6 +753,26 @@
return !Changed ? Expr : SE.getUMaxExpr(Operands);
}
+ const SCEV *visitSMinExpr(const SCEVSMinExpr *Expr) {
+ SmallVector<const SCEV *, 2> Operands;
+ bool Changed = false;
+ for (auto *Op : Expr->operands()) {
+ Operands.push_back(((SC *)this)->visit(Op));
+ Changed |= Op != Operands.back();
+ }
+ return !Changed ? Expr : SE.getSMinExpr(Operands);
+ }
+
+ const SCEV *visitUMinExpr(const SCEVUMinExpr *Expr) {
+ SmallVector<const SCEV *, 2> Operands;
+ bool Changed = false;
+ for (auto *Op : Expr->operands()) {
+ Operands.push_back(((SC *)this)->visit(Op));
+ Changed |= Op != Operands.back();
+ }
+ return !Changed ? Expr : SE.getUMinExpr(Operands);
+ }
+
const SCEV *visitUnknown(const SCEVUnknown *Expr) {
return Expr;
}
diff --git a/linux-x64/clang/include/llvm/Analysis/ScopedNoAliasAA.h b/linux-x64/clang/include/llvm/Analysis/ScopedNoAliasAA.h
index 94c6021..dae733b 100644
--- a/linux-x64/clang/include/llvm/Analysis/ScopedNoAliasAA.h
+++ b/linux-x64/clang/include/llvm/Analysis/ScopedNoAliasAA.h
@@ -39,9 +39,12 @@
return false;
}
- AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
- ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc);
- ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2);
+ AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
+ AAQueryInfo &AAQI);
+ ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc,
+ AAQueryInfo &AAQI);
+ ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
+ AAQueryInfo &AAQI);
private:
bool mayAliasInScopes(const MDNode *Scopes, const MDNode *NoAlias) const;
diff --git a/linux-x64/clang/include/llvm/Analysis/TargetFolder.h b/linux-x64/clang/include/llvm/Analysis/TargetFolder.h
index b8047a4..7ab6562 100644
--- a/linux-x64/clang/include/llvm/Analysis/TargetFolder.h
+++ b/linux-x64/clang/include/llvm/Analysis/TargetFolder.h
@@ -124,6 +124,10 @@
return Fold(ConstantExpr::getNot(C));
}
+ Constant *CreateUnOp(Instruction::UnaryOps Opc, Constant *C) const {
+ return Fold(ConstantExpr::get(Opc, C));
+ }
+
//===--------------------------------------------------------------------===//
// Memory Instructions
//===--------------------------------------------------------------------===//
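As a hedged usage sketch for the CreateUnOp hook added above (the free function below is hypothetical; it relies only on TargetFolder's constructor and the new method):

  #include "llvm/Analysis/TargetFolder.h"
  #include "llvm/IR/Constants.h"
  #include "llvm/IR/DataLayout.h"
  #include "llvm/IR/Instruction.h"

  // Folds a unary operator over a constant, e.g. fneg(2.0) -> -2.0,
  // without emitting an instruction.
  llvm::Constant *foldFNeg(const llvm::DataLayout &DL, llvm::Constant *C) {
    llvm::TargetFolder Folder(DL);
    return Folder.CreateUnOp(llvm::Instruction::FNeg, C);
  }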
diff --git a/linux-x64/clang/include/llvm/Analysis/TargetLibraryInfo.def b/linux-x64/clang/include/llvm/Analysis/TargetLibraryInfo.def
index f73c69e..afed404 100644
--- a/linux-x64/clang/include/llvm/Analysis/TargetLibraryInfo.def
+++ b/linux-x64/clang/include/llvm/Analysis/TargetLibraryInfo.def
@@ -11,6 +11,15 @@
// Which is defined depends on whether TLI_DEFINE_ENUM is defined or
// TLI_DEFINE_STRING is defined. Only one should be defined at a time.
+// NOTE: The nofree attribute is added to Libfuncs which are not
+// listed as free or realloc functions in MemoryBuiltins.cpp
+//
+// When adding a function which frees memory, include the LibFunc
+// in lib/Analysis/MemoryBuiltins.cpp "isLibFreeFunction".
+//
+// When adding a LibFunc which reallocates memory, include the LibFunc
+// in lib/Analysis/MemoryBuiltins.cpp "AllocationFnData[]".
+
#if !(defined(TLI_DEFINE_ENUM) || defined(TLI_DEFINE_STRING))
#error "Must define TLI_DEFINE_ENUM or TLI_DEFINE_STRING for TLI .def."
#elif defined(TLI_DEFINE_ENUM) && defined(TLI_DEFINE_STRING)
@@ -329,6 +338,10 @@
/// long double __logl_finite(long double x);
TLI_DEFINE_ENUM_INTERNAL(logl_finite)
TLI_DEFINE_STRING_INTERNAL("__logl_finite")
+/// void *__memccpy_chk(void *dst, const void *src, int c, size_t n,
+/// size_t dstsize);
+TLI_DEFINE_ENUM_INTERNAL(memccpy_chk)
+TLI_DEFINE_STRING_INTERNAL("__memccpy_chk")
/// void *__memcpy_chk(void *s1, const void *s2, size_t n, size_t s1size);
TLI_DEFINE_ENUM_INTERNAL(memcpy_chk)
TLI_DEFINE_STRING_INTERNAL("__memcpy_chk")
@@ -372,6 +385,23 @@
/// float __sinpif(float x);
TLI_DEFINE_ENUM_INTERNAL(sinpif)
TLI_DEFINE_STRING_INTERNAL("__sinpif")
+/// int __small_fprintf(FILE *stream, const char *format, ...);
+TLI_DEFINE_ENUM_INTERNAL(small_fprintf)
+TLI_DEFINE_STRING_INTERNAL("__small_fprintf")
+/// int __small_printf(const char *format, ...);
+TLI_DEFINE_ENUM_INTERNAL(small_printf)
+TLI_DEFINE_STRING_INTERNAL("__small_printf")
+/// int __small_sprintf(char *str, const char *format, ...);
+TLI_DEFINE_ENUM_INTERNAL(small_sprintf)
+TLI_DEFINE_STRING_INTERNAL("__small_sprintf")
+/// int __snprintf_chk(char *s, size_t n, int flags, size_t slen,
+/// const char *format, ...);
+TLI_DEFINE_ENUM_INTERNAL(snprintf_chk)
+TLI_DEFINE_STRING_INTERNAL("__snprintf_chk")
+/// int __sprintf_chk(char *str, int flags, size_t str_len,
+/// const char *format, ...);
+TLI_DEFINE_ENUM_INTERNAL(sprintf_chk)
+TLI_DEFINE_STRING_INTERNAL("__sprintf_chk")
/// double __sqrt_finite(double x);
TLI_DEFINE_ENUM_INTERNAL(sqrt_finite)
TLI_DEFINE_STRING_INTERNAL("__sqrt_finite")
@@ -387,12 +417,26 @@
/// char *__stpncpy_chk(char *s1, const char *s2, size_t n, size_t s1size);
TLI_DEFINE_ENUM_INTERNAL(stpncpy_chk)
TLI_DEFINE_STRING_INTERNAL("__stpncpy_chk")
+/// char *__strcat_chk(char *s1, const char *s2, size_t s1size);
+TLI_DEFINE_ENUM_INTERNAL(strcat_chk)
+TLI_DEFINE_STRING_INTERNAL("__strcat_chk")
/// char *__strcpy_chk(char *s1, const char *s2, size_t s1size);
TLI_DEFINE_ENUM_INTERNAL(strcpy_chk)
TLI_DEFINE_STRING_INTERNAL("__strcpy_chk")
/// char * __strdup(const char *s);
TLI_DEFINE_ENUM_INTERNAL(dunder_strdup)
TLI_DEFINE_STRING_INTERNAL("__strdup")
+/// size_t __strlcat_chk(char *dst, const char *src, size_t size,
+/// size_t dstsize);
+TLI_DEFINE_ENUM_INTERNAL(strlcat_chk)
+TLI_DEFINE_STRING_INTERNAL("__strlcat_chk")
+/// size_t __strlcpy_chk(char *dst, const char *src, size_t size,
+/// size_t dstsize);
+TLI_DEFINE_ENUM_INTERNAL(strlcpy_chk)
+TLI_DEFINE_STRING_INTERNAL("__strlcpy_chk")
+/// char *__strncat_chk(char *s1, const char *s2, size_t n, size_t s1size);
+TLI_DEFINE_ENUM_INTERNAL(strncat_chk)
+TLI_DEFINE_STRING_INTERNAL("__strncat_chk")
/// char *__strncpy_chk(char *s1, const char *s2, size_t n, size_t s1size);
TLI_DEFINE_ENUM_INTERNAL(strncpy_chk)
TLI_DEFINE_STRING_INTERNAL("__strncpy_chk")
@@ -402,6 +446,14 @@
/// char * __strtok_r(char *s, const char *delim, char **save_ptr);
TLI_DEFINE_ENUM_INTERNAL(dunder_strtok_r)
TLI_DEFINE_STRING_INTERNAL("__strtok_r")
+/// int __vsnprintf_chk(char *s, size_t n, int flags, size_t slen,
+/// const char *format, va_list ap);
+TLI_DEFINE_ENUM_INTERNAL(vsnprintf_chk)
+TLI_DEFINE_STRING_INTERNAL("__vsnprintf_chk")
+/// int __vsprintf_chk(char *s, int flags, size_t slen, const char *format,
+/// va_list ap);
+TLI_DEFINE_ENUM_INTERNAL(vsprintf_chk)
+TLI_DEFINE_STRING_INTERNAL("__vsprintf_chk")
/// int abs(int j);
TLI_DEFINE_ENUM_INTERNAL(abs)
TLI_DEFINE_STRING_INTERNAL("abs")
@@ -1191,6 +1243,12 @@
/// char *strdup(const char *s1);
TLI_DEFINE_ENUM_INTERNAL(strdup)
TLI_DEFINE_STRING_INTERNAL("strdup")
+/// size_t strlcat(char *dst, const char *src, size_t size);
+TLI_DEFINE_ENUM_INTERNAL(strlcat)
+TLI_DEFINE_STRING_INTERNAL("strlcat")
+/// size_t strlcpy(char *dst, const char *src, size_t size);
+TLI_DEFINE_ENUM_INTERNAL(strlcpy)
+TLI_DEFINE_STRING_INTERNAL("strlcpy")
/// size_t strlen(const char *s);
TLI_DEFINE_ENUM_INTERNAL(strlen)
TLI_DEFINE_STRING_INTERNAL("strlen")
diff --git a/linux-x64/clang/include/llvm/Analysis/TargetLibraryInfo.h b/linux-x64/clang/include/llvm/Analysis/TargetLibraryInfo.h
index fc6811e..4b5200f 100644
--- a/linux-x64/clang/include/llvm/Analysis/TargetLibraryInfo.h
+++ b/linux-x64/clang/include/llvm/Analysis/TargetLibraryInfo.h
@@ -86,6 +86,7 @@
enum VectorLibrary {
NoLibrary, // Don't use any vector library.
Accelerate, // Use Accelerate framework.
+ MASSV, // IBM MASS vector library.
SVML // Intel short vector math library.
};
@@ -280,9 +281,9 @@
case LibFunc_trunc: case LibFunc_truncf: case LibFunc_truncl:
case LibFunc_log2: case LibFunc_log2f: case LibFunc_log2l:
case LibFunc_exp2: case LibFunc_exp2f: case LibFunc_exp2l:
- case LibFunc_memcmp: case LibFunc_strcmp: case LibFunc_strcpy:
- case LibFunc_stpcpy: case LibFunc_strlen: case LibFunc_strnlen:
- case LibFunc_memchr: case LibFunc_mempcpy:
+ case LibFunc_memcmp: case LibFunc_bcmp: case LibFunc_strcmp:
+ case LibFunc_strcpy: case LibFunc_stpcpy: case LibFunc_strlen:
+ case LibFunc_strnlen: case LibFunc_memchr: case LibFunc_mempcpy:
return true;
}
return false;
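A hedged sketch of selecting the new MASSV enumerator added above, assuming the existing addVectorizableFunctionsFromVecLib hook on TargetLibraryInfoImpl and a Triple named TargetTriple in scope:

  llvm::TargetLibraryInfoImpl TLII(TargetTriple);
  // Register the __*_massv vector routines so the vectorizers can use them.
  TLII.addVectorizableFunctionsFromVecLib(llvm::TargetLibraryInfoImpl::MASSV);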
diff --git a/linux-x64/clang/include/llvm/Analysis/TargetTransformInfo.h b/linux-x64/clang/include/llvm/Analysis/TargetTransformInfo.h
index 60dbf67..af1a12d 100644
--- a/linux-x64/clang/include/llvm/Analysis/TargetTransformInfo.h
+++ b/linux-x64/clang/include/llvm/Analysis/TargetTransformInfo.h
@@ -27,6 +27,10 @@
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/DataTypes.h"
+#include "llvm/Analysis/LoopInfo.h"
+#include "llvm/Analysis/ScalarEvolution.h"
+#include "llvm/IR/Dominators.h"
+#include "llvm/Analysis/AssumptionCache.h"
#include <functional>
namespace llvm {
@@ -35,6 +39,8 @@
enum ID : unsigned;
}
+class AssumptionCache;
+class BranchInst;
class Function;
class GlobalValue;
class IntrinsicInst;
@@ -44,6 +50,7 @@
class ScalarEvolution;
class StoreInst;
class SwitchInst;
+class TargetLibraryInfo;
class Type;
class User;
class Value;
@@ -72,6 +79,30 @@
}
};
+/// Attributes of a target dependent hardware loop.
+struct HardwareLoopInfo {
+ HardwareLoopInfo() = delete;
+ HardwareLoopInfo(Loop *L) : L(L) {}
+ Loop *L = nullptr;
+ BasicBlock *ExitBlock = nullptr;
+ BranchInst *ExitBranch = nullptr;
+ const SCEV *ExitCount = nullptr;
+ IntegerType *CountType = nullptr;
+ Value *LoopDecrement = nullptr; // Decrement the loop counter by this
+ // value in every iteration.
+ bool IsNestingLegal = false; // Can a hardware loop be a parent to
+ // another hardware loop?
+ bool CounterInReg = false; // Should loop counter be updated in
+ // the loop via a phi?
+ bool PerformEntryTest = false; // Generate the intrinsic which also performs
+ // icmp ne zero on the loop counter value and
+ // produces an i1 to guard the loop entry.
+ bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI,
+ DominatorTree &DT, bool ForceNestedLoop = false,
+ bool ForceHardwareLoopPHI = false);
+ bool canAnalyze(LoopInfo &LI);
+};
+
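To show the intended flow, a sketch of a target override that fills in these fields (MyTTIImpl is hypothetical; no particular backend is implied):

  bool MyTTIImpl::isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                           AssumptionCache &AC,
                                           TargetLibraryInfo *LibInfo,
                                           HardwareLoopInfo &HWLoopInfo) {
    LLVMContext &Ctx = L->getHeader()->getContext();
    // Count iterations in a 32-bit register, decrementing by one per iteration.
    HWLoopInfo.CountType = Type::getInt32Ty(Ctx);
    HWLoopInfo.LoopDecrement = ConstantInt::get(HWLoopInfo.CountType, 1);
    HWLoopInfo.IsNestingLegal = false; // only handle innermost loops
    return true;
  }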
/// This pass provides access to the codegen interfaces that are needed
/// for IR-level transformations.
class TargetTransformInfo {
@@ -80,7 +111,7 @@
/// API below.
///
/// This is used by targets to construct a TTI wrapping their target-specific
- /// implementaion that encodes appropriate costs for their target.
+ /// implementation that encodes appropriate costs for their target.
template <typename T> TargetTransformInfo(T Impl);
/// Construct a baseline TTI object using a minimal implementation of
@@ -208,18 +239,21 @@
/// This is the most basic query for estimating call cost: it only knows the
/// function type and (potentially) the number of arguments at the call site.
/// The latter is only interesting for varargs function types.
- int getCallCost(FunctionType *FTy, int NumArgs = -1) const;
+ int getCallCost(FunctionType *FTy, int NumArgs = -1,
+ const User *U = nullptr) const;
/// Estimate the cost of calling a specific function when lowered.
///
/// This overload adds the ability to reason about the particular function
/// being called in the event it is a library call with special lowering.
- int getCallCost(const Function *F, int NumArgs = -1) const;
+ int getCallCost(const Function *F, int NumArgs = -1,
+ const User *U = nullptr) const;
/// Estimate the cost of calling a specific function when lowered.
///
/// This overload allows specifying a set of candidate argument values.
- int getCallCost(const Function *F, ArrayRef<const Value *> Arguments) const;
+ int getCallCost(const Function *F, ArrayRef<const Value *> Arguments,
+ const User *U = nullptr) const;
/// \returns A value by which our inlining threshold should be multiplied.
/// This is primarily used to bump up the inlining threshold wholesale on
@@ -233,13 +267,19 @@
///
/// Mirrors the \c getCallCost method but uses an intrinsic identifier.
int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Type *> ParamTys) const;
+ ArrayRef<Type *> ParamTys,
+ const User *U = nullptr) const;
/// Estimate the cost of an intrinsic when lowered.
///
/// Mirrors the \c getCallCost method but uses an intrinsic identifier.
int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<const Value *> Arguments) const;
+ ArrayRef<const Value *> Arguments,
+ const User *U = nullptr) const;
+
+ /// \return the expected cost of a memcpy, which could e.g. depend on the
+ /// source/destination type and alignment and the number of bytes copied.
+ int getMemcpyCost(const Instruction *I) const;
/// \return The estimated number of case clusters when lowering \p 'SI'.
/// \p JTSize Set a jump table size only when \p SI is suitable for a jump
@@ -295,7 +335,7 @@
// Returns true for the target specific
// set of operations which produce uniform result
- // even taking non-unform arguments
+ // even taking non-uniform arguments
bool isAlwaysUniform(const Value *V) const;
/// Returns the address space ID for a target's 'flat' address space. Note
@@ -436,6 +476,13 @@
void getUnrollingPreferences(Loop *L, ScalarEvolution &,
UnrollingPreferences &UP) const;
+ /// Query the target whether it would be profitable to convert the given loop
+ /// into a hardware loop.
+ bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
+ AssumptionCache &AC,
+ TargetLibraryInfo *LibInfo,
+ HardwareLoopInfo &HWLoopInfo) const;
+
/// @}
/// \name Scalar Target Information
@@ -482,6 +529,12 @@
/// calculation for the instructions in a loop.
bool canMacroFuseCmp() const;
+ /// Return true if the target can save a compare for loop count; for example,
+ /// a hardware loop can save the compare.
+ bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
+ DominatorTree *DT, AssumptionCache *AC,
+ TargetLibraryInfo *LibInfo) const;
+
/// \return True is LSR should make efforts to create/preserve post-inc
/// addressing mode expressions.
bool shouldFavorPostInc() const;
@@ -490,17 +543,26 @@
/// modes that operate across loop iterations.
bool shouldFavorBackedgeIndex(const Loop *L) const;
- /// Return true if the target supports masked load/store
- /// AVX2 and AVX-512 targets allow masks for consecutive load and store
+ /// Return true if the target supports masked store.
bool isLegalMaskedStore(Type *DataType) const;
+ /// Return true if the target supports masked load.
bool isLegalMaskedLoad(Type *DataType) const;
- /// Return true if the target supports masked gather/scatter
- /// AVX-512 fully supports gather and scatter for vectors with 32 and 64
- /// bits scalar type.
+ /// Return true if the target supports nontemporal store.
+ bool isLegalNTStore(Type *DataType, unsigned Alignment) const;
+ /// Return true if the target supports nontemporal load.
+ bool isLegalNTLoad(Type *DataType, unsigned Alignment) const;
+
+ /// Return true if the target supports masked scatter.
bool isLegalMaskedScatter(Type *DataType) const;
+ /// Return true if the target supports masked gather.
bool isLegalMaskedGather(Type *DataType) const;
+ /// Return true if the target supports masked compress store.
+ bool isLegalMaskedCompressStore(Type *DataType) const;
+ /// Return true if the target supports masked expand load.
+ bool isLegalMaskedExpandLoad(Type *DataType) const;
+
/// Return true if the target has a unified operation to calculate division
/// and remainder. If so, the additional implicit multiplication and
/// subtraction required to calculate a remainder from division are free. This
@@ -579,17 +641,35 @@
/// Don't restrict interleaved unrolling to small loops.
bool enableAggressiveInterleaving(bool LoopHasReductions) const;
- /// If not nullptr, enable inline expansion of memcmp. IsZeroCmp is
- /// true if this is the expansion of memcmp(p1, p2, s) == 0.
+ /// Returns options for expansion of memcmp. IsZeroCmp is
+ /// true if this is the expansion of memcmp(p1, p2, s) == 0.
struct MemCmpExpansionOptions {
+ // Return true if memcmp expansion is enabled.
+ operator bool() const { return MaxNumLoads > 0; }
+
+ // Maximum number of load operations.
+ unsigned MaxNumLoads = 0;
+
// The list of available load sizes (in bytes), sorted in decreasing order.
SmallVector<unsigned, 8> LoadSizes;
+
+ // For memcmp expansion when the memcmp result is only compared equal or
+ // not-equal to 0, allow up to this number of load pairs per block. As an
+ // example, this may allow 'memcmp(a, b, 3) == 0' in a single block:
+ // a0 = load2bytes &a[0]
+ // b0 = load2bytes &b[0]
+ // a2 = load1byte &a[2]
+ // b2 = load1byte &b[2]
+ // r = cmp eq (a0 ^ b0 | a2 ^ b2), 0
+ unsigned NumLoadsPerBlock = 1;
+
// Set to true to allow overlapping loads. For example, 7-byte compares can
// be done with two 4-byte compares instead of 4+2+1-byte compares. This
// requires all loads in LoadSizes to be doable in an unaligned way.
bool AllowOverlappingLoads = false;
};
- const MemCmpExpansionOptions *enableMemCmpExpansion(bool IsZeroCmp) const;
+ MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
+ bool IsZeroCmp) const;
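For orientation, a sketch of what a value-returning target hook might look like under the new signature (MyTTIImpl and the chosen numbers are illustrative):

  TTI::MemCmpExpansionOptions
  MyTTIImpl::enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const {
    TTI::MemCmpExpansionOptions Options;
    Options.MaxNumLoads = OptSize ? 2 : 4;  // nonzero MaxNumLoads enables expansion
    Options.LoadSizes.append({8, 4, 2, 1}); // widest loads first
    if (IsZeroCmp)
      Options.NumLoadsPerBlock = 2;         // allow OR-combined equality blocks
    return Options;
  }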
/// Enable matching of interleaved access groups.
bool enableInterleavedAccessVectorization() const;
@@ -703,7 +783,7 @@
bool shouldMaximizeVectorBandwidth(bool OptSize) const;
/// \return The minimum vectorization factor for types of given element
- /// bit width, or 0 if there is no mimimum VF. The returned value only
+ /// bit width, or 0 if there is no minimum VF. The returned value only
/// applies when shouldMaximizeVectorBandwidth returns true.
unsigned getMinimumVF(unsigned ElemWidth) const;
@@ -1008,6 +1088,11 @@
/// \returns True if the target wants to expand the given reduction intrinsic
/// into a shuffle sequence.
bool shouldExpandReduction(const IntrinsicInst *II) const;
+
+ /// \returns the size cost of rematerializing a GlobalValue address relative
+ /// to a stack reload.
+ unsigned getGISelRematGlobalCost() const;
+
/// @}
private:
@@ -1038,15 +1123,17 @@
virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
ArrayRef<const Value *> Operands) = 0;
virtual int getExtCost(const Instruction *I, const Value *Src) = 0;
- virtual int getCallCost(FunctionType *FTy, int NumArgs) = 0;
- virtual int getCallCost(const Function *F, int NumArgs) = 0;
+ virtual int getCallCost(FunctionType *FTy, int NumArgs, const User *U) = 0;
+ virtual int getCallCost(const Function *F, int NumArgs, const User *U) = 0;
virtual int getCallCost(const Function *F,
- ArrayRef<const Value *> Arguments) = 0;
+ ArrayRef<const Value *> Arguments, const User *U) = 0;
virtual unsigned getInliningThresholdMultiplier() = 0;
virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Type *> ParamTys) = 0;
+ ArrayRef<Type *> ParamTys, const User *U) = 0;
virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<const Value *> Arguments) = 0;
+ ArrayRef<const Value *> Arguments,
+ const User *U) = 0;
+ virtual int getMemcpyCost(const Instruction *I) = 0;
virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
unsigned &JTSize) = 0;
virtual int
@@ -1058,6 +1145,10 @@
virtual bool isLoweredToCall(const Function *F) = 0;
virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
UnrollingPreferences &UP) = 0;
+ virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
+ AssumptionCache &AC,
+ TargetLibraryInfo *LibInfo,
+ HardwareLoopInfo &HWLoopInfo) = 0;
virtual bool isLegalAddImmediate(int64_t Imm) = 0;
virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
@@ -1068,12 +1159,19 @@
virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2) = 0;
virtual bool canMacroFuseCmp() = 0;
+ virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
+ LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
+ TargetLibraryInfo *LibInfo) = 0;
virtual bool shouldFavorPostInc() const = 0;
virtual bool shouldFavorBackedgeIndex(const Loop *L) const = 0;
virtual bool isLegalMaskedStore(Type *DataType) = 0;
virtual bool isLegalMaskedLoad(Type *DataType) = 0;
+ virtual bool isLegalNTStore(Type *DataType, unsigned Alignment) = 0;
+ virtual bool isLegalNTLoad(Type *DataType, unsigned Alignment) = 0;
virtual bool isLegalMaskedScatter(Type *DataType) = 0;
virtual bool isLegalMaskedGather(Type *DataType) = 0;
+ virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
+ virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
virtual bool prefersVectorizedAddressing() = 0;
@@ -1096,8 +1194,8 @@
unsigned VF) = 0;
virtual bool supportsEfficientVectorElementLoadStore() = 0;
virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
- virtual const MemCmpExpansionOptions *enableMemCmpExpansion(
- bool IsZeroCmp) const = 0;
+ virtual MemCmpExpansionOptions
+ enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;
virtual bool enableInterleavedAccessVectorization() = 0;
virtual bool enableMaskedInterleavedAccessVectorization() = 0;
virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
@@ -1214,6 +1312,7 @@
virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
ReductionFlags) const = 0;
virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
+ virtual unsigned getGISelRematGlobalCost() const = 0;
virtual int getInstructionLatency(const Instruction *I) = 0;
};
@@ -1239,26 +1338,30 @@
int getExtCost(const Instruction *I, const Value *Src) override {
return Impl.getExtCost(I, Src);
}
- int getCallCost(FunctionType *FTy, int NumArgs) override {
- return Impl.getCallCost(FTy, NumArgs);
+ int getCallCost(FunctionType *FTy, int NumArgs, const User *U) override {
+ return Impl.getCallCost(FTy, NumArgs, U);
}
- int getCallCost(const Function *F, int NumArgs) override {
- return Impl.getCallCost(F, NumArgs);
+ int getCallCost(const Function *F, int NumArgs, const User *U) override {
+ return Impl.getCallCost(F, NumArgs, U);
}
int getCallCost(const Function *F,
- ArrayRef<const Value *> Arguments) override {
- return Impl.getCallCost(F, Arguments);
+ ArrayRef<const Value *> Arguments, const User *U) override {
+ return Impl.getCallCost(F, Arguments, U);
}
unsigned getInliningThresholdMultiplier() override {
return Impl.getInliningThresholdMultiplier();
}
int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Type *> ParamTys) override {
- return Impl.getIntrinsicCost(IID, RetTy, ParamTys);
+ ArrayRef<Type *> ParamTys, const User *U = nullptr) override {
+ return Impl.getIntrinsicCost(IID, RetTy, ParamTys, U);
}
int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<const Value *> Arguments) override {
- return Impl.getIntrinsicCost(IID, RetTy, Arguments);
+ ArrayRef<const Value *> Arguments,
+ const User *U = nullptr) override {
+ return Impl.getIntrinsicCost(IID, RetTy, Arguments, U);
+ }
+ int getMemcpyCost(const Instruction *I) override {
+ return Impl.getMemcpyCost(I);
}
int getUserCost(const User *U, ArrayRef<const Value *> Operands) override {
return Impl.getUserCost(U, Operands);
@@ -1283,6 +1386,12 @@
UnrollingPreferences &UP) override {
return Impl.getUnrollingPreferences(L, SE, UP);
}
+ bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
+ AssumptionCache &AC,
+ TargetLibraryInfo *LibInfo,
+ HardwareLoopInfo &HWLoopInfo) override {
+ return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
+ }
bool isLegalAddImmediate(int64_t Imm) override {
return Impl.isLegalAddImmediate(Imm);
}
@@ -1303,6 +1412,12 @@
bool canMacroFuseCmp() override {
return Impl.canMacroFuseCmp();
}
+ bool canSaveCmp(Loop *L, BranchInst **BI,
+ ScalarEvolution *SE,
+ LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
+ TargetLibraryInfo *LibInfo) override {
+ return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
+ }
bool shouldFavorPostInc() const override {
return Impl.shouldFavorPostInc();
}
@@ -1315,12 +1430,24 @@
bool isLegalMaskedLoad(Type *DataType) override {
return Impl.isLegalMaskedLoad(DataType);
}
+ bool isLegalNTStore(Type *DataType, unsigned Alignment) override {
+ return Impl.isLegalNTStore(DataType, Alignment);
+ }
+ bool isLegalNTLoad(Type *DataType, unsigned Alignment) override {
+ return Impl.isLegalNTLoad(DataType, Alignment);
+ }
bool isLegalMaskedScatter(Type *DataType) override {
return Impl.isLegalMaskedScatter(DataType);
}
bool isLegalMaskedGather(Type *DataType) override {
return Impl.isLegalMaskedGather(DataType);
}
+ bool isLegalMaskedCompressStore(Type *DataType) override {
+ return Impl.isLegalMaskedCompressStore(DataType);
+ }
+ bool isLegalMaskedExpandLoad(Type *DataType) override {
+ return Impl.isLegalMaskedExpandLoad(DataType);
+ }
bool hasDivRemOp(Type *DataType, bool IsSigned) override {
return Impl.hasDivRemOp(DataType, IsSigned);
}
@@ -1375,9 +1502,9 @@
bool enableAggressiveInterleaving(bool LoopHasReductions) override {
return Impl.enableAggressiveInterleaving(LoopHasReductions);
}
- const MemCmpExpansionOptions *enableMemCmpExpansion(
- bool IsZeroCmp) const override {
- return Impl.enableMemCmpExpansion(IsZeroCmp);
+ MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
+ bool IsZeroCmp) const override {
+ return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
}
bool enableInterleavedAccessVectorization() override {
return Impl.enableInterleavedAccessVectorization();
@@ -1624,6 +1751,11 @@
bool shouldExpandReduction(const IntrinsicInst *II) const override {
return Impl.shouldExpandReduction(II);
}
+
+ unsigned getGISelRematGlobalCost() const override {
+ return Impl.getGISelRematGlobalCost();
+ }
+
int getInstructionLatency(const Instruction *I) override {
return Impl.getInstructionLatency(I);
}
diff --git a/linux-x64/clang/include/llvm/Analysis/TargetTransformInfoImpl.h b/linux-x64/clang/include/llvm/Analysis/TargetTransformInfoImpl.h
index 4705933..a9383e7 100644
--- a/linux-x64/clang/include/llvm/Analysis/TargetTransformInfoImpl.h
+++ b/linux-x64/clang/include/llvm/Analysis/TargetTransformInfoImpl.h
@@ -123,7 +123,7 @@
return TTI::TCC_Basic;
}
- unsigned getCallCost(FunctionType *FTy, int NumArgs) {
+ unsigned getCallCost(FunctionType *FTy, int NumArgs, const User *U) {
assert(FTy && "FunctionType must be provided to this routine.");
// The target-independent implementation just measures the size of the
@@ -140,45 +140,8 @@
unsigned getInliningThresholdMultiplier() { return 1; }
- unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Type *> ParamTys) {
- switch (IID) {
- default:
- // Intrinsics rarely (if ever) have normal argument setup constraints.
- // Model them as having a basic instruction cost.
- // FIXME: This is wrong for libc intrinsics.
- return TTI::TCC_Basic;
-
- case Intrinsic::annotation:
- case Intrinsic::assume:
- case Intrinsic::sideeffect:
- case Intrinsic::dbg_declare:
- case Intrinsic::dbg_value:
- case Intrinsic::dbg_label:
- case Intrinsic::invariant_start:
- case Intrinsic::invariant_end:
- case Intrinsic::launder_invariant_group:
- case Intrinsic::strip_invariant_group:
- case Intrinsic::is_constant:
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
- case Intrinsic::objectsize:
- case Intrinsic::ptr_annotation:
- case Intrinsic::var_annotation:
- case Intrinsic::experimental_gc_result:
- case Intrinsic::experimental_gc_relocate:
- case Intrinsic::coro_alloc:
- case Intrinsic::coro_begin:
- case Intrinsic::coro_free:
- case Intrinsic::coro_end:
- case Intrinsic::coro_frame:
- case Intrinsic::coro_size:
- case Intrinsic::coro_suspend:
- case Intrinsic::coro_param:
- case Intrinsic::coro_subfn_addr:
- // These intrinsics don't actually represent code after lowering.
- return TTI::TCC_Free;
- }
+ unsigned getMemcpyCost(const Instruction *I) {
+ return TTI::TCC_Expensive;
}
bool hasBranchDivergence() { return false; }
@@ -227,6 +190,13 @@
return true;
}
+ bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
+ AssumptionCache &AC,
+ TargetLibraryInfo *LibInfo,
+ HardwareLoopInfo &HWLoopInfo) {
+ return false;
+ }
+
void getUnrollingPreferences(Loop *, ScalarEvolution &,
TTI::UnrollingPreferences &) {}
@@ -251,6 +221,12 @@
bool canMacroFuseCmp() { return false; }
+ bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
+ DominatorTree *DT, AssumptionCache *AC,
+ TargetLibraryInfo *LibInfo) {
+ return false;
+ }
+
bool shouldFavorPostInc() const { return false; }
bool shouldFavorBackedgeIndex(const Loop *L) const { return false; }
@@ -259,10 +235,28 @@
bool isLegalMaskedLoad(Type *DataType) { return false; }
+ bool isLegalNTStore(Type *DataType, unsigned Alignment) {
+ // By default, assume nontemporal memory stores are available for stores
+ // that are aligned and have a size that is a power of 2.
+ unsigned DataSize = DL.getTypeStoreSize(DataType);
+ return Alignment >= DataSize && isPowerOf2_32(DataSize);
+ }
+
+ bool isLegalNTLoad(Type *DataType, unsigned Alignment) {
+ // By default, assume nontemporal memory loads are available for loads that
+ // are aligned and have a size that is a power of 2.
+ unsigned DataSize = DL.getTypeStoreSize(DataType);
+ return Alignment >= DataSize && isPowerOf2_32(DataSize);
+ }
+
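As a concrete instance of this default: a 16-byte <4 x float> access is reported legal as a nontemporal store or load at alignment 16 or higher, is rejected at alignment 8, and a 12-byte type is rejected regardless of alignment because its store size is not a power of two.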
bool isLegalMaskedScatter(Type *DataType) { return false; }
bool isLegalMaskedGather(Type *DataType) { return false; }
+ bool isLegalMaskedCompressStore(Type *DataType) { return false; }
+
+ bool isLegalMaskedExpandLoad(Type *DataType) { return false; }
+
bool hasDivRemOp(Type *DataType, bool IsSigned) { return false; }
bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) { return false; }
@@ -308,9 +302,9 @@
bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; }
- const TTI::MemCmpExpansionOptions *enableMemCmpExpansion(
- bool IsZeroCmp) const {
- return nullptr;
+ TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
+ bool IsZeroCmp) const {
+ return {};
}
bool enableInterleavedAccessVectorization() { return false; }
@@ -584,6 +578,10 @@
return true;
}
+ unsigned getGISelRematGlobalCost() const {
+ return 1;
+ }
+
protected:
// Obtain the minimum required size to hold the value (without the sign)
// In case of a vector it returns the min required size for one element.
@@ -680,7 +678,7 @@
public:
using BaseT::getCallCost;
- unsigned getCallCost(const Function *F, int NumArgs) {
+ unsigned getCallCost(const Function *F, int NumArgs, const User *U) {
assert(F && "A concrete function must be provided to this routine.");
if (NumArgs < 0)
@@ -692,35 +690,34 @@
FunctionType *FTy = F->getFunctionType();
SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end());
return static_cast<T *>(this)
- ->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys);
+ ->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys, U);
}
if (!static_cast<T *>(this)->isLoweredToCall(F))
return TTI::TCC_Basic; // Give a basic cost if it will be lowered
// directly.
- return static_cast<T *>(this)->getCallCost(F->getFunctionType(), NumArgs);
+ return static_cast<T *>(this)->getCallCost(F->getFunctionType(), NumArgs, U);
}
- unsigned getCallCost(const Function *F, ArrayRef<const Value *> Arguments) {
+ unsigned getCallCost(const Function *F, ArrayRef<const Value *> Arguments,
+ const User *U) {
// Simply delegate to generic handling of the call.
// FIXME: We should use instsimplify or something else to catch calls which
// will constant fold with these arguments.
- return static_cast<T *>(this)->getCallCost(F, Arguments.size());
+ return static_cast<T *>(this)->getCallCost(F, Arguments.size(), U);
}
using BaseT::getGEPCost;
int getGEPCost(Type *PointeeType, const Value *Ptr,
ArrayRef<const Value *> Operands) {
- const GlobalValue *BaseGV = nullptr;
- if (Ptr != nullptr) {
- // TODO: will remove this when pointers have an opaque type.
- assert(Ptr->getType()->getScalarType()->getPointerElementType() ==
- PointeeType &&
- "explicit pointee type doesn't match operand's pointee type");
- BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
- }
+ assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
+ // TODO: will remove this when pointers have an opaque type.
+ assert(Ptr->getType()->getScalarType()->getPointerElementType() ==
+ PointeeType &&
+ "explicit pointee type doesn't match operand's pointee type");
+ auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
bool HasBaseReg = (BaseGV == nullptr);
auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
@@ -763,21 +760,60 @@
}
}
- // Assumes the address space is 0 when Ptr is nullptr.
- unsigned AS =
- (Ptr == nullptr ? 0 : Ptr->getType()->getPointerAddressSpace());
-
if (static_cast<T *>(this)->isLegalAddressingMode(
TargetType, const_cast<GlobalValue *>(BaseGV),
- BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale, AS))
+ BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
+ Ptr->getType()->getPointerAddressSpace()))
return TTI::TCC_Free;
return TTI::TCC_Basic;
}
- using BaseT::getIntrinsicCost;
+ unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
+ ArrayRef<Type *> ParamTys, const User *U) {
+ switch (IID) {
+ default:
+ // Intrinsics rarely (if ever) have normal argument setup constraints.
+ // Model them as having a basic instruction cost.
+ return TTI::TCC_Basic;
+
+ // TODO: other libc intrinsics.
+ case Intrinsic::memcpy:
+ return static_cast<T *>(this)->getMemcpyCost(dyn_cast<Instruction>(U));
+
+ case Intrinsic::annotation:
+ case Intrinsic::assume:
+ case Intrinsic::sideeffect:
+ case Intrinsic::dbg_declare:
+ case Intrinsic::dbg_value:
+ case Intrinsic::dbg_label:
+ case Intrinsic::invariant_start:
+ case Intrinsic::invariant_end:
+ case Intrinsic::launder_invariant_group:
+ case Intrinsic::strip_invariant_group:
+ case Intrinsic::is_constant:
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::objectsize:
+ case Intrinsic::ptr_annotation:
+ case Intrinsic::var_annotation:
+ case Intrinsic::experimental_gc_result:
+ case Intrinsic::experimental_gc_relocate:
+ case Intrinsic::coro_alloc:
+ case Intrinsic::coro_begin:
+ case Intrinsic::coro_free:
+ case Intrinsic::coro_end:
+ case Intrinsic::coro_frame:
+ case Intrinsic::coro_size:
+ case Intrinsic::coro_suspend:
+ case Intrinsic::coro_param:
+ case Intrinsic::coro_subfn_addr:
+ // These intrinsics don't actually represent code after lowering.
+ return TTI::TCC_Free;
+ }
+ }
unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<const Value *> Arguments) {
+ ArrayRef<const Value *> Arguments, const User *U) {
// Delegate to the generic intrinsic handling code. This mostly provides an
// opportunity for targets to (for example) special case the cost of
// certain intrinsics based on constants used as arguments.
@@ -785,7 +821,7 @@
ParamTys.reserve(Arguments.size());
for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx)
ParamTys.push_back(Arguments[Idx]->getType());
- return static_cast<T *>(this)->getIntrinsicCost(IID, RetTy, ParamTys);
+ return static_cast<T *>(this)->getIntrinsicCost(IID, RetTy, ParamTys, U);
}
unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands) {
@@ -809,22 +845,18 @@
// Just use the called value type.
Type *FTy = CS.getCalledValue()->getType()->getPointerElementType();
return static_cast<T *>(this)
- ->getCallCost(cast<FunctionType>(FTy), CS.arg_size());
+ ->getCallCost(cast<FunctionType>(FTy), CS.arg_size(), U);
}
SmallVector<const Value *, 8> Arguments(CS.arg_begin(), CS.arg_end());
- return static_cast<T *>(this)->getCallCost(F, Arguments);
+ return static_cast<T *>(this)->getCallCost(F, Arguments, U);
}
- if (const CastInst *CI = dyn_cast<CastInst>(U)) {
- // Result of a cmp instruction is often extended (to be used by other
- // cmp instructions, logical or return instructions). These are usually
- // nop on most sane targets.
- if (isa<CmpInst>(CI->getOperand(0)))
- return TTI::TCC_Free;
- if (isa<SExtInst>(CI) || isa<ZExtInst>(CI) || isa<FPExtInst>(CI))
- return static_cast<T *>(this)->getExtCost(CI, Operands.back());
- }
+ if (isa<SExtInst>(U) || isa<ZExtInst>(U) || isa<FPExtInst>(U))
+ // The old behaviour of generally treating extensions of icmp as free
+ // has been removed. A target that needs it should override getUserCost().
+ return static_cast<T *>(this)->getExtCost(cast<Instruction>(U),
+ Operands.back());
return static_cast<T *>(this)->getOperationCost(
Operator::getOpcode(U), U->getType(),
diff --git a/linux-x64/clang/include/llvm/Analysis/TypeBasedAliasAnalysis.h b/linux-x64/clang/include/llvm/Analysis/TypeBasedAliasAnalysis.h
index 1235005..344f268 100644
--- a/linux-x64/clang/include/llvm/Analysis/TypeBasedAliasAnalysis.h
+++ b/linux-x64/clang/include/llvm/Analysis/TypeBasedAliasAnalysis.h
@@ -40,12 +40,16 @@
return false;
}
- AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB);
- bool pointsToConstantMemory(const MemoryLocation &Loc, bool OrLocal);
+ AliasResult alias(const MemoryLocation &LocA, const MemoryLocation &LocB,
+ AAQueryInfo &AAQI);
+ bool pointsToConstantMemory(const MemoryLocation &Loc, AAQueryInfo &AAQI,
+ bool OrLocal);
FunctionModRefBehavior getModRefBehavior(const CallBase *Call);
FunctionModRefBehavior getModRefBehavior(const Function *F);
- ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc);
- ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2);
+ ModRefInfo getModRefInfo(const CallBase *Call, const MemoryLocation &Loc,
+ AAQueryInfo &AAQI);
+ ModRefInfo getModRefInfo(const CallBase *Call1, const CallBase *Call2,
+ AAQueryInfo &AAQI);
private:
bool Aliases(const MDNode *A, const MDNode *B) const;
diff --git a/linux-x64/clang/include/llvm/Analysis/ValueTracking.h b/linux-x64/clang/include/llvm/Analysis/ValueTracking.h
index b3c07b1..f14c2a4 100644
--- a/linux-x64/clang/include/llvm/Analysis/ValueTracking.h
+++ b/linux-x64/clang/include/llvm/Analysis/ValueTracking.h
@@ -16,6 +16,7 @@
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/SmallSet.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instruction.h"
@@ -32,6 +33,7 @@
class DominatorTree;
class GEPOperator;
class IntrinsicInst;
+class WithOverflowInst;
struct KnownBits;
class Loop;
class LoopInfo;
@@ -350,7 +352,8 @@
/// Since A[i] and A[i-1] are independent pointers, getUnderlyingObjects
/// should not assume that Curr and Prev share the same underlying object thus
/// it shouldn't look through the phi above.
- void GetUnderlyingObjects(Value *V, SmallVectorImpl<Value *> &Objects,
+ void GetUnderlyingObjects(const Value *V,
+ SmallVectorImpl<const Value *> &Objects,
const DataLayout &DL, LoopInfo *LI = nullptr,
unsigned MaxLookup = 6);
@@ -410,7 +413,16 @@
bool isValidAssumeForContext(const Instruction *I, const Instruction *CxtI,
const DominatorTree *DT = nullptr);
- enum class OverflowResult { AlwaysOverflows, MayOverflow, NeverOverflows };
+ enum class OverflowResult {
+ /// Always overflows in the direction of signed/unsigned min value.
+ AlwaysOverflowsLow,
+ /// Always overflows in the direction of signed/unsigned max value.
+ AlwaysOverflowsHigh,
+ /// May or may not overflow.
+ MayOverflow,
+ /// Never overflows.
+ NeverOverflows,
+ };
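For example, an unsigned i8 addition whose operands are both known to be at least 128 would fall under AlwaysOverflowsHigh (it always wraps past the maximum), while an unsigned subtraction whose right operand is known to be larger than the left would fall under AlwaysOverflowsLow; anything that depends on unknown bits remains MayOverflow.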
OverflowResult computeOverflowForUnsignedMul(const Value *LHS,
const Value *RHS,
@@ -454,12 +466,17 @@
const Instruction *CxtI,
const DominatorTree *DT);
- /// Returns true if the arithmetic part of the \p II 's result is
+ /// Returns true if the arithmetic part of the \p WO 's result is
/// used only along the paths control dependent on the computation
- /// not overflowing, \p II being an <op>.with.overflow intrinsic.
- bool isOverflowIntrinsicNoWrap(const IntrinsicInst *II,
+ /// not overflowing, \p WO being an <op>.with.overflow intrinsic.
+ bool isOverflowIntrinsicNoWrap(const WithOverflowInst *WO,
const DominatorTree &DT);
+
+ /// Determine the possible constant range of an integer or vector of integer
+ /// value. This is intended as a cheap, non-recursive check.
+ ConstantRange computeConstantRange(const Value *V, bool UseInstrInfo = true);
+
/// Return true if this function can prove that the instruction I will
/// always transfer execution to one of its successors (including the next
/// instruction that follows within a basic block). E.g. this is not
@@ -505,6 +522,12 @@
/// value (all bits poison).
const Value *getGuaranteedNonFullPoisonOp(const Instruction *I);
+ /// Return true if the given instruction must trigger undefined behavior
+ /// when I is executed with any operands which appear in KnownPoison holding
+ /// a full-poison value at the point of execution.
+ bool mustTriggerUB(const Instruction *I,
+ const SmallSet<const Value *, 16>& KnownPoison);
+
/// Return true if this function can prove that if PoisonI is executed
/// and yields a full-poison value (all bits poison), then that will
/// trigger undefined behavior.
@@ -583,6 +606,12 @@
return Result;
}
+ /// Determine the pattern that a select with the given compare as its
+ /// predicate and given values as its true/false operands would match.
+ SelectPatternResult matchDecomposedSelectPattern(
+ CmpInst *CmpI, Value *TrueVal, Value *FalseVal, Value *&LHS, Value *&RHS,
+ Instruction::CastOps *CastOp = nullptr, unsigned Depth = 0);
+
/// Return the canonical comparison predicate for the specified
/// minimum/maximum flavor.
CmpInst::Predicate getMinMaxPred(SelectPatternFlavor SPF,
diff --git a/linux-x64/clang/include/llvm/Analysis/VecFuncs.def b/linux-x64/clang/include/llvm/Analysis/VecFuncs.def
new file mode 100644
index 0000000..4c92062
--- /dev/null
+++ b/linux-x64/clang/include/llvm/Analysis/VecFuncs.def
@@ -0,0 +1,250 @@
+//===-- VecFuncs.def - Library information -------------*- C++ -*-----------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// This .def file will create mappings from scalar math functions to vector
+// functions along with their vectorization factor. Support currently covers the
+// Accelerate framework, the IBM MASS vector library, and the Intel SVML library.
+
+#if !(defined(TLI_DEFINE_VECFUNC))
+#define TLI_DEFINE_VECFUNC(SCAL, VEC, VF) {SCAL, VEC, VF},
+#endif
+
+#if defined(TLI_DEFINE_ACCELERATE_VECFUNCS)
+// Accelerate framework's vector functions
+
+// Floating-Point Arithmetic and Auxiliary Functions
+TLI_DEFINE_VECFUNC("ceilf", "vceilf", 4)
+TLI_DEFINE_VECFUNC("fabsf", "vfabsf", 4)
+TLI_DEFINE_VECFUNC("llvm.fabs.f32", "vfabsf", 4)
+TLI_DEFINE_VECFUNC("floorf", "vfloorf", 4)
+TLI_DEFINE_VECFUNC("sqrtf", "vsqrtf", 4)
+TLI_DEFINE_VECFUNC("llvm.sqrt.f32", "vsqrtf", 4)
+
+// Exponential and Logarithmic Functions
+TLI_DEFINE_VECFUNC("expf", "vexpf", 4)
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "vexpf", 4)
+TLI_DEFINE_VECFUNC("expm1f", "vexpm1f", 4)
+TLI_DEFINE_VECFUNC("logf", "vlogf", 4)
+TLI_DEFINE_VECFUNC("llvm.log.f32", "vlogf", 4)
+TLI_DEFINE_VECFUNC("log1pf", "vlog1pf", 4)
+TLI_DEFINE_VECFUNC("log10f", "vlog10f", 4)
+TLI_DEFINE_VECFUNC("llvm.log10.f32", "vlog10f", 4)
+TLI_DEFINE_VECFUNC("logbf", "vlogbf", 4)
+
+// Trigonometric Functions
+TLI_DEFINE_VECFUNC("sinf", "vsinf", 4)
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "vsinf", 4)
+TLI_DEFINE_VECFUNC("cosf", "vcosf", 4)
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "vcosf", 4)
+TLI_DEFINE_VECFUNC("tanf", "vtanf", 4)
+TLI_DEFINE_VECFUNC("asinf", "vasinf", 4)
+TLI_DEFINE_VECFUNC("acosf", "vacosf", 4)
+TLI_DEFINE_VECFUNC("atanf", "vatanf", 4)
+
+// Hyperbolic Functions
+TLI_DEFINE_VECFUNC("sinhf", "vsinhf", 4)
+TLI_DEFINE_VECFUNC("coshf", "vcoshf", 4)
+TLI_DEFINE_VECFUNC("tanhf", "vtanhf", 4)
+TLI_DEFINE_VECFUNC("asinhf", "vasinhf", 4)
+TLI_DEFINE_VECFUNC("acoshf", "vacoshf", 4)
+TLI_DEFINE_VECFUNC("atanhf", "vatanhf", 4)
+
+
+#elif defined(TLI_DEFINE_MASSV_VECFUNCS)
+// IBM MASS library's vector functions
+
+// Floating-Point Arithmetic and Auxiliary Functions
+TLI_DEFINE_VECFUNC("cbrt", "__cbrtd2_massv", 2)
+TLI_DEFINE_VECFUNC("cbrtf", "__cbrtf4_massv", 4)
+TLI_DEFINE_VECFUNC("pow", "__powd2_massv", 2)
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "__powd2_massv", 2)
+TLI_DEFINE_VECFUNC("powf", "__powf4_massv", 4)
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "__powf4_massv", 4)
+TLI_DEFINE_VECFUNC("sqrt", "__sqrtd2_massv", 2)
+TLI_DEFINE_VECFUNC("llvm.sqrt.f64", "__sqrtd2_massv", 2)
+TLI_DEFINE_VECFUNC("sqrtf", "__sqrtf4_massv", 4)
+TLI_DEFINE_VECFUNC("llvm.sqrt.f32", "__sqrtf4_massv", 4)
+
+// Exponential and Logarithmic Functions
+TLI_DEFINE_VECFUNC("exp", "__expd2_massv", 2)
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "__expd2_massv", 2)
+TLI_DEFINE_VECFUNC("expf", "__expf4_massv", 4)
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "__expf4_massv", 4)
+TLI_DEFINE_VECFUNC("exp2", "__exp2d2_massv", 2)
+TLI_DEFINE_VECFUNC("llvm.exp2.f64", "__exp2d2_massv", 2)
+TLI_DEFINE_VECFUNC("exp2f", "__exp2f4_massv", 4)
+TLI_DEFINE_VECFUNC("llvm.exp2.f32", "__exp2f4_massv", 4)
+TLI_DEFINE_VECFUNC("expm1", "__expm1d2_massv", 2)
+TLI_DEFINE_VECFUNC("expm1f", "__expm1f4_massv", 4)
+TLI_DEFINE_VECFUNC("log", "__logd2_massv", 2)
+TLI_DEFINE_VECFUNC("llvm.log.f64", "__logd2_massv", 2)
+TLI_DEFINE_VECFUNC("logf", "__logf4_massv", 4)
+TLI_DEFINE_VECFUNC("llvm.log.f32", "__logf4_massv", 4)
+TLI_DEFINE_VECFUNC("log1p", "__log1pd2_massv", 2)
+TLI_DEFINE_VECFUNC("log1pf", "__log1pf4_massv", 4)
+TLI_DEFINE_VECFUNC("log10", "__log10d2_massv", 2)
+TLI_DEFINE_VECFUNC("llvm.log10.f64", "__log10d2_massv", 2)
+TLI_DEFINE_VECFUNC("log10f", "__log10f4_massv", 4)
+TLI_DEFINE_VECFUNC("llvm.log10.f32", "__log10f4_massv", 4)
+TLI_DEFINE_VECFUNC("log2", "__log2d2_massv", 2)
+TLI_DEFINE_VECFUNC("llvm.log2.f64", "__log2d2_massv", 2)
+TLI_DEFINE_VECFUNC("log2f", "__log2f4_massv", 4)
+TLI_DEFINE_VECFUNC("llvm.log2.f32", "__log2f4_massv", 4)
+
+// Trigonometric Functions
+TLI_DEFINE_VECFUNC("sin", "__sind2_massv", 2)
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "__sind2_massv", 2)
+TLI_DEFINE_VECFUNC("sinf", "__sinf4_massv", 4)
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "__sinf4_massv", 4)
+TLI_DEFINE_VECFUNC("cos", "__cosd2_massv", 2)
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "__cosd2_massv", 2)
+TLI_DEFINE_VECFUNC("cosf", "__cosf4_massv", 4)
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "__cosf4_massv", 4)
+TLI_DEFINE_VECFUNC("tan", "__tand2_massv", 2)
+TLI_DEFINE_VECFUNC("tanf", "__tanf4_massv", 4)
+TLI_DEFINE_VECFUNC("asin", "__asind2_massv", 2)
+TLI_DEFINE_VECFUNC("asinf", "__asinf4_massv", 4)
+TLI_DEFINE_VECFUNC("acos", "__acosd2_massv", 2)
+TLI_DEFINE_VECFUNC("acosf", "__acosf4_massv", 4)
+TLI_DEFINE_VECFUNC("atan", "__atand2_massv", 2)
+TLI_DEFINE_VECFUNC("atanf", "__atanf4_massv", 4)
+TLI_DEFINE_VECFUNC("atan2", "__atan2d2_massv", 2)
+TLI_DEFINE_VECFUNC("atan2f", "__atan2f4_massv", 4)
+
+// Hyperbolic Functions
+TLI_DEFINE_VECFUNC("sinh", "__sinhd2_massv", 2)
+TLI_DEFINE_VECFUNC("sinhf", "__sinhf4_massv", 4)
+TLI_DEFINE_VECFUNC("cosh", "__coshd2_massv", 2)
+TLI_DEFINE_VECFUNC("coshf", "__coshf4_massv", 4)
+TLI_DEFINE_VECFUNC("tanh", "__tanhd2_massv", 2)
+TLI_DEFINE_VECFUNC("tanhf", "__tanhf4_massv", 4)
+TLI_DEFINE_VECFUNC("asinh", "__asinhd2_massv", 2)
+TLI_DEFINE_VECFUNC("asinhf", "__asinhf4_massv", 4)
+TLI_DEFINE_VECFUNC("acosh", "__acoshd2_massv", 2)
+TLI_DEFINE_VECFUNC("acoshf", "__acoshf4_massv", 4)
+TLI_DEFINE_VECFUNC("atanh", "__atanhd2_massv", 2)
+TLI_DEFINE_VECFUNC("atanhf", "__atanhf4_massv", 4)
+
+
+#elif defined(TLI_DEFINE_SVML_VECFUNCS)
+// Intel SVML library's vector functions
+
+TLI_DEFINE_VECFUNC("sin", "__svml_sin2", 2)
+TLI_DEFINE_VECFUNC("sin", "__svml_sin4", 4)
+TLI_DEFINE_VECFUNC("sin", "__svml_sin8", 8)
+
+TLI_DEFINE_VECFUNC("sinf", "__svml_sinf4", 4)
+TLI_DEFINE_VECFUNC("sinf", "__svml_sinf8", 8)
+TLI_DEFINE_VECFUNC("sinf", "__svml_sinf16", 16)
+
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "__svml_sin2", 2)
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "__svml_sin4", 4)
+TLI_DEFINE_VECFUNC("llvm.sin.f64", "__svml_sin8", 8)
+
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "__svml_sinf4", 4)
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "__svml_sinf8", 8)
+TLI_DEFINE_VECFUNC("llvm.sin.f32", "__svml_sinf16", 16)
+
+TLI_DEFINE_VECFUNC("cos", "__svml_cos2", 2)
+TLI_DEFINE_VECFUNC("cos", "__svml_cos4", 4)
+TLI_DEFINE_VECFUNC("cos", "__svml_cos8", 8)
+
+TLI_DEFINE_VECFUNC("cosf", "__svml_cosf4", 4)
+TLI_DEFINE_VECFUNC("cosf", "__svml_cosf8", 8)
+TLI_DEFINE_VECFUNC("cosf", "__svml_cosf16", 16)
+
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "__svml_cos2", 2)
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "__svml_cos4", 4)
+TLI_DEFINE_VECFUNC("llvm.cos.f64", "__svml_cos8", 8)
+
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf4", 4)
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf8", 8)
+TLI_DEFINE_VECFUNC("llvm.cos.f32", "__svml_cosf16", 16)
+
+TLI_DEFINE_VECFUNC("pow", "__svml_pow2", 2)
+TLI_DEFINE_VECFUNC("pow", "__svml_pow4", 4)
+TLI_DEFINE_VECFUNC("pow", "__svml_pow8", 8)
+
+TLI_DEFINE_VECFUNC("powf", "__svml_powf4", 4)
+TLI_DEFINE_VECFUNC("powf", "__svml_powf8", 8)
+TLI_DEFINE_VECFUNC("powf", "__svml_powf16", 16)
+
+TLI_DEFINE_VECFUNC("__pow_finite", "__svml_pow2", 2)
+TLI_DEFINE_VECFUNC("__pow_finite", "__svml_pow4", 4)
+TLI_DEFINE_VECFUNC("__pow_finite", "__svml_pow8", 8)
+
+TLI_DEFINE_VECFUNC("__powf_finite", "__svml_powf4", 4)
+TLI_DEFINE_VECFUNC("__powf_finite", "__svml_powf8", 8)
+TLI_DEFINE_VECFUNC("__powf_finite", "__svml_powf16", 16)
+
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "__svml_pow2", 2)
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "__svml_pow4", 4)
+TLI_DEFINE_VECFUNC("llvm.pow.f64", "__svml_pow8", 8)
+
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "__svml_powf4", 4)
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "__svml_powf8", 8)
+TLI_DEFINE_VECFUNC("llvm.pow.f32", "__svml_powf16", 16)
+
+TLI_DEFINE_VECFUNC("exp", "__svml_exp2", 2)
+TLI_DEFINE_VECFUNC("exp", "__svml_exp4", 4)
+TLI_DEFINE_VECFUNC("exp", "__svml_exp8", 8)
+
+TLI_DEFINE_VECFUNC("expf", "__svml_expf4", 4)
+TLI_DEFINE_VECFUNC("expf", "__svml_expf8", 8)
+TLI_DEFINE_VECFUNC("expf", "__svml_expf16", 16)
+
+TLI_DEFINE_VECFUNC("__exp_finite", "__svml_exp2", 2)
+TLI_DEFINE_VECFUNC("__exp_finite", "__svml_exp4", 4)
+TLI_DEFINE_VECFUNC("__exp_finite", "__svml_exp8", 8)
+
+TLI_DEFINE_VECFUNC("__expf_finite", "__svml_expf4", 4)
+TLI_DEFINE_VECFUNC("__expf_finite", "__svml_expf8", 8)
+TLI_DEFINE_VECFUNC("__expf_finite", "__svml_expf16", 16)
+
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "__svml_exp2", 2)
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "__svml_exp4", 4)
+TLI_DEFINE_VECFUNC("llvm.exp.f64", "__svml_exp8", 8)
+
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "__svml_expf4", 4)
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "__svml_expf8", 8)
+TLI_DEFINE_VECFUNC("llvm.exp.f32", "__svml_expf16", 16)
+
+TLI_DEFINE_VECFUNC("log", "__svml_log2", 2)
+TLI_DEFINE_VECFUNC("log", "__svml_log4", 4)
+TLI_DEFINE_VECFUNC("log", "__svml_log8", 8)
+
+TLI_DEFINE_VECFUNC("logf", "__svml_logf4", 4)
+TLI_DEFINE_VECFUNC("logf", "__svml_logf8", 8)
+TLI_DEFINE_VECFUNC("logf", "__svml_logf16", 16)
+
+TLI_DEFINE_VECFUNC("__log_finite", "__svml_log2", 2)
+TLI_DEFINE_VECFUNC("__log_finite", "__svml_log4", 4)
+TLI_DEFINE_VECFUNC("__log_finite", "__svml_log8", 8)
+
+TLI_DEFINE_VECFUNC("__logf_finite", "__svml_logf4", 4)
+TLI_DEFINE_VECFUNC("__logf_finite", "__svml_logf8", 8)
+TLI_DEFINE_VECFUNC("__logf_finite", "__svml_logf16", 16)
+
+TLI_DEFINE_VECFUNC("llvm.log.f64", "__svml_log2", 2)
+TLI_DEFINE_VECFUNC("llvm.log.f64", "__svml_log4", 4)
+TLI_DEFINE_VECFUNC("llvm.log.f64", "__svml_log8", 8)
+
+TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf4", 4)
+TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf8", 8)
+TLI_DEFINE_VECFUNC("llvm.log.f32", "__svml_logf16", 16)
+
+
+#else
+#error "Must choose which vector library functions are to be defined."
+#endif
+
+#undef TLI_DEFINE_VECFUNC
+#undef TLI_DEFINE_ACCELERATE_VECFUNCS
+#undef TLI_DEFINE_MASSV_VECFUNCS
+#undef TLI_DEFINE_SVML_VECFUNCS
+
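The table above is meant to be consumed with the usual X-macro pattern; a sketch follows (the array name is hypothetical, and VecDesc is assumed to be the scalar-name/vector-name/VF triple from TargetLibraryInfo.h):

  // Select the MASSV section, then let TLI_DEFINE_VECFUNC expand each entry
  // into a {scalar, vector, VF} initializer.
  const llvm::VecDesc MASSVVecFuncs[] = {
  #define TLI_DEFINE_MASSV_VECFUNCS
  #include "llvm/Analysis/VecFuncs.def"
  };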
diff --git a/linux-x64/clang/include/llvm/Analysis/VectorUtils.h b/linux-x64/clang/include/llvm/Analysis/VectorUtils.h
index 60ef633..d93d2bc 100644
--- a/linux-x64/clang/include/llvm/Analysis/VectorUtils.h
+++ b/linux-x64/clang/include/llvm/Analysis/VectorUtils.h
@@ -17,6 +17,7 @@
#include "llvm/Analysis/LoopAccessAnalysis.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/IR/IRBuilder.h"
+#include "llvm/Support/CheckedArithmetic.h"
namespace llvm {
@@ -35,13 +36,12 @@
}
/// Identify if the intrinsic is trivially vectorizable.
-/// This method returns true if the intrinsic's argument types are all
-/// scalars for the scalar form of the intrinsic and all vectors for
-/// the vector form of the intrinsic.
+/// This method returns true if the intrinsic's argument types are all scalars
+/// for the scalar form of the intrinsic and all vectors (or scalars handled by
+/// hasVectorInstrinsicScalarOpd) for the vector form of the intrinsic.
bool isTriviallyVectorizable(Intrinsic::ID ID);
-/// Identifies if the intrinsic has a scalar operand. It checks for
-/// ctlz,cttz and powi special intrinsics whose argument is scalar.
+/// Identifies if the vector form of the intrinsic has a scalar operand.
bool hasVectorInstrinsicScalarOpd(Intrinsic::ID ID, unsigned ScalarOpdIdx);
/// Returns intrinsic ID for call.
@@ -77,6 +77,12 @@
/// a sequence of instructions that broadcast a single value into a vector.
const Value *getSplatValue(const Value *V);
+/// Return true if the input value is known to be a vector with all identical
+/// elements (potentially including undefined elements).
+/// This may be more powerful than the related getSplatValue() because it is
+/// not limited by finding a scalar source value to a splatted vector.
+bool isSplatValue(const Value *V, unsigned Depth = 0);
+
/// Compute a map of integer instructions to their minimum legal type
/// size.
///
@@ -222,6 +228,20 @@
/// elements, it will be padded with undefs.
Value *concatenateVectors(IRBuilder<> &Builder, ArrayRef<Value *> Vecs);
+/// Given a mask vector of the form <Y x i1>, return true if all of the
+/// elements of this predicate mask are false or undef. That is, return true
+/// if all lanes can be assumed inactive.
+bool maskIsAllZeroOrUndef(Value *Mask);
+
+/// Given a mask vector of the form <Y x i1>, return true if all of the
+/// elements of this predicate mask are true or undef. That is, return true
+/// if all lanes can be assumed active.
+bool maskIsAllOneOrUndef(Value *Mask);
+
+/// Given a mask vector of the form <Y x i1>, return an APInt (of bitwidth Y)
+/// for each lane which may be active.
+APInt possiblyDemandedEltsInMask(Value *Mask);
+
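As a concrete example of the intended semantics: for a constant mask <4 x i1> <true, false, true, true>, maskIsAllZeroOrUndef and maskIsAllOneOrUndef both return false, and possiblyDemandedEltsInMask returns the 4-bit APInt 0b1101 (lanes 0, 2 and 3 may be active).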
/// The group of interleaved loads/stores sharing the same stride and
/// close to each other.
///
@@ -250,10 +270,10 @@
/// the interleaved store group doesn't allow gaps.
template <typename InstTy> class InterleaveGroup {
public:
- InterleaveGroup(unsigned Factor, bool Reverse, unsigned Align)
+ InterleaveGroup(uint32_t Factor, bool Reverse, uint32_t Align)
: Factor(Factor), Reverse(Reverse), Align(Align), InsertPos(nullptr) {}
- InterleaveGroup(InstTy *Instr, int Stride, unsigned Align)
+ InterleaveGroup(InstTy *Instr, int32_t Stride, uint32_t Align)
: Align(Align), InsertPos(Instr) {
assert(Align && "The alignment should be non-zero");
@@ -265,19 +285,23 @@
}
bool isReverse() const { return Reverse; }
- unsigned getFactor() const { return Factor; }
- unsigned getAlignment() const { return Align; }
- unsigned getNumMembers() const { return Members.size(); }
+ uint32_t getFactor() const { return Factor; }
+ uint32_t getAlignment() const { return Align; }
+ uint32_t getNumMembers() const { return Members.size(); }
/// Try to insert a new member \p Instr with index \p Index and
/// alignment \p NewAlign. The index is related to the leader and it could be
/// negative if it is the new leader.
///
/// \returns false if the instruction doesn't belong to the group.
- bool insertMember(InstTy *Instr, int Index, unsigned NewAlign) {
+ bool insertMember(InstTy *Instr, int32_t Index, uint32_t NewAlign) {
assert(NewAlign && "The new member's alignment should be non-zero");
- int Key = Index + SmallestKey;
+ // Make sure the key fits in an int32_t.
+ Optional<int32_t> MaybeKey = checkedAdd(Index, SmallestKey);
+ if (!MaybeKey)
+ return false;
+ int32_t Key = *MaybeKey;
// Skip if there is already a member with the same index.
if (Members.find(Key) != Members.end())
@@ -285,13 +309,19 @@
if (Key > LargestKey) {
// The largest index is always less than the interleave factor.
- if (Index >= static_cast<int>(Factor))
+ if (Index >= static_cast<int32_t>(Factor))
return false;
LargestKey = Key;
} else if (Key < SmallestKey) {
+
+ // Make sure the largest index fits in an int32_t.
+ Optional<int32_t> MaybeLargestIndex = checkedSub(LargestKey, Key);
+ if (!MaybeLargestIndex)
+ return false;
+
// The largest index is always less than the interleave factor.
- if (LargestKey - Key >= static_cast<int>(Factor))
+ if (*MaybeLargestIndex >= static_cast<int64_t>(Factor))
return false;
SmallestKey = Key;
@@ -306,8 +336,8 @@
/// Get the member with the given index \p Index
///
/// \returns nullptr if contains no such member.
- InstTy *getMember(unsigned Index) const {
- int Key = SmallestKey + Index;
+ InstTy *getMember(uint32_t Index) const {
+ int32_t Key = SmallestKey + Index;
auto Member = Members.find(Key);
if (Member == Members.end())
return nullptr;
@@ -317,7 +347,7 @@
/// Get the index for the given member. Unlike the key in the member
/// map, the index starts from 0.
- unsigned getIndex(const InstTy *Instr) const {
+ uint32_t getIndex(const InstTy *Instr) const {
for (auto I : Members) {
if (I.second == Instr)
return I.first - SmallestKey;
@@ -355,12 +385,12 @@
}
private:
- unsigned Factor; // Interleave Factor.
+ uint32_t Factor; // Interleave Factor.
bool Reverse;
- unsigned Align;
- DenseMap<int, InstTy *> Members;
- int SmallestKey = 0;
- int LargestKey = 0;
+ uint32_t Align;
+ DenseMap<int32_t, InstTy *> Members;
+ int32_t SmallestKey = 0;
+ int32_t LargestKey = 0;
// To avoid breaking dependences, vectorized instructions of an interleave
// group should be inserted at either the first load or the last store in