Update prebuilt Clang to r416183b from Android.
https://android.googlesource.com/platform/prebuilts/clang/host/linux-x86/+/06a71ddac05c22edb2d10b590e1769b3f8619bef
clang 12.0.5 (based on r416183b) from build 7284624.
Change-Id: I277a316abcf47307562d8b748b84870f31a72866
Signed-off-by: Olivier Deprez <olivier.deprez@arm.com>
diff --git a/linux-x64/clang/include/llvm/Analysis/TargetTransformInfo.h b/linux-x64/clang/include/llvm/Analysis/TargetTransformInfo.h
index af1a12d..ee34312 100644
--- a/linux-x64/clang/include/llvm/Analysis/TargetTransformInfo.h
+++ b/linux-x64/clang/include/llvm/Analysis/TargetTransformInfo.h
@@ -21,31 +21,36 @@
#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
-#include "llvm/ADT/Optional.h"
+#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/PassManager.h"
#include "llvm/Pass.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/DataTypes.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/ScalarEvolution.h"
-#include "llvm/IR/Dominators.h"
-#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Support/InstructionCost.h"
#include <functional>
namespace llvm {
namespace Intrinsic {
-enum ID : unsigned;
+typedef unsigned ID;
}
class AssumptionCache;
+class BlockFrequencyInfo;
+class DominatorTree;
class BranchInst;
+class CallBase;
+class ExtractElementInst;
class Function;
class GlobalValue;
+class InstCombiner;
class IntrinsicInst;
class LoadInst;
+class LoopAccessInfo;
class Loop;
+class LoopInfo;
+class ProfileSummaryInfo;
class SCEV;
class ScalarEvolution;
class StoreInst;
@@ -54,6 +59,8 @@
class Type;
class User;
class Value;
+struct KnownBits;
+template <typename T> class Optional;
/// Information about a load/store intrinsic defined by the target.
struct MemIntrinsicInfo {
@@ -75,7 +82,8 @@
bool isUnordered() const {
return (Ordering == AtomicOrdering::NotAtomic ||
- Ordering == AtomicOrdering::Unordered) && !IsVolatile;
+ Ordering == AtomicOrdering::Unordered) &&
+ !IsVolatile;
}
};
@@ -86,7 +94,7 @@
Loop *L = nullptr;
BasicBlock *ExitBlock = nullptr;
BranchInst *ExitBranch = nullptr;
- const SCEV *ExitCount = nullptr;
+ const SCEV *TripCount = nullptr;
IntegerType *CountType = nullptr;
Value *LoopDecrement = nullptr; // Decrement the loop counter by this
// value in every iteration.
@@ -103,6 +111,68 @@
bool canAnalyze(LoopInfo &LI);
};
+class IntrinsicCostAttributes {
+ const IntrinsicInst *II = nullptr;
+ Type *RetTy = nullptr;
+ Intrinsic::ID IID;
+ SmallVector<Type *, 4> ParamTys;
+ SmallVector<const Value *, 4> Arguments;
+ FastMathFlags FMF;
+ ElementCount VF = ElementCount::getFixed(1);
+ // If ScalarizationCost is UINT_MAX, the cost of scalarizing the
+ // arguments and the return value will be computed based on types.
+ unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
+
+public:
+ IntrinsicCostAttributes(const IntrinsicInst &I);
+
+ IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI);
+
+ IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI,
+ ElementCount Factor);
+
+ IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI,
+ ElementCount Factor, unsigned ScalarCost);
+
+ IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
+ ArrayRef<Type *> Tys, FastMathFlags Flags);
+
+ IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
+ ArrayRef<Type *> Tys, FastMathFlags Flags,
+ unsigned ScalarCost);
+
+ IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
+ ArrayRef<Type *> Tys, FastMathFlags Flags,
+ unsigned ScalarCost,
+ const IntrinsicInst *I);
+
+ IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
+ ArrayRef<Type *> Tys);
+
+ IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,
+ ArrayRef<const Value *> Args);
+
+ Intrinsic::ID getID() const { return IID; }
+ const IntrinsicInst *getInst() const { return II; }
+ Type *getReturnType() const { return RetTy; }
+ ElementCount getVectorFactor() const { return VF; }
+ FastMathFlags getFlags() const { return FMF; }
+ unsigned getScalarizationCost() const { return ScalarizationCost; }
+ const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; }
+ const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; }
+
+ bool isTypeBasedOnly() const {
+ return Arguments.empty();
+ }
+
+ bool skipScalarizationCost() const {
+ return ScalarizationCost != std::numeric_limits<unsigned>::max();
+ }
+};
+
+class TargetTransformInfo;
+typedef TargetTransformInfo TTI;
+
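Illustrative sketch only (not taken from this change): costing an intrinsic call through the new attribute bundle, assuming a TargetTransformInfo reference TTI and an IntrinsicInst reference Intr are already in scope.

    IntrinsicCostAttributes ICA(Intr);
    int Cost =
        TTI.getIntrinsicInstrCost(ICA, TargetTransformInfo::TCK_RecipThroughput);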
/// This pass provides access to the codegen interfaces that are needed
/// for IR-level transformations.
class TargetTransformInfo {
@@ -151,7 +221,8 @@
enum TargetCostKind {
TCK_RecipThroughput, ///< Reciprocal throughput.
  TCK_Latency, ///< The latency of the instruction.
- TCK_CodeSize ///< Instruction code size.
+ TCK_CodeSize, ///< Instruction code size.
+ TCK_SizeAndLatency ///< The weighted sum of size and latency.
};
/// Query the cost of a specified instruction.
@@ -161,18 +232,24 @@
///
/// Note, this method does not cache the cost calculation and it
/// can be expensive in some cases.
- int getInstructionCost(const Instruction *I, enum TargetCostKind kind) const {
- switch (kind){
+ InstructionCost getInstructionCost(const Instruction *I,
+ enum TargetCostKind kind) const {
+ InstructionCost Cost;
+ switch (kind) {
case TCK_RecipThroughput:
- return getInstructionThroughput(I);
-
+ Cost = getInstructionThroughput(I);
+ break;
case TCK_Latency:
- return getInstructionLatency(I);
-
+ Cost = getInstructionLatency(I);
+ break;
case TCK_CodeSize:
- return getUserCost(I);
+ case TCK_SizeAndLatency:
+ Cost = getUserCost(I, kind);
+ break;
}
- llvm_unreachable("Unknown instruction cost kind");
+ if (Cost == -1)
+ Cost.setInvalid();
+ return Cost;
}
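A minimal sketch of the new flow (illustrative, assuming a TargetTransformInfo reference TTI and an Instruction &I): the result is now an InstructionCost, whose invalid state replaces the old -1 sentinel.

    InstructionCost C =
        TTI.getInstructionCost(&I, TargetTransformInfo::TCK_SizeAndLatency);
    bool CostKnown = C.isValid(); // invalid means the target gave no answer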
/// Underlying constants for 'cost' values in this interface.
@@ -199,61 +276,10 @@
TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
};
- /// Estimate the cost of a specific operation when lowered.
- ///
- /// Note that this is designed to work on an arbitrary synthetic opcode, and
- /// thus work for hypothetical queries before an instruction has even been
- /// formed. However, this does *not* work for GEPs, and must not be called
- /// for a GEP instruction. Instead, use the dedicated getGEPCost interface as
- /// analyzing a GEP's cost required more information.
- ///
- /// Typically only the result type is required, and the operand type can be
- /// omitted. However, if the opcode is one of the cast instructions, the
- /// operand type is required.
- ///
- /// The returned cost is defined in terms of \c TargetCostConstants, see its
- /// comments for a detailed explanation of the cost values.
- int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy = nullptr) const;
-
/// Estimate the cost of a GEP operation when lowered.
- ///
- /// The contract for this function is the same as \c getOperationCost except
- /// that it supports an interface that provides extra information specific to
- /// the GEP operation.
int getGEPCost(Type *PointeeType, const Value *Ptr,
- ArrayRef<const Value *> Operands) const;
-
- /// Estimate the cost of a EXT operation when lowered.
- ///
- /// The contract for this function is the same as \c getOperationCost except
- /// that it supports an interface that provides extra information specific to
- /// the EXT operation.
- int getExtCost(const Instruction *I, const Value *Src) const;
-
- /// Estimate the cost of a function call when lowered.
- ///
- /// The contract for this is the same as \c getOperationCost except that it
- /// supports an interface that provides extra information specific to call
- /// instructions.
- ///
- /// This is the most basic query for estimating call cost: it only knows the
- /// function type and (potentially) the number of arguments at the call site.
- /// The latter is only interesting for varargs function types.
- int getCallCost(FunctionType *FTy, int NumArgs = -1,
- const User *U = nullptr) const;
-
- /// Estimate the cost of calling a specific function when lowered.
- ///
- /// This overload adds the ability to reason about the particular function
- /// being called in the event it is a library call with special lowering.
- int getCallCost(const Function *F, int NumArgs = -1,
- const User *U = nullptr) const;
-
- /// Estimate the cost of calling a specific function when lowered.
- ///
- /// This overload allows specifying a set of candidate argument values.
- int getCallCost(const Function *F, ArrayRef<const Value *> Arguments,
- const User *U = nullptr) const;
+ ArrayRef<const Value *> Operands,
+ TargetCostKind CostKind = TCK_SizeAndLatency) const;
/// \returns A value by which our inlining threshold should be multiplied.
/// This is primarily used to bump up the inlining threshold wholesale on
@@ -263,19 +289,17 @@
/// individual classes of instructions would be better.
unsigned getInliningThresholdMultiplier() const;
- /// Estimate the cost of an intrinsic when lowered.
+ /// \returns Vector bonus in percent.
///
- /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
- int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Type *> ParamTys,
- const User *U = nullptr) const;
-
- /// Estimate the cost of an intrinsic when lowered.
- ///
- /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
- int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<const Value *> Arguments,
- const User *U = nullptr) const;
+ /// Vector bonuses: We want to more aggressively inline vector-dense kernels
+ /// and apply this bonus based on the percentage of vector instructions. A
+ /// bonus is applied if the vector instructions exceed 50% and half that
+ /// amount is applied if it exceeds 10%. Note that these bonuses are somewhat
+ /// arbitrary and evolved over time by accident as much as because they are
+ /// principled bonuses.
+ /// FIXME: It would be nice to base the bonus values on something more
+ /// scientific. A target may have no bonus on vector instructions.
+ int getInlinerVectorBonusPercent() const;
/// \return the expected cost of a memcpy, which could e.g. depend on the
/// source/destination type and alignment and the number of bytes copied.
@@ -285,20 +309,14 @@
/// \p JTSize Set a jump table size only when \p SI is suitable for a jump
/// table.
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
- unsigned &JTSize) const;
+ unsigned &JTSize,
+ ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI) const;
/// Estimate the cost of a given IR user when lowered.
///
/// This can estimate the cost of either a ConstantExpr or Instruction when
- /// lowered. It has two primary advantages over the \c getOperationCost and
- /// \c getGEPCost above, and one significant disadvantage: it can only be
- /// used when the IR construct has already been formed.
- ///
- /// The advantages are that it can inspect the SSA use graph to reason more
- /// accurately about the cost. For example, all-constant-GEPs can often be
- /// folded into a load or other instruction, but if they are used in some
- /// other context they may not be folded. This routine can distinguish such
- /// cases.
+ /// lowered.
///
/// \p Operands is a list of operands which can be a result of transformations
/// of the current operands. The number of the operands on the list must equal
@@ -308,14 +326,14 @@
///
/// The returned cost is defined in terms of \c TargetCostConstants, see its
/// comments for a detailed explanation of the cost values.
- int getUserCost(const User *U, ArrayRef<const Value *> Operands) const;
+ int getUserCost(const User *U, ArrayRef<const Value *> Operands,
+ TargetCostKind CostKind) const;
/// This is a helper function which calls the two-argument getUserCost
/// with \p Operands which are the current operands U has.
- int getUserCost(const User *U) const {
- SmallVector<const Value *, 4> Operands(U->value_op_begin(),
- U->value_op_end());
- return getUserCost(U, Operands);
+ int getUserCost(const User *U, TargetCostKind CostKind) const {
+ SmallVector<const Value *, 4> Operands(U->operand_values());
+ return getUserCost(U, Operands, CostKind);
}
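Usage sketch (illustrative): the convenience overload now requires an explicit cost kind, e.g. for a User *U in scope:

    int UC = TTI.getUserCost(U, TargetTransformInfo::TCK_SizeAndLatency);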
/// Return true if branch divergence exists.
@@ -325,12 +343,16 @@
/// branches.
bool hasBranchDivergence() const;
+ /// Return true if the target prefers to use GPU divergence analysis to
+ /// replace the legacy version.
+ bool useGPUDivergenceAnalysis() const;
+
/// Returns whether V is a source of divergence.
///
/// This function provides the target-dependent information for
- /// the target-independent LegacyDivergenceAnalysis. LegacyDivergenceAnalysis first
- /// builds the dependency graph, and then runs the reachability algorithm
- /// starting with the sources of divergence.
+ /// the target-independent LegacyDivergenceAnalysis. LegacyDivergenceAnalysis
+ /// first builds the dependency graph, and then runs the reachability
+ /// algorithm starting with the sources of divergence.
bool isSourceOfDivergence(const Value *V) const;
// Returns true for the target specific
@@ -356,6 +378,25 @@
/// optimize away.
unsigned getFlatAddressSpace() const;
+ /// Return any intrinsic address operand indexes which may be rewritten if
+ /// they use a flat address space pointer.
+ ///
+ /// \returns true if the intrinsic was handled.
+ bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
+ Intrinsic::ID IID) const;
+
+ bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;
+
+ unsigned getAssumedAddrSpace(const Value *V) const;
+
+ /// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p
+ /// NewV, which has a different address space. This should happen for every
+ /// operand index that collectFlatAddressOperands returned for the intrinsic.
+ /// \returns nullptr if the intrinsic was not handled. Otherwise, returns the
+ /// new value (which may be the original \p II with modified operands).
+ Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
+ Value *NewV) const;
+
/// Test whether calls to a function lower to actual program function
/// calls.
///
@@ -419,11 +460,6 @@
/// transformation will select an unrolling factor based on the current cost
/// threshold and other factors.
unsigned Count;
- /// A forced peeling factor (the number of bodied of the original loop
- /// that should be peeled off before the loop body). When set to 0, the
- /// unrolling transformation will select a peeling factor based on profile
- /// information and other factors.
- unsigned PeelCount;
/// Default unroll count for loops with run-time trip count.
unsigned DefaultUnrollRuntimeCount;
// Set the maximum unrolling factor. The unrolling factor may be selected
@@ -457,8 +493,6 @@
bool Force;
/// Allow using trip count upper bound to unroll loops.
bool UpperBound;
- /// Allow peeling off loop iterations for loops with low dynamic tripcount.
- bool AllowPeeling;
/// Allow unrolling of all the iterations of the runtime loop remainder.
bool UnrollRemainder;
/// Allow unroll and jam. Used to enable unroll and jam for the target.
@@ -468,6 +502,9 @@
/// This value is used in the same manner to limit the size of the inner
/// loop.
unsigned UnrollAndJamInnerLoopThreshold;
+ /// Don't allow loop unrolling to simulate more than this number of
+ /// iterations when checking full unroll profitability
+ unsigned MaxIterationsCountToAnalyze;
};
/// Get target-customized preferences for the generic loop unrolling
@@ -479,10 +516,65 @@
/// Query the target whether it would be profitable to convert the given loop
/// into a hardware loop.
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
- AssumptionCache &AC,
- TargetLibraryInfo *LibInfo,
+ AssumptionCache &AC, TargetLibraryInfo *LibInfo,
HardwareLoopInfo &HWLoopInfo) const;
+ /// Query the target whether it would be preferred to create a predicated
+ /// vector loop, which can avoid the need to emit a scalar epilogue loop.
+ bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
+ AssumptionCache &AC, TargetLibraryInfo *TLI,
+ DominatorTree *DT,
+ const LoopAccessInfo *LAI) const;
+
+ /// Query the target whether lowering of the llvm.get.active.lane.mask
+ /// intrinsic is supported.
+ bool emitGetActiveLaneMask() const;
+
+ // Parameters that control the loop peeling transformation
+ struct PeelingPreferences {
+ /// A forced peeling factor (the number of bodies of the original loop
+ /// that should be peeled off before the loop body). When set to 0, a
+ /// peeling factor is selected based on profile information and other factors.
+ unsigned PeelCount;
+ /// Allow peeling off loop iterations.
+ bool AllowPeeling;
+ /// Allow peeling off loop iterations for loop nests.
+ bool AllowLoopNestsPeeling;
+ /// Allow peeling based on profile. Used to enable peeling off all
+ /// iterations based on the provided profile.
+ /// If the value is true, the peeling cost model can decide to peel only
+ /// some iterations, and in this case it will set this to false.
+ bool PeelProfiledIterations;
+ };
+
+ /// Get target-customized preferences for the generic loop peeling
+ /// transformation. The caller will initialize \p PP with the current
+ /// target-independent defaults with information from \p L and \p SE.
+ void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ PeelingPreferences &PP) const;
+
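Illustrative sketch only: how a caller might seed the defaults before asking the target, assuming Loop *L and ScalarEvolution &SE from the surrounding pass.

    TargetTransformInfo::PeelingPreferences PP;
    PP.PeelCount = 0;                  // 0 lets the cost model choose
    PP.AllowPeeling = true;
    PP.AllowLoopNestsPeeling = false;
    PP.PeelProfiledIterations = true;
    TTI.getPeelingPreferences(L, SE, PP); // target-specific overrides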
+ /// Targets can implement their own combinations for target-specific
+ /// intrinsics. This function will be called from the InstCombine pass every
+ /// time a target-specific intrinsic is encountered.
+ ///
+ /// \returns None to not do anything target specific, or a value that will be
+ /// returned from the InstCombiner. It is also possible to stop further
+ /// processing of the intrinsic by returning nullptr.
+ Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
+ IntrinsicInst &II) const;
+ /// Can be used to implement target-specific instruction combining.
+ /// \see instCombineIntrinsic
+ Optional<Value *>
+ simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
+ APInt DemandedMask, KnownBits &Known,
+ bool &KnownBitsComputed) const;
+ /// Can be used to implement target-specific instruction combining.
+ /// \see instCombineIntrinsic
+ Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
+ InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
+ APInt &UndefElts2, APInt &UndefElts3,
+ std::function<void(Instruction *, unsigned, APInt, APInt &)>
+ SimplifyAndSetOp) const;
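A sketch of the expected call pattern from the InstCombine side (illustrative; IC and II are assumed to be the current InstCombiner and IntrinsicInst):

    if (Optional<Instruction *> V = TTI.instCombineIntrinsic(IC, II))
      return *V; // may be nullptr, which stops further processing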
/// @}
/// \name Scalar Target Information
@@ -524,6 +616,14 @@
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2) const;
+ /// Return true if LSR's major cost is the number of registers. Targets that
+ /// implement their own isLSRCostLess and do not treat the number of registers
+ /// as the major cost should return false; otherwise return true.
+ bool isNumRegsMajorCostOfLSR() const;
+
+ /// \returns true if LSR should not optimize a chain that includes \p I.
+ bool isProfitableLSRChainElement(Instruction *I) const;
+
/// Return true if the target can fuse a compare and branch.
/// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
/// calculation for the instructions in a loop.
@@ -543,20 +643,20 @@
/// modes that operate across loop iterations.
bool shouldFavorBackedgeIndex(const Loop *L) const;
- /// Return true if the target supports masked load.
- bool isLegalMaskedStore(Type *DataType) const;
/// Return true if the target supports masked store.
- bool isLegalMaskedLoad(Type *DataType) const;
+ bool isLegalMaskedStore(Type *DataType, Align Alignment) const;
+ /// Return true if the target supports masked load.
+ bool isLegalMaskedLoad(Type *DataType, Align Alignment) const;
/// Return true if the target supports nontemporal store.
- bool isLegalNTStore(Type *DataType, unsigned Alignment) const;
+ bool isLegalNTStore(Type *DataType, Align Alignment) const;
/// Return true if the target supports nontemporal load.
- bool isLegalNTLoad(Type *DataType, unsigned Alignment) const;
+ bool isLegalNTLoad(Type *DataType, Align Alignment) const;
/// Return true if the target supports masked scatter.
- bool isLegalMaskedScatter(Type *DataType) const;
+ bool isLegalMaskedScatter(Type *DataType, Align Alignment) const;
/// Return true if the target supports masked gather.
- bool isLegalMaskedGather(Type *DataType) const;
+ bool isLegalMaskedGather(Type *DataType, Align Alignment) const;
/// Return true if the target supports masked compress store.
bool isLegalMaskedCompressStore(Type *DataType) const;
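For example (illustrative, with a vector type VecTy and an Align value A computed by the caller), a vectorizer-style legality guard now passes the alignment explicitly:

    bool CanMask = TTI.isLegalMaskedLoad(VecTy, A) &&
                   TTI.isLegalMaskedStore(VecTy, A);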
@@ -610,11 +710,8 @@
/// Return true if this type is legal.
bool isTypeLegal(Type *Ty) const;
- /// Returns the target's jmp_buf alignment in bytes.
- unsigned getJumpBufAlignment() const;
-
- /// Returns the target's jmp_buf size in bytes.
- unsigned getJumpBufSize() const;
+ /// Returns the estimated number of registers required to represent \p Ty.
+ unsigned getRegUsageForType(Type *Ty) const;
/// Return true if switches should be turned into lookup tables for the
/// target.
@@ -628,8 +725,15 @@
/// should use coldcc calling convention.
bool useColdCCForColdCall(Function &F) const;
- unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
+ /// Estimate the overhead of scalarizing an instruction. Insert and Extract
+ /// are set if the demanded result elements need to be inserted and/or
+ /// extracted from vectors.
+ unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts,
+ bool Insert, bool Extract) const;
+ /// Estimate the overhead of scalarizing an instruction's unique
+ /// non-constant operands. The types of the arguments are ordinarily
+ /// scalar, in which case the costs are multiplied by VF.
unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
unsigned VF) const;
@@ -689,8 +793,8 @@
bool isFPVectorizationPotentiallyUnsafe() const;
/// Determine if the target supports unaligned memory accesses.
- bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
- unsigned BitWidth, unsigned AddressSpace = 0,
+ bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
+ unsigned AddressSpace = 0,
unsigned Alignment = 1,
bool *Fast = nullptr) const;
@@ -712,15 +816,16 @@
/// Return the expected cost of materializing for the given integer
/// immediate of the specified type.
- int getIntImmCost(const APInt &Imm, Type *Ty) const;
+ int getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const;
/// Return the expected cost of materialization for the given integer
/// immediate of the specified type for a given instruction. The cost can be
/// zero if the immediate can be folded into the specified instruction.
- int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
- Type *Ty) const;
- int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty) const;
+ int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty,
+ TargetCostKind CostKind,
+ Instruction *Inst = nullptr) const;
+ int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty, TargetCostKind CostKind) const;
/// Return the expected cost for the given integer when optimising
/// for size. This is different than the other integer immediate cost
@@ -738,20 +843,54 @@
/// The various kinds of shuffle patterns for vector queries.
enum ShuffleKind {
- SK_Broadcast, ///< Broadcast element 0 to all other elements.
- SK_Reverse, ///< Reverse the order of the vector.
- SK_Select, ///< Selects elements from the corresponding lane of
- ///< either source operand. This is equivalent to a
- ///< vector select with a constant condition operand.
- SK_Transpose, ///< Transpose two vectors.
- SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
- SK_ExtractSubvector,///< ExtractSubvector Index indicates start offset.
- SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one
- ///< with any shuffle mask.
- SK_PermuteSingleSrc ///< Shuffle elements of single source vector with any
- ///< shuffle mask.
+ SK_Broadcast, ///< Broadcast element 0 to all other elements.
+ SK_Reverse, ///< Reverse the order of the vector.
+ SK_Select, ///< Selects elements from the corresponding lane of
+ ///< either source operand. This is equivalent to a
+ ///< vector select with a constant condition operand.
+ SK_Transpose, ///< Transpose two vectors.
+ SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
+ SK_ExtractSubvector, ///< ExtractSubvector Index indicates start offset.
+ SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one
+ ///< with any shuffle mask.
+ SK_PermuteSingleSrc ///< Shuffle elements of single source vector with any
+ ///< shuffle mask.
};
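Usage sketch (illustrative): shuffle costs are now queried with a VectorType, e.g. the cost of broadcasting lane 0 of VecTy:

    int SplatCost =
        TTI.getShuffleCost(TargetTransformInfo::SK_Broadcast, VecTy);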
+ /// Kind of the reduction data.
+ enum ReductionKind {
+ RK_None, /// Not a reduction.
+ RK_Arithmetic, /// Binary reduction data.
+ RK_MinMax, /// Min/max reduction data.
+ RK_UnsignedMinMax, /// Unsigned min/max reduction data.
+ };
+
+ /// Contains opcode + LHS/RHS parts of the reduction operations.
+ struct ReductionData {
+ ReductionData() = delete;
+ ReductionData(ReductionKind Kind, unsigned Opcode, Value *LHS, Value *RHS)
+ : Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind) {
+ assert(Kind != RK_None && "expected binary or min/max reduction only.");
+ }
+ unsigned Opcode = 0;
+ Value *LHS = nullptr;
+ Value *RHS = nullptr;
+ ReductionKind Kind = RK_None;
+ bool hasSameData(ReductionData &RD) const {
+ return Kind == RD.Kind && Opcode == RD.Opcode;
+ }
+ };
+
+ static ReductionKind matchPairwiseReduction(
+ const ExtractElementInst *ReduxRoot, unsigned &Opcode, VectorType *&Ty);
+
+ static ReductionKind matchVectorSplittingReduction(
+ const ExtractElementInst *ReduxRoot, unsigned &Opcode, VectorType *&Ty);
+
+ static ReductionKind matchVectorReduction(const ExtractElementInst *ReduxRoot,
+ unsigned &Opcode, VectorType *&Ty,
+ bool &IsPairwise);
+
/// Additional information about an operand's possible values.
enum OperandValueKind {
OK_AnyValue, // Operand can have any value.
@@ -763,10 +902,24 @@
/// Additional properties of an operand's values.
enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };
- /// \return The number of scalar or vector registers that the target has.
- /// If 'Vectors' is true, it returns the number of vector registers. If it is
- /// set to false, it returns the number of scalar registers.
- unsigned getNumberOfRegisters(bool Vector) const;
+ /// \return the number of registers in the target-provided register class.
+ unsigned getNumberOfRegisters(unsigned ClassID) const;
+
+ /// \return the target-provided register class ID for the provided type,
+ /// accounting for type promotion and other type-legalization techniques that
+ /// the target might apply. However, it specifically does not account for the
+ /// scalarization or splitting of vector types. Should a vector type require
+ /// scalarization or splitting into multiple underlying vector registers, that
+ /// type should be mapped to a register class containing no registers.
+ /// Specifically, this is designed to provide a simple, high-level view of the
+ /// register allocation later performed by the backend. These register classes
+ /// don't necessarily map onto the register classes used by the backend.
+ /// FIXME: It's not currently possible to determine how many registers
+ /// are used by the provided type.
+ unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;
+
+ /// \return the target-provided register class name
+ const char *getRegisterClassName(unsigned ClassID) const;
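Illustrative sketch: the per-class register queries are meant to be chained, e.g. for some vector type VecTy:

    unsigned ClassID = TTI.getRegisterClassForType(/*Vector=*/true, VecTy);
    unsigned NumRegs = TTI.getNumberOfRegisters(ClassID);
    const char *ClassName = TTI.getRegisterClassName(ClassID);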
/// \return The width of the largest scalar or vector register type.
unsigned getRegisterBitWidth(bool Vector) const;
@@ -774,6 +927,10 @@
/// \return The width of the smallest vector register type.
unsigned getMinVectorRegisterBitWidth() const;
+ /// \return The maximum value of vscale if the target specifies an
+ /// architectural maximum vector length, and None otherwise.
+ Optional<unsigned> getMaxVScale() const;
+
/// \return True if the vectorization factor should be chosen to
/// make the vector of the smallest element type match the size of a
/// vector register. For wider element types, this could result in
@@ -787,6 +944,11 @@
/// applies when shouldMaximizeVectorBandwidth returns true.
unsigned getMinimumVF(unsigned ElemWidth) const;
+ /// \return The maximum vectorization factor for types of given element
+ /// bit width and opcode, or 0 if there is no maximum VF.
+ /// Currently only used by the SLP vectorizer.
+ unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;
+
/// \return True if it should be considered for address type promotion.
/// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
/// profitable without finding other extensions fed by the same input.
@@ -798,8 +960,8 @@
/// The possible cache levels
enum class CacheLevel {
- L1D, // The L1 data cache
- L2D, // The L2 data cache
+ L1D, // The L1 data cache
+ L2D, // The L2 data cache
// We currently do not model L3 caches, as their sizes differ widely between
// microarchitectures. Also, we currently do not have a use for L3 cache
@@ -807,32 +969,52 @@
};
/// \return The size of the cache level in bytes, if available.
- llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const;
+ Optional<unsigned> getCacheSize(CacheLevel Level) const;
/// \return The associativity of the cache level, if available.
- llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
+ Optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
- /// \return How much before a load we should place the prefetch instruction.
- /// This is currently measured in number of instructions.
+ /// \return How much before a load we should place the prefetch
+ /// instruction. This is currently measured in number of
+ /// instructions.
unsigned getPrefetchDistance() const;
- /// \return Some HW prefetchers can handle accesses up to a certain constant
- /// stride. This is the minimum stride in bytes where it makes sense to start
- /// adding SW prefetches. The default is 1, i.e. prefetch with any stride.
- unsigned getMinPrefetchStride() const;
+ /// Some HW prefetchers can handle accesses up to a certain constant stride.
+ /// Sometimes prefetching is beneficial even below the HW prefetcher limit,
+ /// and the arguments provided are meant to serve as a basis for deciding this
+ /// for a particular loop.
+ ///
+ /// \param NumMemAccesses Number of memory accesses in the loop.
+ /// \param NumStridedMemAccesses Number of the memory accesses that
+ /// ScalarEvolution could find a known stride
+ /// for.
+ /// \param NumPrefetches Number of software prefetches that will be
+ /// emitted as determined by the addresses
+ /// involved and the cache line size.
+ /// \param HasCall True if the loop contains a call.
+ ///
+ /// \return This is the minimum stride in bytes where it makes sense to start
+ /// adding SW prefetches. The default is 1, i.e. prefetch with any
+ /// stride.
+ unsigned getMinPrefetchStride(unsigned NumMemAccesses,
+ unsigned NumStridedMemAccesses,
+ unsigned NumPrefetches, bool HasCall) const;
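Illustrative use (the loop statistics NumMemAccesses, NumStridedMemAccesses, NumPrefetches, HasCall and the access stride ByteStride are assumed to be gathered by the caller):

    unsigned MinStride = TTI.getMinPrefetchStride(
        NumMemAccesses, NumStridedMemAccesses, NumPrefetches, HasCall);
    bool WorthPrefetching = ByteStride >= MinStride;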
- /// \return The maximum number of iterations to prefetch ahead. If the
- /// required number of iterations is more than this number, no prefetching is
- /// performed.
+ /// \return The maximum number of iterations to prefetch ahead. If
+ /// the required number of iterations is more than this number, no
+ /// prefetching is performed.
unsigned getMaxPrefetchIterationsAhead() const;
+ /// \return True if prefetching should also be done for writes.
+ bool enableWritePrefetching() const;
+
/// \return The maximum interleave factor that any transform should try to
/// perform for this target. This number depends on the level of parallelism
/// and the number of execution units in the CPU.
unsigned getMaxInterleaveFactor(unsigned VF) const;
/// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.
- static OperandValueKind getOperandInfo(Value *V,
+ static OperandValueKind getOperandInfo(const Value *V,
OperandValueProperties &OpProps);
/// This is an approximation of reciprocal throughput of a math/logic op.
@@ -849,25 +1031,68 @@
/// \p Args is an optional argument which holds the instruction operands
/// values so the TTI can analyze those values searching for special
/// cases or optimizations based on those values.
+ /// \p CxtI is the optional original context instruction, if one exists, to
+ /// provide even more information.
int getArithmeticInstrCost(
- unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
+ unsigned Opcode, Type *Ty,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ OperandValueKind Opd1Info = OK_AnyValue,
OperandValueKind Opd2Info = OK_AnyValue,
OperandValueProperties Opd1PropInfo = OP_None,
OperandValueProperties Opd2PropInfo = OP_None,
- ArrayRef<const Value *> Args = ArrayRef<const Value *>()) const;
+ ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
+ const Instruction *CxtI = nullptr) const;
/// \return The cost of a shuffle instruction of kind Kind and of type Tp.
/// The index and subtype parameters are used by the subvector insertion and
/// extraction shuffle kinds to show the insert/extract point and the type of
/// the subvector being inserted/extracted.
/// NOTE: For subvector extractions Tp represents the source type.
- int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0,
- Type *SubTp = nullptr) const;
+ int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index = 0,
+ VectorType *SubTp = nullptr) const;
+
+ /// Represents a hint about the context in which a cast is used.
+ ///
+ /// For zext/sext, the context of the cast is the operand, which must be a
+ /// load of some kind. For trunc, the context is of the cast is the single
+ /// load of some kind. For trunc, the context of the cast is the single
+ ///
+ /// This enum allows the vectorizer to give getCastInstrCost an idea of the
+ /// type of cast it's dealing with, as not every cast is equal. For instance,
+ /// the zext of a load may be free, but the zext of an interleaving load can
+ /// be (very) expensive!
+ ///
+ /// See \c getCastContextHint to compute a CastContextHint from a cast
+ /// Instruction*. Callers can use it if they don't need to override the
+ /// context and just want it to be calculated from the instruction.
+ ///
+ /// FIXME: This handles the types of load/store that the vectorizer can
+ /// produce, which are the cases where the context instruction is most
+ /// likely to be incorrect. There are other situations where that can happen
+ /// too, which might be handled here but in the long run a more general
+ /// solution of costing multiple instructions at the same time may be better.
+ enum class CastContextHint : uint8_t {
+ None, ///< The cast is not used with a load/store of any kind.
+ Normal, ///< The cast is used with a normal load/store.
+ Masked, ///< The cast is used with a masked load/store.
+ GatherScatter, ///< The cast is used with a gather/scatter.
+ Interleave, ///< The cast is used with an interleaved load/store.
+ Reversed, ///< The cast is used with a reversed load/store.
+ };
+
+ /// Calculates a CastContextHint from \p I.
+ /// This should be used by callers of getCastInstrCost if they wish to
+ /// determine the context from some instruction.
+ /// \returns the CastContextHint for ZExt/SExt/Trunc, None if \p I is nullptr,
+ /// or if it's another type of cast.
+ static CastContextHint getCastContextHint(const Instruction *I);
/// \return The expected cost of cast instructions, such as bitcast, trunc,
/// zext, etc. If there is an existing instruction that holds Opcode, it
/// may be passed in the 'I' parameter.
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ TTI::CastContextHint CCH,
+ TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,
const Instruction *I = nullptr) const;
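Sketch of the intended pairing (illustrative; CastI, DstTy and SrcTy are assumed to describe an existing cast instruction):

    TargetTransformInfo::CastContextHint CCH =
        TargetTransformInfo::getCastContextHint(CastI);
    int CastCost = TTI.getCastInstrCost(CastI->getOpcode(), DstTy, SrcTy, CCH,
                                        TargetTransformInfo::TCK_RecipThroughput,
                                        CastI);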
/// \return The expected cost of a sign- or zero-extended vector extract. Use
@@ -877,25 +1102,34 @@
/// \return The expected cost of control-flow related instructions such as
/// Phi, Ret, Br.
- int getCFInstrCost(unsigned Opcode) const;
+ int getCFInstrCost(unsigned Opcode,
+ TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const;
/// \returns The expected cost of compare and select instructions. If there
/// is an existing instruction that holds Opcode, it may be passed in the
- /// 'I' parameter.
- int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy = nullptr, const Instruction *I = nullptr) const;
+ /// 'I' parameter. The \p VecPred parameter can be used to indicate the select
+ /// is using a compare with the specified predicate as condition. When vector
+ /// types are passed, \p VecPred must be used for all lanes.
+ int getCmpSelInstrCost(
+ unsigned Opcode, Type *ValTy, Type *CondTy = nullptr,
+ CmpInst::Predicate VecPred = CmpInst::BAD_ICMP_PREDICATE,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ const Instruction *I = nullptr) const;
/// \return The expected cost of vector Insert and Extract.
/// Use -1 to indicate that there is no information on the index value.
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;
/// \return The cost of Load and Store instructions.
- int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace, const Instruction *I = nullptr) const;
+ int getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
+ unsigned AddressSpace,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ const Instruction *I = nullptr) const;
/// \return The cost of masked Load and Store instructions.
- int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) const;
+ int getMaskedMemoryOpCost(
+ unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
/// \return The cost of Gather or Scatter operation
/// \p Opcode - is a type of memory access Load or Store
@@ -904,8 +1138,12 @@
/// \p VariableMask - true when the memory access is predicated with a mask
/// that is not a compile-time constant
/// \p Alignment - alignment of single element
- int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
- bool VariableMask, unsigned Alignment) const;
+ /// \p I - the optional original context instruction, if one exists, e.g. the
+ /// load/store to transform or the call to the gather/scatter intrinsic
+ int getGatherScatterOpCost(
+ unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,
+ Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ const Instruction *I = nullptr) const;
/// \return The cost of the interleaved memory operation.
/// \p Opcode is the memory operation code
@@ -917,11 +1155,11 @@
/// \p AddressSpace is address space of the pointer.
/// \p UseMaskForCond indicates if the memory access is predicated.
/// \p UseMaskForGaps indicates if gaps should be masked.
- int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
- ArrayRef<unsigned> Indices, unsigned Alignment,
- unsigned AddressSpace,
- bool UseMaskForCond = false,
- bool UseMaskForGaps = false) const;
+ int getInterleavedMemoryOpCost(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ Align Alignment, unsigned AddressSpace,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
+ bool UseMaskForCond = false, bool UseMaskForGaps = false) const;
/// Calculate the cost of performing a vector reduction.
///
@@ -936,27 +1174,23 @@
/// Split:
/// (v0, v1, v2, v3)
/// ((v0+v2), (v1+v3), undef, undef)
- int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
- bool IsPairwiseForm) const;
- int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm,
- bool IsUnsigned) const;
+ int getArithmeticReductionCost(
+ unsigned Opcode, VectorType *Ty, bool IsPairwiseForm,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
+
+ int getMinMaxReductionCost(
+ VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned,
+ TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;
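Illustrative query of the reworked reduction-cost interfaces for a vector type VecTy (CondTy is an assumed condition vector type for the min/max form):

    int AddRed = TTI.getArithmeticReductionCost(Instruction::Add, VecTy,
                                                /*IsPairwiseForm=*/false);
    int MinRed = TTI.getMinMaxReductionCost(VecTy, CondTy,
                                            /*IsPairwiseForm=*/false,
                                            /*IsUnsigned=*/true);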
/// \returns The cost of Intrinsic instructions. Analyses the real arguments.
/// Three cases are handled: 1. scalar instruction 2. vector instruction
- /// 3. scalar instruction which is to be vectorized with VF.
- int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Value *> Args, FastMathFlags FMF,
- unsigned VF = 1) const;
-
- /// \returns The cost of Intrinsic instructions. Types analysis only.
- /// If ScalarizationCostPassed is UINT_MAX, the cost of scalarizing the
- /// arguments and the return value will be computed based on types.
- int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type *> Tys, FastMathFlags FMF,
- unsigned ScalarizationCostPassed = UINT_MAX) const;
+ /// 3. scalar instruction which is to be vectorized.
+ int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+ TTI::TargetCostKind CostKind) const;
/// \returns The cost of Call instructions.
- int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const;
+ int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys,
+ TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const;
/// \returns The number of pieces into which the provided type must be
/// split during legalization. Zero is returned when the answer is unknown.
@@ -998,6 +1232,7 @@
/// \returns The type to use in a loop expansion of a memcpy call.
Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
+ unsigned SrcAddrSpace, unsigned DestAddrSpace,
unsigned SrcAlign, unsigned DestAlign) const;
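Illustrative call with the new address-space parameters (Ctx, Length, the address spaces and the alignments are assumed to come from the memcpy being lowered):

    Type *OpTy = TTI.getMemcpyLoopLoweringType(Ctx, Length, SrcAS, DstAS,
                                               SrcAlign, DstAlign);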
/// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
@@ -1006,11 +1241,10 @@
/// Calculates the operand types to use when copying \p RemainingBytes of
/// memory, where source and destination alignments are \p SrcAlign and
/// \p DestAlign respectively.
- void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
- LLVMContext &Context,
- unsigned RemainingBytes,
- unsigned SrcAlign,
- unsigned DestAlign) const;
+ void getMemcpyLoopResidualLoweringType(
+ SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
+ unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
+ unsigned SrcAlign, unsigned DestAlign) const;
/// \returns True if the two functions have compatible attributes for inlining
/// purposes.
@@ -1027,11 +1261,11 @@
/// The type of load/store indexing.
enum MemIndexedMode {
- MIM_Unindexed, ///< No indexing.
- MIM_PreInc, ///< Pre-incrementing.
- MIM_PreDec, ///< Pre-decrementing.
- MIM_PostInc, ///< Post-incrementing.
- MIM_PostDec ///< Post-decrementing.
+ MIM_Unindexed, ///< No indexing.
+ MIM_PreInc, ///< Pre-incrementing.
+ MIM_PreDec, ///< Pre-decrementing.
+ MIM_PostInc, ///< Post-incrementing.
+ MIM_PostDec ///< Post-decrementing.
};
/// \returns True if the specified indexed load for the given type is legal.
@@ -1051,13 +1285,11 @@
bool isLegalToVectorizeStore(StoreInst *SI) const;
/// \returns True if it is legal to vectorize the given load chain.
- bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
- unsigned Alignment,
+ bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
unsigned AddrSpace) const;
/// \returns True if it is legal to vectorize the given store chain.
- bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
- unsigned Alignment,
+ bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
unsigned AddrSpace) const;
/// \returns The new vector factor value if the target doesn't support \p
@@ -1085,6 +1317,24 @@
bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
ReductionFlags Flags) const;
+ /// \returns True if the target prefers in-loop reductions.
+ bool preferInLoopReduction(unsigned Opcode, Type *Ty,
+ ReductionFlags Flags) const;
+
+ /// \returns True if the target prefers the select of a reduction to be kept
+ /// in the loop when tail folding, i.e.
+ /// loop:
+ /// p = phi (0, s)
+ /// a = add (p, x)
+ /// s = select (mask, a, p)
+ /// vecreduce.add(s)
+ ///
+ /// As opposed to the normal scheme of p = phi (0, a) which allows the select
+ /// to be pulled out of the loop. If the select(.., add, ..) can be predicated
+ /// by the target, this can lead to cleaner code generation.
+ bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
+ ReductionFlags Flags) const;
+
/// \returns True if the target wants to expand the given reduction intrinsic
/// into a shuffle sequence.
bool shouldExpandReduction(const IntrinsicInst *II) const;
@@ -1093,6 +1343,18 @@
/// to a stack reload.
unsigned getGISelRematGlobalCost() const;
+ /// \returns True if the target supports scalable vectors.
+ bool supportsScalableVectors() const;
+
+ /// \name Vector Predication Information
+ /// @{
+ /// Whether the target supports the %evl parameter of VP intrinsic efficiently
+ /// in hardware. (see LLVM Language Reference - "Vector Predication
+ /// Intrinsics") Use of %evl is discouraged when that is not the case.
+ bool hasActiveVectorLength() const;
+
+ /// @}
+
/// @}
private:
@@ -1119,57 +1381,77 @@
public:
virtual ~Concept() = 0;
virtual const DataLayout &getDataLayout() const = 0;
- virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0;
virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
- ArrayRef<const Value *> Operands) = 0;
- virtual int getExtCost(const Instruction *I, const Value *Src) = 0;
- virtual int getCallCost(FunctionType *FTy, int NumArgs, const User *U) = 0;
- virtual int getCallCost(const Function *F, int NumArgs, const User *U) = 0;
- virtual int getCallCost(const Function *F,
- ArrayRef<const Value *> Arguments, const User *U) = 0;
+ ArrayRef<const Value *> Operands,
+ TTI::TargetCostKind CostKind) = 0;
virtual unsigned getInliningThresholdMultiplier() = 0;
- virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Type *> ParamTys, const User *U) = 0;
- virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<const Value *> Arguments,
- const User *U) = 0;
+ virtual int getInlinerVectorBonusPercent() = 0;
virtual int getMemcpyCost(const Instruction *I) = 0;
- virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
- unsigned &JTSize) = 0;
- virtual int
- getUserCost(const User *U, ArrayRef<const Value *> Operands) = 0;
+ virtual unsigned
+ getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize,
+ ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI) = 0;
+ virtual int getUserCost(const User *U, ArrayRef<const Value *> Operands,
+ TargetCostKind CostKind) = 0;
virtual bool hasBranchDivergence() = 0;
+ virtual bool useGPUDivergenceAnalysis() = 0;
virtual bool isSourceOfDivergence(const Value *V) = 0;
virtual bool isAlwaysUniform(const Value *V) = 0;
virtual unsigned getFlatAddressSpace() = 0;
+ virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
+ Intrinsic::ID IID) const = 0;
+ virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;
+ virtual unsigned getAssumedAddrSpace(const Value *V) const = 0;
+ virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,
+ Value *OldV,
+ Value *NewV) const = 0;
virtual bool isLoweredToCall(const Function *F) = 0;
virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
UnrollingPreferences &UP) = 0;
+ virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ PeelingPreferences &PP) = 0;
virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
AssumptionCache &AC,
TargetLibraryInfo *LibInfo,
HardwareLoopInfo &HWLoopInfo) = 0;
+ virtual bool
+ preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
+ AssumptionCache &AC, TargetLibraryInfo *TLI,
+ DominatorTree *DT, const LoopAccessInfo *LAI) = 0;
+ virtual bool emitGetActiveLaneMask() = 0;
+ virtual Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
+ IntrinsicInst &II) = 0;
+ virtual Optional<Value *>
+ simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
+ APInt DemandedMask, KnownBits &Known,
+ bool &KnownBitsComputed) = 0;
+ virtual Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
+ InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
+ APInt &UndefElts2, APInt &UndefElts3,
+ std::function<void(Instruction *, unsigned, APInt, APInt &)>
+ SimplifyAndSetOp) = 0;
virtual bool isLegalAddImmediate(int64_t Imm) = 0;
virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
int64_t BaseOffset, bool HasBaseReg,
- int64_t Scale,
- unsigned AddrSpace,
+ int64_t Scale, unsigned AddrSpace,
Instruction *I) = 0;
virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2) = 0;
+ virtual bool isNumRegsMajorCostOfLSR() = 0;
+ virtual bool isProfitableLSRChainElement(Instruction *I) = 0;
virtual bool canMacroFuseCmp() = 0;
virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,
LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
TargetLibraryInfo *LibInfo) = 0;
virtual bool shouldFavorPostInc() const = 0;
virtual bool shouldFavorBackedgeIndex(const Loop *L) const = 0;
- virtual bool isLegalMaskedStore(Type *DataType) = 0;
- virtual bool isLegalMaskedLoad(Type *DataType) = 0;
- virtual bool isLegalNTStore(Type *DataType, unsigned Alignment) = 0;
- virtual bool isLegalNTLoad(Type *DataType, unsigned Alignment) = 0;
- virtual bool isLegalMaskedScatter(Type *DataType) = 0;
- virtual bool isLegalMaskedGather(Type *DataType) = 0;
+ virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0;
+ virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0;
+ virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;
+ virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;
+ virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0;
+ virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;
virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;
virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;
virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
@@ -1183,15 +1465,16 @@
virtual bool isProfitableToHoist(Instruction *I) = 0;
virtual bool useAA() = 0;
virtual bool isTypeLegal(Type *Ty) = 0;
- virtual unsigned getJumpBufAlignment() = 0;
- virtual unsigned getJumpBufSize() = 0;
+ virtual unsigned getRegUsageForType(Type *Ty) = 0;
virtual bool shouldBuildLookupTables() = 0;
virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
virtual bool useColdCCForColdCall(Function &F) = 0;
+ virtual unsigned getScalarizationOverhead(VectorType *Ty,
+ const APInt &DemandedElts,
+ bool Insert, bool Extract) = 0;
virtual unsigned
- getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) = 0;
- virtual unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
- unsigned VF) = 0;
+ getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
+ unsigned VF) = 0;
virtual bool supportsEfficientVectorElementLoadStore() = 0;
virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
virtual MemCmpExpansionOptions
@@ -1208,70 +1491,108 @@
virtual bool haveFastSqrt(Type *Ty) = 0;
virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
virtual int getFPOpCost(Type *Ty) = 0;
- virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
- Type *Ty) = 0;
- virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0;
- virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
- Type *Ty) = 0;
- virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty) = 0;
- virtual unsigned getNumberOfRegisters(bool Vector) = 0;
+ virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,
+ const APInt &Imm, Type *Ty) = 0;
+ virtual int getIntImmCost(const APInt &Imm, Type *Ty,
+ TargetCostKind CostKind) = 0;
+ virtual int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm,
+ Type *Ty, TargetCostKind CostKind,
+ Instruction *Inst = nullptr) = 0;
+ virtual int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,
+ const APInt &Imm, Type *Ty,
+ TargetCostKind CostKind) = 0;
+ virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;
+ virtual unsigned getRegisterClassForType(bool Vector,
+ Type *Ty = nullptr) const = 0;
+ virtual const char *getRegisterClassName(unsigned ClassID) const = 0;
virtual unsigned getRegisterBitWidth(bool Vector) const = 0;
virtual unsigned getMinVectorRegisterBitWidth() = 0;
+ virtual Optional<unsigned> getMaxVScale() const = 0;
virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0;
virtual unsigned getMinimumVF(unsigned ElemWidth) const = 0;
+ virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;
virtual bool shouldConsiderAddressTypePromotion(
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
- virtual unsigned getCacheLineSize() = 0;
- virtual llvm::Optional<unsigned> getCacheSize(CacheLevel Level) = 0;
- virtual llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) = 0;
- virtual unsigned getPrefetchDistance() = 0;
- virtual unsigned getMinPrefetchStride() = 0;
- virtual unsigned getMaxPrefetchIterationsAhead() = 0;
+ virtual unsigned getCacheLineSize() const = 0;
+ virtual Optional<unsigned> getCacheSize(CacheLevel Level) const = 0;
+ virtual Optional<unsigned> getCacheAssociativity(CacheLevel Level) const = 0;
+
+ /// \return How much before a load we should place the prefetch
+ /// instruction. This is currently measured in number of
+ /// instructions.
+ virtual unsigned getPrefetchDistance() const = 0;
+
+ /// \return Some HW prefetchers can handle accesses up to a certain
+ /// constant stride. This is the minimum stride in bytes where it
+ /// makes sense to start adding SW prefetches. The default is 1,
+ /// i.e. prefetch with any stride. Sometimes prefetching is beneficial
+ /// even below the HW prefetcher limit, and the arguments provided are
+ /// meant to serve as a basis for deciding this for a particular loop.
+ virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
+ unsigned NumStridedMemAccesses,
+ unsigned NumPrefetches,
+ bool HasCall) const = 0;
+
+ /// \return The maximum number of iterations to prefetch ahead. If
+ /// the required number of iterations is more than this number, no
+ /// prefetching is performed.
+ virtual unsigned getMaxPrefetchIterationsAhead() const = 0;
+
+ /// \return True if prefetching should also be done for writes.
+ virtual bool enableWritePrefetching() const = 0;
+
virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
- virtual unsigned
- getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
- OperandValueKind Opd2Info,
- OperandValueProperties Opd1PropInfo,
- OperandValueProperties Opd2PropInfo,
- ArrayRef<const Value *> Args) = 0;
- virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp) = 0;
+ virtual unsigned getArithmeticInstrCost(
+ unsigned Opcode, Type *Ty,
+ TTI::TargetCostKind CostKind,
+ OperandValueKind Opd1Info,
+ OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,
+ OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,
+ const Instruction *CxtI = nullptr) = 0;
+ virtual int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index,
+ VectorType *SubTp) = 0;
virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ CastContextHint CCH,
+ TTI::TargetCostKind CostKind,
const Instruction *I) = 0;
virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
VectorType *VecTy, unsigned Index) = 0;
- virtual int getCFInstrCost(unsigned Opcode) = 0;
- virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
- Type *CondTy, const Instruction *I) = 0;
+ virtual int getCFInstrCost(unsigned Opcode,
+ TTI::TargetCostKind CostKind) = 0;
+ virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ CmpInst::Predicate VecPred,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I) = 0;
virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
unsigned Index) = 0;
- virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace, const Instruction *I) = 0;
- virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
- unsigned Alignment,
- unsigned AddressSpace) = 0;
+ virtual int getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
+ unsigned AddressSpace,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I) = 0;
+ virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
+ unsigned AddressSpace,
+ TTI::TargetCostKind CostKind) = 0;
virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
- Value *Ptr, bool VariableMask,
- unsigned Alignment) = 0;
- virtual int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
- unsigned Factor,
- ArrayRef<unsigned> Indices,
- unsigned Alignment,
- unsigned AddressSpace,
- bool UseMaskForCond = false,
- bool UseMaskForGaps = false) = 0;
- virtual int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
- bool IsPairwiseForm) = 0;
- virtual int getMinMaxReductionCost(Type *Ty, Type *CondTy,
- bool IsPairwiseForm, bool IsUnsigned) = 0;
- virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Type *> Tys, FastMathFlags FMF,
- unsigned ScalarizationCostPassed) = 0;
- virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) = 0;
+ const Value *Ptr, bool VariableMask,
+ Align Alignment,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr) = 0;
+
+ virtual int getInterleavedMemoryOpCost(
+ unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
+ Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
+ bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;
+ virtual int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
+ bool IsPairwiseForm,
+ TTI::TargetCostKind CostKind) = 0;
+ virtual int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
+ bool IsPairwiseForm, bool IsUnsigned,
+ TTI::TargetCostKind CostKind) = 0;
+ virtual int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+ TTI::TargetCostKind CostKind) = 0;
virtual int getCallInstrCost(Function *F, Type *RetTy,
- ArrayRef<Type *> Tys) = 0;
+ ArrayRef<Type *> Tys,
+ TTI::TargetCostKind CostKind) = 0;
virtual unsigned getNumberOfParts(Type *Tp) = 0;
virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
const SCEV *Ptr) = 0;
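
Editorial illustration, not part of the diff: the cost hooks above now take an explicit TTI::TargetCostKind, and intrinsic costs are described by a single IntrinsicCostAttributes bundle instead of the removed per-signature overloads. A minimal caller-side sketch follows, assuming a TargetTransformInfo reference `TTI`, an IntrinsicInst `II`, and a `LoadInst *LI` are already in scope; those variable names are not from this header.

  // Ask for reciprocal-throughput costs; TCK_Latency, TCK_CodeSize and
  // TCK_SizeAndLatency plug into the same calls.
  TargetTransformInfo::TargetCostKind CostKind =
      TargetTransformInfo::TCK_RecipThroughput;
  IntrinsicCostAttributes ICA(II);     // describes the intrinsic call site
  auto IntrinsicCost = TTI.getIntrinsicInstrCost(ICA, CostKind);
  auto LoadCost = TTI.getMemoryOpCost(Instruction::Load, LI->getType(),
                                      LI->getAlign(),
                                      LI->getPointerAddressSpace(),
                                      CostKind, LI);
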
@@ -1282,26 +1603,29 @@
virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
Type *ExpectedType) = 0;
virtual Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
+ unsigned SrcAddrSpace,
+ unsigned DestAddrSpace,
unsigned SrcAlign,
unsigned DestAlign) const = 0;
virtual void getMemcpyLoopResidualLoweringType(
SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
- unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const = 0;
+ unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
+ unsigned SrcAlign, unsigned DestAlign) const = 0;
virtual bool areInlineCompatible(const Function *Caller,
const Function *Callee) const = 0;
virtual bool
areFunctionArgsABICompatible(const Function *Caller, const Function *Callee,
SmallPtrSetImpl<Argument *> &Args) const = 0;
virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
- virtual bool isIndexedStoreLegal(MemIndexedMode Mode,Type *Ty) const = 0;
+ virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
- unsigned Alignment,
+ Align Alignment,
unsigned AddrSpace) const = 0;
virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
- unsigned Alignment,
+ Align Alignment,
unsigned AddrSpace) const = 0;
virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
unsigned ChainSizeInBytes,
@@ -1311,8 +1635,14 @@
VectorType *VecTy) const = 0;
virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
ReductionFlags) const = 0;
+ virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty,
+ ReductionFlags) const = 0;
+ virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
+ ReductionFlags) const = 0;
virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
virtual unsigned getGISelRematGlobalCost() const = 0;
+ virtual bool supportsScalableVectors() const = 0;
+ virtual bool hasActiveVectorLength() const = 0;
virtual int getInstructionLatency(const Instruction *I) = 0;
};
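
Editorial illustration, not part of the diff: the prefetch hooks declared in the interface above now receive per-loop statistics so a target can tune software prefetching loop by loop. A minimal caller-side sketch, assuming a TargetTransformInfo reference `TTI` plus client-gathered statistics (`LoopInstCount`, `StrideBytes`, `NumMemAccesses`, `NumStridedMemAccesses`, `NumPrefetches`, `HasCall`, `IsStore`); none of those variable names come from this header.

  // How many loop iterations the prefetch distance corresponds to.
  unsigned ItersAhead =
      (TTI.getPrefetchDistance() + LoopInstCount - 1) / LoopInstCount;
  bool WantPrefetch =
      ItersAhead <= TTI.getMaxPrefetchIterationsAhead() &&
      // Skip strides the hardware prefetcher already covers.
      StrideBytes >= TTI.getMinPrefetchStride(NumMemAccesses,
                                              NumStridedMemAccesses,
                                              NumPrefetches, HasCall) &&
      // Only prefetch stores if the target wants write prefetching.
      (!IsStore || TTI.enableWritePrefetching());
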
@@ -1328,45 +1658,28 @@
return Impl.getDataLayout();
}
- int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override {
- return Impl.getOperationCost(Opcode, Ty, OpTy);
- }
int getGEPCost(Type *PointeeType, const Value *Ptr,
- ArrayRef<const Value *> Operands) override {
+ ArrayRef<const Value *> Operands,
+ enum TargetTransformInfo::TargetCostKind CostKind) override {
return Impl.getGEPCost(PointeeType, Ptr, Operands);
}
- int getExtCost(const Instruction *I, const Value *Src) override {
- return Impl.getExtCost(I, Src);
- }
- int getCallCost(FunctionType *FTy, int NumArgs, const User *U) override {
- return Impl.getCallCost(FTy, NumArgs, U);
- }
- int getCallCost(const Function *F, int NumArgs, const User *U) override {
- return Impl.getCallCost(F, NumArgs, U);
- }
- int getCallCost(const Function *F,
- ArrayRef<const Value *> Arguments, const User *U) override {
- return Impl.getCallCost(F, Arguments, U);
- }
unsigned getInliningThresholdMultiplier() override {
return Impl.getInliningThresholdMultiplier();
}
- int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<Type *> ParamTys, const User *U = nullptr) override {
- return Impl.getIntrinsicCost(IID, RetTy, ParamTys, U);
- }
- int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
- ArrayRef<const Value *> Arguments,
- const User *U = nullptr) override {
- return Impl.getIntrinsicCost(IID, RetTy, Arguments, U);
+ int getInlinerVectorBonusPercent() override {
+ return Impl.getInlinerVectorBonusPercent();
}
int getMemcpyCost(const Instruction *I) override {
return Impl.getMemcpyCost(I);
}
- int getUserCost(const User *U, ArrayRef<const Value *> Operands) override {
- return Impl.getUserCost(U, Operands);
+ int getUserCost(const User *U, ArrayRef<const Value *> Operands,
+ TargetCostKind CostKind) override {
+ return Impl.getUserCost(U, Operands, CostKind);
}
bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
+ bool useGPUDivergenceAnalysis() override {
+ return Impl.useGPUDivergenceAnalysis();
+ }
bool isSourceOfDivergence(const Value *V) override {
return Impl.isSourceOfDivergence(V);
}
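
Editorial illustration, not part of the diff: generic cost queries such as getUserCost are now parameterized by the cost kind rather than split across separate size/latency entry points. A short sketch, assuming `TTI` and an Instruction `I` are in scope:

  SmallVector<const Value *, 4> Operands(I.operand_values());
  auto SizeCost =
      TTI.getUserCost(&I, Operands, TargetTransformInfo::TCK_CodeSize);
  auto ThroughputCost =
      TTI.getUserCost(&I, Operands, TargetTransformInfo::TCK_RecipThroughput);
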
@@ -1375,8 +1688,24 @@
return Impl.isAlwaysUniform(V);
}
- unsigned getFlatAddressSpace() override {
- return Impl.getFlatAddressSpace();
+ unsigned getFlatAddressSpace() override { return Impl.getFlatAddressSpace(); }
+
+ bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
+ Intrinsic::ID IID) const override {
+ return Impl.collectFlatAddressOperands(OpIndexes, IID);
+ }
+
+ bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {
+ return Impl.isNoopAddrSpaceCast(FromAS, ToAS);
+ }
+
+ unsigned getAssumedAddrSpace(const Value *V) const override {
+ return Impl.getAssumedAddrSpace(V);
+ }
+
+ Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
+ Value *NewV) const override {
+ return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
}
bool isLoweredToCall(const Function *F) override {
@@ -1386,12 +1715,44 @@
UnrollingPreferences &UP) override {
return Impl.getUnrollingPreferences(L, SE, UP);
}
+ void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
+ PeelingPreferences &PP) override {
+ return Impl.getPeelingPreferences(L, SE, PP);
+ }
bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
- AssumptionCache &AC,
- TargetLibraryInfo *LibInfo,
+ AssumptionCache &AC, TargetLibraryInfo *LibInfo,
HardwareLoopInfo &HWLoopInfo) override {
return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
}
+ bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
+ AssumptionCache &AC, TargetLibraryInfo *TLI,
+ DominatorTree *DT,
+ const LoopAccessInfo *LAI) override {
+ return Impl.preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
+ }
+ bool emitGetActiveLaneMask() override {
+ return Impl.emitGetActiveLaneMask();
+ }
+ Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
+ IntrinsicInst &II) override {
+ return Impl.instCombineIntrinsic(IC, II);
+ }
+ Optional<Value *>
+ simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,
+ APInt DemandedMask, KnownBits &Known,
+ bool &KnownBitsComputed) override {
+ return Impl.simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
+ KnownBitsComputed);
+ }
+ Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
+ InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
+ APInt &UndefElts2, APInt &UndefElts3,
+ std::function<void(Instruction *, unsigned, APInt, APInt &)>
+ SimplifyAndSetOp) override {
+ return Impl.simplifyDemandedVectorEltsIntrinsic(
+ IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
+ SimplifyAndSetOp);
+ }
bool isLegalAddImmediate(int64_t Imm) override {
return Impl.isLegalAddImmediate(Imm);
}
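
Editorial illustration, not part of the diff: the instCombineIntrinsic / simplifyDemanded*Intrinsic hooks let a target fold its own intrinsics from inside InstCombine instead of open-coding them in the pass. A hypothetical target-side sketch; the class name MyTargetTTIImpl and the intrinsic mytarget_identity are invented for illustration, and only the hook signature comes from this header.

  Optional<Instruction *>
  MyTargetTTIImpl::instCombineIntrinsic(InstCombiner &IC,
                                        IntrinsicInst &II) const {
    // Hypothetical fold: the target's "identity" intrinsic just forwards
    // its operand, so replace all uses of the call with that operand.
    if (II.getIntrinsicID() == Intrinsic::mytarget_identity)
      return IC.replaceInstUsesWith(II, II.getArgOperand(0));
    return None; // no target-specific combine applies
  }
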
@@ -1399,48 +1760,48 @@
return Impl.isLegalICmpImmediate(Imm);
}
bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
- bool HasBaseReg, int64_t Scale,
- unsigned AddrSpace,
+ bool HasBaseReg, int64_t Scale, unsigned AddrSpace,
Instruction *I) override {
- return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
- Scale, AddrSpace, I);
+ return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
+ AddrSpace, I);
}
bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
TargetTransformInfo::LSRCost &C2) override {
return Impl.isLSRCostLess(C1, C2);
}
- bool canMacroFuseCmp() override {
- return Impl.canMacroFuseCmp();
+ bool isNumRegsMajorCostOfLSR() override {
+ return Impl.isNumRegsMajorCostOfLSR();
}
- bool canSaveCmp(Loop *L, BranchInst **BI,
- ScalarEvolution *SE,
- LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,
- TargetLibraryInfo *LibInfo) override {
+ bool isProfitableLSRChainElement(Instruction *I) override {
+ return Impl.isProfitableLSRChainElement(I);
+ }
+ bool canMacroFuseCmp() override { return Impl.canMacroFuseCmp(); }
+ bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
+ DominatorTree *DT, AssumptionCache *AC,
+ TargetLibraryInfo *LibInfo) override {
return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);
}
- bool shouldFavorPostInc() const override {
- return Impl.shouldFavorPostInc();
- }
+ bool shouldFavorPostInc() const override { return Impl.shouldFavorPostInc(); }
bool shouldFavorBackedgeIndex(const Loop *L) const override {
return Impl.shouldFavorBackedgeIndex(L);
}
- bool isLegalMaskedStore(Type *DataType) override {
- return Impl.isLegalMaskedStore(DataType);
+ bool isLegalMaskedStore(Type *DataType, Align Alignment) override {
+ return Impl.isLegalMaskedStore(DataType, Alignment);
}
- bool isLegalMaskedLoad(Type *DataType) override {
- return Impl.isLegalMaskedLoad(DataType);
+ bool isLegalMaskedLoad(Type *DataType, Align Alignment) override {
+ return Impl.isLegalMaskedLoad(DataType, Alignment);
}
- bool isLegalNTStore(Type *DataType, unsigned Alignment) override {
+ bool isLegalNTStore(Type *DataType, Align Alignment) override {
return Impl.isLegalNTStore(DataType, Alignment);
}
- bool isLegalNTLoad(Type *DataType, unsigned Alignment) override {
+ bool isLegalNTLoad(Type *DataType, Align Alignment) override {
return Impl.isLegalNTLoad(DataType, Alignment);
}
- bool isLegalMaskedScatter(Type *DataType) override {
- return Impl.isLegalMaskedScatter(DataType);
+ bool isLegalMaskedScatter(Type *DataType, Align Alignment) override {
+ return Impl.isLegalMaskedScatter(DataType, Alignment);
}
- bool isLegalMaskedGather(Type *DataType) override {
- return Impl.isLegalMaskedGather(DataType);
+ bool isLegalMaskedGather(Type *DataType, Align Alignment) override {
+ return Impl.isLegalMaskedGather(DataType, Alignment);
}
bool isLegalMaskedCompressStore(Type *DataType) override {
return Impl.isLegalMaskedCompressStore(DataType);
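
Editorial illustration, not part of the diff: the masked-access legality hooks now take the access's actual Align instead of a raw unsigned. A short caller-side sketch, assuming `TTI`, a `LoadInst *LI`, and its vector type `VecTy` are in scope:

  Align Alignment = LI->getAlign();
  if (TTI.isLegalMaskedLoad(VecTy, Alignment) ||
      TTI.isLegalMaskedGather(VecTy, Alignment)) {
    // The target can handle this access as a masked load or a gather.
  }
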
@@ -1460,12 +1821,10 @@
int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
bool HasBaseReg, int64_t Scale,
unsigned AddrSpace) override {
- return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
- Scale, AddrSpace);
+ return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,
+ AddrSpace);
}
- bool LSRWithInstrQueries() override {
- return Impl.LSRWithInstrQueries();
- }
+ bool LSRWithInstrQueries() override { return Impl.LSRWithInstrQueries(); }
bool isTruncateFree(Type *Ty1, Type *Ty2) override {
return Impl.isTruncateFree(Ty1, Ty2);
}
@@ -1474,8 +1833,9 @@
}
bool useAA() override { return Impl.useAA(); }
bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
- unsigned getJumpBufAlignment() override { return Impl.getJumpBufAlignment(); }
- unsigned getJumpBufSize() override { return Impl.getJumpBufSize(); }
+ unsigned getRegUsageForType(Type *Ty) override {
+ return Impl.getRegUsageForType(Ty);
+ }
bool shouldBuildLookupTables() override {
return Impl.shouldBuildLookupTables();
}
@@ -1486,9 +1846,9 @@
return Impl.useColdCCForColdCall(F);
}
- unsigned getScalarizationOverhead(Type *Ty, bool Insert,
- bool Extract) override {
- return Impl.getScalarizationOverhead(Ty, Insert, Extract);
+ unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts,
+ bool Insert, bool Extract) override {
+ return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
}
unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
unsigned VF) override {
@@ -1515,9 +1875,9 @@
bool isFPVectorizationPotentiallyUnsafe() override {
return Impl.isFPVectorizationPotentiallyUnsafe();
}
- bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
- unsigned BitWidth, unsigned AddressSpace,
- unsigned Alignment, bool *Fast) override {
+ bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
+ unsigned AddressSpace, unsigned Alignment,
+ bool *Fast) override {
return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
Alignment, Fast);
}
@@ -1536,19 +1896,28 @@
Type *Ty) override {
return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
}
- int getIntImmCost(const APInt &Imm, Type *Ty) override {
- return Impl.getIntImmCost(Imm, Ty);
+ int getIntImmCost(const APInt &Imm, Type *Ty,
+ TargetCostKind CostKind) override {
+ return Impl.getIntImmCost(Imm, Ty, CostKind);
}
- int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
- Type *Ty) override {
- return Impl.getIntImmCost(Opc, Idx, Imm, Ty);
+ int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty,
+ TargetCostKind CostKind,
+ Instruction *Inst = nullptr) override {
+ return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst);
}
- int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
- Type *Ty) override {
- return Impl.getIntImmCost(IID, Idx, Imm, Ty);
+ int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
+ Type *Ty, TargetCostKind CostKind) override {
+ return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);
}
- unsigned getNumberOfRegisters(bool Vector) override {
- return Impl.getNumberOfRegisters(Vector);
+ unsigned getNumberOfRegisters(unsigned ClassID) const override {
+ return Impl.getNumberOfRegisters(ClassID);
+ }
+ unsigned getRegisterClassForType(bool Vector,
+ Type *Ty = nullptr) const override {
+ return Impl.getRegisterClassForType(Vector, Ty);
+ }
+ const char *getRegisterClassName(unsigned ClassID) const override {
+ return Impl.getRegisterClassName(ClassID);
}
unsigned getRegisterBitWidth(bool Vector) const override {
return Impl.getRegisterBitWidth(Vector);
@@ -1556,113 +1925,154 @@
unsigned getMinVectorRegisterBitWidth() override {
return Impl.getMinVectorRegisterBitWidth();
}
+ Optional<unsigned> getMaxVScale() const override {
+ return Impl.getMaxVScale();
+ }
bool shouldMaximizeVectorBandwidth(bool OptSize) const override {
return Impl.shouldMaximizeVectorBandwidth(OptSize);
}
unsigned getMinimumVF(unsigned ElemWidth) const override {
return Impl.getMinimumVF(ElemWidth);
}
+ unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {
+ return Impl.getMaximumVF(ElemWidth, Opcode);
+ }
bool shouldConsiderAddressTypePromotion(
const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
return Impl.shouldConsiderAddressTypePromotion(
I, AllowPromotionWithoutCommonHeader);
}
- unsigned getCacheLineSize() override {
- return Impl.getCacheLineSize();
- }
- llvm::Optional<unsigned> getCacheSize(CacheLevel Level) override {
+ unsigned getCacheLineSize() const override { return Impl.getCacheLineSize(); }
+ Optional<unsigned> getCacheSize(CacheLevel Level) const override {
return Impl.getCacheSize(Level);
}
- llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) override {
+ Optional<unsigned> getCacheAssociativity(CacheLevel Level) const override {
return Impl.getCacheAssociativity(Level);
}
- unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); }
- unsigned getMinPrefetchStride() override {
- return Impl.getMinPrefetchStride();
+
+ /// Return the preferred prefetch distance in terms of instructions.
+ ///
+ unsigned getPrefetchDistance() const override {
+ return Impl.getPrefetchDistance();
}
- unsigned getMaxPrefetchIterationsAhead() override {
+
+ /// Return the minimum stride necessary to trigger software
+ /// prefetching.
+ ///
+ unsigned getMinPrefetchStride(unsigned NumMemAccesses,
+ unsigned NumStridedMemAccesses,
+ unsigned NumPrefetches,
+ bool HasCall) const override {
+ return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
+ NumPrefetches, HasCall);
+ }
+
+ /// Return the maximum prefetch distance in terms of loop
+ /// iterations.
+ ///
+ unsigned getMaxPrefetchIterationsAhead() const override {
return Impl.getMaxPrefetchIterationsAhead();
}
+
+ /// \return True if prefetching should also be done for writes.
+ bool enableWritePrefetching() const override {
+ return Impl.enableWritePrefetching();
+ }
+
unsigned getMaxInterleaveFactor(unsigned VF) override {
return Impl.getMaxInterleaveFactor(VF);
}
unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
- unsigned &JTSize) override {
- return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize);
+ unsigned &JTSize,
+ ProfileSummaryInfo *PSI,
+ BlockFrequencyInfo *BFI) override {
+ return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);
}
- unsigned
- getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
- OperandValueKind Opd2Info,
- OperandValueProperties Opd1PropInfo,
- OperandValueProperties Opd2PropInfo,
- ArrayRef<const Value *> Args) override {
- return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
- Opd1PropInfo, Opd2PropInfo, Args);
+ unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
+ TTI::TargetCostKind CostKind,
+ OperandValueKind Opd1Info,
+ OperandValueKind Opd2Info,
+ OperandValueProperties Opd1PropInfo,
+ OperandValueProperties Opd2PropInfo,
+ ArrayRef<const Value *> Args,
+ const Instruction *CxtI = nullptr) override {
+ return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,
+ Opd1PropInfo, Opd2PropInfo, Args, CxtI);
}
- int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
- Type *SubTp) override {
+ int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index,
+ VectorType *SubTp) override {
return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
}
int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
+ CastContextHint CCH, TTI::TargetCostKind CostKind,
const Instruction *I) override {
- return Impl.getCastInstrCost(Opcode, Dst, Src, I);
+ return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);
}
int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
unsigned Index) override {
return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
}
- int getCFInstrCost(unsigned Opcode) override {
- return Impl.getCFInstrCost(Opcode);
+ int getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) override {
+ return Impl.getCFInstrCost(Opcode, CostKind);
}
int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
+ CmpInst::Predicate VecPred,
+ TTI::TargetCostKind CostKind,
const Instruction *I) override {
- return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
+ return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);
}
int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
return Impl.getVectorInstrCost(Opcode, Val, Index);
}
- int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace, const Instruction *I) override {
- return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
+ int getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
+ unsigned AddressSpace, TTI::TargetCostKind CostKind,
+ const Instruction *I) override {
+ return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+ CostKind, I);
}
- int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
- unsigned AddressSpace) override {
- return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
+ int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,
+ unsigned AddressSpace,
+ TTI::TargetCostKind CostKind) override {
+ return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,
+ CostKind);
}
- int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
- Value *Ptr, bool VariableMask,
- unsigned Alignment) override {
+ int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,
+ bool VariableMask, Align Alignment,
+ TTI::TargetCostKind CostKind,
+ const Instruction *I = nullptr) override {
return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
- Alignment);
+ Alignment, CostKind, I);
}
int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
- ArrayRef<unsigned> Indices, unsigned Alignment,
- unsigned AddressSpace, bool UseMaskForCond,
+ ArrayRef<unsigned> Indices, Align Alignment,
+ unsigned AddressSpace,
+ TTI::TargetCostKind CostKind,
+ bool UseMaskForCond,
bool UseMaskForGaps) override {
return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
- Alignment, AddressSpace,
+ Alignment, AddressSpace, CostKind,
UseMaskForCond, UseMaskForGaps);
}
- int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
- bool IsPairwiseForm) override {
- return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
+ int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
+ bool IsPairwiseForm,
+ TTI::TargetCostKind CostKind) override {
+ return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm,
+ CostKind);
}
- int getMinMaxReductionCost(Type *Ty, Type *CondTy,
- bool IsPairwiseForm, bool IsUnsigned) override {
- return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
- }
- int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
- FastMathFlags FMF, unsigned ScalarizationCostPassed) override {
- return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
- ScalarizationCostPassed);
+ int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
+ bool IsPairwiseForm, bool IsUnsigned,
+ TTI::TargetCostKind CostKind) override {
+ return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned,
+ CostKind);
}
- int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
- ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) override {
- return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
+ int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
+ TTI::TargetCostKind CostKind) override {
+ return Impl.getIntrinsicInstrCost(ICA, CostKind);
}
int getCallInstrCost(Function *F, Type *RetTy,
- ArrayRef<Type *> Tys) override {
- return Impl.getCallInstrCost(F, RetTy, Tys);
+ ArrayRef<Type *> Tys,
+ TTI::TargetCostKind CostKind) override {
+ return Impl.getCallInstrCost(F, RetTy, Tys, CostKind);
}
unsigned getNumberOfParts(Type *Tp) override {
return Impl.getNumberOfParts(Tp);
@@ -1686,16 +2096,18 @@
return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
}
Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
+ unsigned SrcAddrSpace, unsigned DestAddrSpace,
unsigned SrcAlign,
unsigned DestAlign) const override {
- return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAlign, DestAlign);
+ return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAddrSpace,
+ DestAddrSpace, SrcAlign, DestAlign);
}
- void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
- LLVMContext &Context,
- unsigned RemainingBytes,
- unsigned SrcAlign,
- unsigned DestAlign) const override {
+ void getMemcpyLoopResidualLoweringType(
+ SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
+ unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,
+ unsigned SrcAlign, unsigned DestAlign) const override {
Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
+ SrcAddrSpace, DestAddrSpace,
SrcAlign, DestAlign);
}
bool areInlineCompatible(const Function *Caller,
@@ -1722,14 +2134,12 @@
bool isLegalToVectorizeStore(StoreInst *SI) const override {
return Impl.isLegalToVectorizeStore(SI);
}
- bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
- unsigned Alignment,
+ bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,
unsigned AddrSpace) const override {
return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
AddrSpace);
}
- bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
- unsigned Alignment,
+ bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,
unsigned AddrSpace) const override {
return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
AddrSpace);
@@ -1748,6 +2158,14 @@
ReductionFlags Flags) const override {
return Impl.useReductionIntrinsic(Opcode, Ty, Flags);
}
+ bool preferInLoopReduction(unsigned Opcode, Type *Ty,
+ ReductionFlags Flags) const override {
+ return Impl.preferInLoopReduction(Opcode, Ty, Flags);
+ }
+ bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,
+ ReductionFlags Flags) const override {
+ return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);
+ }
bool shouldExpandReduction(const IntrinsicInst *II) const override {
return Impl.shouldExpandReduction(II);
}
@@ -1756,6 +2174,14 @@
return Impl.getGISelRematGlobalCost();
}
+ bool supportsScalableVectors() const override {
+ return Impl.supportsScalableVectors();
+ }
+
+ bool hasActiveVectorLength() const override {
+ return Impl.hasActiveVectorLength();
+ }
+
int getInstructionLatency(const Instruction *I) override {
return Impl.getInstructionLatency(I);
}
@@ -1858,6 +2284,6 @@
/// clients.
ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);
-} // End llvm namespace
+} // namespace llvm
#endif
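
Editorial illustration, not part of the diff: a typical new-pass-manager client obtains the TargetTransformInfo declared in this header through TargetIRAnalysis. The pass name and body below are invented for illustration.

  PreservedAnalyses MyPass::run(Function &F, FunctionAnalysisManager &FAM) {
    const TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
    if (!TTI.supportsScalableVectors()) {
      // Restrict this (hypothetical) transform to fixed-width vectors.
    }
    return PreservedAnalyses::all();
  }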