Blame - linux-x64/clang/include/llvm/Analysis/TargetTransformInfoImpl.h - hafnium/prebuilts

blob: df4f853acf9da235ef1f8c1e8275e0c598655db6 [file] [log] [blame]

Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame^]	1	//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	/// \file
				10	/// This file provides helpers for the implementation of
				11	/// a TargetTransformInfo-conforming class.
				12	///
				13	//===----------------------------------------------------------------------===//
				14
				15	#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
				16	#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
				17
				18	#include "llvm/Analysis/ScalarEvolutionExpressions.h"
				19	#include "llvm/Analysis/TargetTransformInfo.h"
				20	#include "llvm/Analysis/VectorUtils.h"
				21	#include "llvm/IR/CallSite.h"
				22	#include "llvm/IR/DataLayout.h"
				23	#include "llvm/IR/Function.h"
				24	#include "llvm/IR/GetElementPtrTypeIterator.h"
				25	#include "llvm/IR/Operator.h"
				26	#include "llvm/IR/Type.h"
				27
				28	namespace llvm {
				29
				30	/// \brief Base class for use as a mix-in that aids implementing
				31	/// a TargetTransformInfo-compatible class.
				32	class TargetTransformInfoImplBase {
				33	protected:
				34	typedef TargetTransformInfo TTI;
				35
				36	const DataLayout &DL;
				37
				38	explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}
				39
				40	public:
				41	// Provide value semantics. MSVC requires that we spell all of these out.
				42	TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg)
				43	: DL(Arg.DL) {}
				44	TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}
				45
				46	const DataLayout &getDataLayout() const { return DL; }
				47
				48	unsigned getOperationCost(unsigned Opcode, Type Ty, Type OpTy) {
				49	switch (Opcode) {
				50	default:
				51	// By default, just classify everything as 'basic'.
				52	return TTI::TCC_Basic;
				53
				54	case Instruction::GetElementPtr:
				55	llvm_unreachable("Use getGEPCost for GEP operations!");
				56
				57	case Instruction::BitCast:
				58	assert(OpTy && "Cast instructions must provide the operand type");
				59	if (Ty == OpTy \|\| (Ty->isPointerTy() && OpTy->isPointerTy()))
				60	// Identity and pointer-to-pointer casts are free.
				61	return TTI::TCC_Free;
				62
				63	// Otherwise, the default basic cost is used.
				64	return TTI::TCC_Basic;
				65
				66	case Instruction::FDiv:
				67	case Instruction::FRem:
				68	case Instruction::SDiv:
				69	case Instruction::SRem:
				70	case Instruction::UDiv:
				71	case Instruction::URem:
				72	return TTI::TCC_Expensive;
				73
				74	case Instruction::IntToPtr: {
				75	// An inttoptr cast is free so long as the input is a legal integer type
				76	// which doesn't contain values outside the range of a pointer.
				77	unsigned OpSize = OpTy->getScalarSizeInBits();
				78	if (DL.isLegalInteger(OpSize) &&
				79	OpSize <= DL.getPointerTypeSizeInBits(Ty))
				80	return TTI::TCC_Free;
				81
				82	// Otherwise it's not a no-op.
				83	return TTI::TCC_Basic;
				84	}
				85	case Instruction::PtrToInt: {
				86	// A ptrtoint cast is free so long as the result is large enough to store
				87	// the pointer, and a legal integer type.
				88	unsigned DestSize = Ty->getScalarSizeInBits();
				89	if (DL.isLegalInteger(DestSize) &&
				90	DestSize >= DL.getPointerTypeSizeInBits(OpTy))
				91	return TTI::TCC_Free;
				92
				93	// Otherwise it's not a no-op.
				94	return TTI::TCC_Basic;
				95	}
				96	case Instruction::Trunc:
				97	// trunc to a native type is free (assuming the target has compare and
				98	// shift-right of the same width).
				99	if (DL.isLegalInteger(DL.getTypeSizeInBits(Ty)))
				100	return TTI::TCC_Free;
				101
				102	return TTI::TCC_Basic;
				103	}
				104	}
				105
				106	int getGEPCost(Type PointeeType, const Value Ptr,
				107	ArrayRef<const Value *> Operands) {
				108	// In the basic model, we just assume that all-constant GEPs will be folded
				109	// into their uses via addressing modes.
				110	for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
				111	if (!isa<Constant>(Operands[Idx]))
				112	return TTI::TCC_Basic;
				113
				114	return TTI::TCC_Free;
				115	}
				116
				117	unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
				118	unsigned &JTSize) {
				119	JTSize = 0;
				120	return SI.getNumCases();
				121	}
				122
				123	int getExtCost(const Instruction I, const Value Src) {
				124	return TTI::TCC_Basic;
				125	}
				126
				127	unsigned getCallCost(FunctionType *FTy, int NumArgs) {
				128	assert(FTy && "FunctionType must be provided to this routine.");
				129
				130	// The target-independent implementation just measures the size of the
				131	// function by approximating that each argument will take on average one
				132	// instruction to prepare.
				133
				134	if (NumArgs < 0)
				135	// Set the argument number to the number of explicit arguments in the
				136	// function.
				137	NumArgs = FTy->getNumParams();
				138
				139	return TTI::TCC_Basic * (NumArgs + 1);
				140	}
				141
				142	unsigned getInliningThresholdMultiplier() { return 1; }
				143
				144	unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
				145	ArrayRef<Type *> ParamTys) {
				146	switch (IID) {
				147	default:
				148	// Intrinsics rarely (if ever) have normal argument setup constraints.
				149	// Model them as having a basic instruction cost.
				150	// FIXME: This is wrong for libc intrinsics.
				151	return TTI::TCC_Basic;
				152
				153	case Intrinsic::annotation:
				154	case Intrinsic::assume:
				155	case Intrinsic::sideeffect:
				156	case Intrinsic::dbg_declare:
				157	case Intrinsic::dbg_value:
				158	case Intrinsic::invariant_start:
				159	case Intrinsic::invariant_end:
				160	case Intrinsic::lifetime_start:
				161	case Intrinsic::lifetime_end:
				162	case Intrinsic::objectsize:
				163	case Intrinsic::ptr_annotation:
				164	case Intrinsic::var_annotation:
				165	case Intrinsic::experimental_gc_result:
				166	case Intrinsic::experimental_gc_relocate:
				167	case Intrinsic::coro_alloc:
				168	case Intrinsic::coro_begin:
				169	case Intrinsic::coro_free:
				170	case Intrinsic::coro_end:
				171	case Intrinsic::coro_frame:
				172	case Intrinsic::coro_size:
				173	case Intrinsic::coro_suspend:
				174	case Intrinsic::coro_param:
				175	case Intrinsic::coro_subfn_addr:
				176	// These intrinsics don't actually represent code after lowering.
				177	return TTI::TCC_Free;
				178	}
				179	}
				180
				181	bool hasBranchDivergence() { return false; }
				182
				183	bool isSourceOfDivergence(const Value *V) { return false; }
				184
				185	bool isAlwaysUniform(const Value *V) { return false; }
				186
				187	unsigned getFlatAddressSpace () {
				188	return -1;
				189	}
				190
				191	bool isLoweredToCall(const Function *F) {
				192	assert(F && "A concrete function must be provided to this routine.");
				193
				194	// FIXME: These should almost certainly not be handled here, and instead
				195	// handled with the help of TLI or the target itself. This was largely
				196	// ported from existing analysis heuristics here so that such refactorings
				197	// can take place in the future.
				198
				199	if (F->isIntrinsic())
				200	return false;
				201
				202	if (F->hasLocalLinkage() \|\| !F->hasName())
				203	return true;
				204
				205	StringRef Name = F->getName();
				206
				207	// These will all likely lower to a single selection DAG node.
				208	if (Name == "copysign" \|\| Name == "copysignf" \|\| Name == "copysignl" \|\|
				209	Name == "fabs" \|\| Name == "fabsf" \|\| Name == "fabsl" \|\| Name == "sin" \|\|
				210	Name == "fmin" \|\| Name == "fminf" \|\| Name == "fminl" \|\|
				211	Name == "fmax" \|\| Name == "fmaxf" \|\| Name == "fmaxl" \|\|
				212	Name == "sinf" \|\| Name == "sinl" \|\| Name == "cos" \|\| Name == "cosf" \|\|
				213	Name == "cosl" \|\| Name == "sqrt" \|\| Name == "sqrtf" \|\| Name == "sqrtl")
				214	return false;
				215
				216	// These are all likely to be optimized into something smaller.
				217	if (Name == "pow" \|\| Name == "powf" \|\| Name == "powl" \|\| Name == "exp2" \|\|
				218	Name == "exp2l" \|\| Name == "exp2f" \|\| Name == "floor" \|\|
				219	Name == "floorf" \|\| Name == "ceil" \|\| Name == "round" \|\|
				220	Name == "ffs" \|\| Name == "ffsl" \|\| Name == "abs" \|\| Name == "labs" \|\|
				221	Name == "llabs")
				222	return false;
				223
				224	return true;
				225	}
				226
				227	void getUnrollingPreferences(Loop *, ScalarEvolution &,
				228	TTI::UnrollingPreferences &) {}
				229
				230	bool isLegalAddImmediate(int64_t Imm) { return false; }
				231
				232	bool isLegalICmpImmediate(int64_t Imm) { return false; }
				233
				234	bool isLegalAddressingMode(Type Ty, GlobalValue BaseGV, int64_t BaseOffset,
				235	bool HasBaseReg, int64_t Scale,
				236	unsigned AddrSpace, Instruction *I = nullptr) {
				237	// Guess that only reg and reg+reg addressing is allowed. This heuristic is
				238	// taken from the implementation of LSR.
				239	return !BaseGV && BaseOffset == 0 && (Scale == 0 \|\| Scale == 1);
				240	}
				241
				242	bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) {
				243	return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
				244	C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
				245	std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
				246	C2.ScaleCost, C2.ImmCost, C2.SetupCost);
				247	}
				248
				249	bool canMacroFuseCmp() { return false; }
				250
				251	bool shouldFavorPostInc() const { return false; }
				252
				253	bool isLegalMaskedStore(Type *DataType) { return false; }
				254
				255	bool isLegalMaskedLoad(Type *DataType) { return false; }
				256
				257	bool isLegalMaskedScatter(Type *DataType) { return false; }
				258
				259	bool isLegalMaskedGather(Type *DataType) { return false; }
				260
				261	bool hasDivRemOp(Type *DataType, bool IsSigned) { return false; }
				262
				263	bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) { return false; }
				264
				265	bool prefersVectorizedAddressing() { return true; }
				266
				267	int getScalingFactorCost(Type Ty, GlobalValue BaseGV, int64_t BaseOffset,
				268	bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
				269	// Guess that all legal addressing mode are free.
				270	if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
				271	Scale, AddrSpace))
				272	return 0;
				273	return -1;
				274	}
				275
				276	bool LSRWithInstrQueries() { return false; }
				277
				278	bool isTruncateFree(Type Ty1, Type Ty2) { return false; }
				279
				280	bool isProfitableToHoist(Instruction *I) { return true; }
				281
				282	bool useAA() { return false; }
				283
				284	bool isTypeLegal(Type *Ty) { return false; }
				285
				286	unsigned getJumpBufAlignment() { return 0; }
				287
				288	unsigned getJumpBufSize() { return 0; }
				289
				290	bool shouldBuildLookupTables() { return true; }
				291	bool shouldBuildLookupTablesForConstant(Constant *C) { return true; }
				292
				293	bool useColdCCForColdCall(Function &F) { return false; }
				294
				295	unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
				296	return 0;
				297	}
				298
				299	unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
				300	unsigned VF) { return 0; }
				301
				302	bool supportsEfficientVectorElementLoadStore() { return false; }
				303
				304	bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; }
				305
				306	const TTI::MemCmpExpansionOptions *enableMemCmpExpansion(
				307	bool IsZeroCmp) const {
				308	return nullptr;
				309	}
				310
				311	bool enableInterleavedAccessVectorization() { return false; }
				312
				313	bool isFPVectorizationPotentiallyUnsafe() { return false; }
				314
				315	bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
				316	unsigned BitWidth,
				317	unsigned AddressSpace,
				318	unsigned Alignment,
				319	bool *Fast) { return false; }
				320
				321	TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) {
				322	return TTI::PSK_Software;
				323	}
				324
				325	bool haveFastSqrt(Type *Ty) { return false; }
				326
				327	bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) { return true; }
				328
				329	unsigned getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Basic; }
				330
				331	int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
				332	Type *Ty) {
				333	return 0;
				334	}
				335
				336	unsigned getIntImmCost(const APInt &Imm, Type *Ty) { return TTI::TCC_Basic; }
				337
				338	unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
				339	Type *Ty) {
				340	return TTI::TCC_Free;
				341	}
				342
				343	unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
				344	Type *Ty) {
				345	return TTI::TCC_Free;
				346	}
				347
				348	unsigned getNumberOfRegisters(bool Vector) { return 8; }
				349
				350	unsigned getRegisterBitWidth(bool Vector) const { return 32; }
				351
				352	unsigned getMinVectorRegisterBitWidth() { return 128; }
				353
				354	bool shouldMaximizeVectorBandwidth(bool OptSize) const { return false; }
				355
				356	bool
				357	shouldConsiderAddressTypePromotion(const Instruction &I,
				358	bool &AllowPromotionWithoutCommonHeader) {
				359	AllowPromotionWithoutCommonHeader = false;
				360	return false;
				361	}
				362
				363	unsigned getCacheLineSize() { return 0; }
				364
				365	llvm::Optional<unsigned> getCacheSize(TargetTransformInfo::CacheLevel Level) {
				366	switch (Level) {
				367	case TargetTransformInfo::CacheLevel::L1D:
				368	LLVM_FALLTHROUGH;
				369	case TargetTransformInfo::CacheLevel::L2D:
				370	return llvm::Optional<unsigned>();
				371	}
				372
				373	llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
				374	}
				375
				376	llvm::Optional<unsigned> getCacheAssociativity(
				377	TargetTransformInfo::CacheLevel Level) {
				378	switch (Level) {
				379	case TargetTransformInfo::CacheLevel::L1D:
				380	LLVM_FALLTHROUGH;
				381	case TargetTransformInfo::CacheLevel::L2D:
				382	return llvm::Optional<unsigned>();
				383	}
				384
				385	llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
				386	}
				387
				388	unsigned getPrefetchDistance() { return 0; }
				389
				390	unsigned getMinPrefetchStride() { return 1; }
				391
				392	unsigned getMaxPrefetchIterationsAhead() { return UINT_MAX; }
				393
				394	unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
				395
				396	unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
				397	TTI::OperandValueKind Opd1Info,
				398	TTI::OperandValueKind Opd2Info,
				399	TTI::OperandValueProperties Opd1PropInfo,
				400	TTI::OperandValueProperties Opd2PropInfo,
				401	ArrayRef<const Value *> Args) {
				402	return 1;
				403	}
				404
				405	unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Ty, int Index,
				406	Type *SubTp) {
				407	return 1;
				408	}
				409
				410	unsigned getCastInstrCost(unsigned Opcode, Type Dst, Type Src,
				411	const Instruction *I) { return 1; }
				412
				413	unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
				414	VectorType *VecTy, unsigned Index) {
				415	return 1;
				416	}
				417
				418	unsigned getCFInstrCost(unsigned Opcode) { return 1; }
				419
				420	unsigned getCmpSelInstrCost(unsigned Opcode, Type ValTy, Type CondTy,
				421	const Instruction *I) {
				422	return 1;
				423	}
				424
				425	unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
				426	return 1;
				427	}
				428
				429	unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
				430	unsigned AddressSpace, const Instruction *I) {
				431	return 1;
				432	}
				433
				434	unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
				435	unsigned AddressSpace) {
				436	return 1;
				437	}
				438
				439	unsigned getGatherScatterOpCost(unsigned Opcode, Type DataTy, Value Ptr,
				440	bool VariableMask,
				441	unsigned Alignment) {
				442	return 1;
				443	}
				444
				445	unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
				446	unsigned Factor,
				447	ArrayRef<unsigned> Indices,
				448	unsigned Alignment,
				449	unsigned AddressSpace) {
				450	return 1;
				451	}
				452
				453	unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
				454	ArrayRef<Type *> Tys, FastMathFlags FMF,
				455	unsigned ScalarizationCostPassed) {
				456	return 1;
				457	}
				458	unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
				459	ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) {
				460	return 1;
				461	}
				462
				463	unsigned getCallInstrCost(Function F, Type RetTy, ArrayRef<Type *> Tys) {
				464	return 1;
				465	}
				466
				467	unsigned getNumberOfParts(Type *Tp) { return 0; }
				468
				469	unsigned getAddressComputationCost(Type Tp, ScalarEvolution ,
				470	const SCEV *) {
				471	return 0;
				472	}
				473
				474	unsigned getArithmeticReductionCost(unsigned, Type *, bool) { return 1; }
				475
				476	unsigned getMinMaxReductionCost(Type , Type , bool, bool) { return 1; }
				477
				478	unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) { return 0; }
				479
				480	bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) {
				481	return false;
				482	}
				483
				484	unsigned getAtomicMemIntrinsicMaxElementSize() const {
				485	// Note for overrides: You must ensure for all element unordered-atomic
				486	// memory intrinsics that all power-of-2 element sizes up to, and
				487	// including, the return value of this method have a corresponding
				488	// runtime lib call. These runtime lib call definitions can be found
				489	// in RuntimeLibcalls.h
				490	return 0;
				491	}
				492
				493	Value getOrCreateResultFromMemIntrinsic(IntrinsicInst Inst,
				494	Type *ExpectedType) {
				495	return nullptr;
				496	}
				497
				498	Type getMemcpyLoopLoweringType(LLVMContext &Context, Value Length,
				499	unsigned SrcAlign, unsigned DestAlign) const {
				500	return Type::getInt8Ty(Context);
				501	}
				502
				503	void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
				504	LLVMContext &Context,
				505	unsigned RemainingBytes,
				506	unsigned SrcAlign,
				507	unsigned DestAlign) const {
				508	for (unsigned i = 0; i != RemainingBytes; ++i)
				509	OpsOut.push_back(Type::getInt8Ty(Context));
				510	}
				511
				512	bool areInlineCompatible(const Function *Caller,
				513	const Function *Callee) const {
				514	return (Caller->getFnAttribute("target-cpu") ==
				515	Callee->getFnAttribute("target-cpu")) &&
				516	(Caller->getFnAttribute("target-features") ==
				517	Callee->getFnAttribute("target-features"));
				518	}
				519
				520	bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
				521	const DataLayout &DL) const {
				522	return false;
				523	}
				524
				525	bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty,
				526	const DataLayout &DL) const {
				527	return false;
				528	}
				529
				530	unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }
				531
				532	bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }
				533
				534	bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }
				535
				536	bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
				537	unsigned Alignment,
				538	unsigned AddrSpace) const {
				539	return true;
				540	}
				541
				542	bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
				543	unsigned Alignment,
				544	unsigned AddrSpace) const {
				545	return true;
				546	}
				547
				548	unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
				549	unsigned ChainSizeInBytes,
				550	VectorType *VecTy) const {
				551	return VF;
				552	}
				553
				554	unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
				555	unsigned ChainSizeInBytes,
				556	VectorType *VecTy) const {
				557	return VF;
				558	}
				559
				560	bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
				561	TTI::ReductionFlags Flags) const {
				562	return false;
				563	}
				564
				565	bool shouldExpandReduction(const IntrinsicInst *II) const {
				566	return true;
				567	}
				568
				569	protected:
				570	// Obtain the minimum required size to hold the value (without the sign)
				571	// In case of a vector it returns the min required size for one element.
				572	unsigned minRequiredElementSize(const Value* Val, bool &isSigned) {
				573	if (isa<ConstantDataVector>(Val) \|\| isa<ConstantVector>(Val)) {
				574	const auto* VectorValue = cast<Constant>(Val);
				575
				576	// In case of a vector need to pick the max between the min
				577	// required size for each element
				578	auto *VT = cast<VectorType>(Val->getType());
				579
				580	// Assume unsigned elements
				581	isSigned = false;
				582
				583	// The max required size is the total vector width divided by num
				584	// of elements in the vector
				585	unsigned MaxRequiredSize = VT->getBitWidth() / VT->getNumElements();
				586
				587	unsigned MinRequiredSize = 0;
				588	for(unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
				589	if (auto* IntElement =
				590	dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
				591	bool signedElement = IntElement->getValue().isNegative();
				592	// Get the element min required size.
				593	unsigned ElementMinRequiredSize =
				594	IntElement->getValue().getMinSignedBits() - 1;
				595	// In case one element is signed then all the vector is signed.
				596	isSigned \|= signedElement;
				597	// Save the max required bit size between all the elements.
				598	MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
				599	}
				600	else {
				601	// not an int constant element
				602	return MaxRequiredSize;
				603	}
				604	}
				605	return MinRequiredSize;
				606	}
				607
				608	if (const auto* CI = dyn_cast<ConstantInt>(Val)) {
				609	isSigned = CI->getValue().isNegative();
				610	return CI->getValue().getMinSignedBits() - 1;
				611	}
				612
				613	if (const auto* Cast = dyn_cast<SExtInst>(Val)) {
				614	isSigned = true;
				615	return Cast->getSrcTy()->getScalarSizeInBits() - 1;
				616	}
				617
				618	if (const auto* Cast = dyn_cast<ZExtInst>(Val)) {
				619	isSigned = false;
				620	return Cast->getSrcTy()->getScalarSizeInBits();
				621	}
				622
				623	isSigned = false;
				624	return Val->getType()->getScalarSizeInBits();
				625	}
				626
				627	bool isStridedAccess(const SCEV *Ptr) {
				628	return Ptr && isa<SCEVAddRecExpr>(Ptr);
				629	}
				630
				631	const SCEVConstant getConstantStrideStep(ScalarEvolution SE,
				632	const SCEV *Ptr) {
				633	if (!isStridedAccess(Ptr))
				634	return nullptr;
				635	const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
				636	return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
				637	}
				638
				639	bool isConstantStridedAccessLessThan(ScalarEvolution SE, const SCEV Ptr,
				640	int64_t MergeDistance) {
				641	const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
				642	if (!Step)
				643	return false;
				644	APInt StrideVal = Step->getAPInt();
				645	if (StrideVal.getBitWidth() > 64)
				646	return false;
				647	// FIXME: Need to take absolute value for negative stride case.
				648	return StrideVal.getSExtValue() < MergeDistance;
				649	}
				650	};
				651
				652	/// \brief CRTP base class for use as a mix-in that aids implementing
				653	/// a TargetTransformInfo-compatible class.
				654	template <typename T>
				655	class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
				656	private:
				657	typedef TargetTransformInfoImplBase BaseT;
				658
				659	protected:
				660	explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}
				661
				662	public:
				663	using BaseT::getCallCost;
				664
				665	unsigned getCallCost(const Function *F, int NumArgs) {
				666	assert(F && "A concrete function must be provided to this routine.");
				667
				668	if (NumArgs < 0)
				669	// Set the argument number to the number of explicit arguments in the
				670	// function.
				671	NumArgs = F->arg_size();
				672
				673	if (Intrinsic::ID IID = F->getIntrinsicID()) {
				674	FunctionType *FTy = F->getFunctionType();
				675	SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end());
				676	return static_cast<T *>(this)
				677	->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys);
				678	}
				679
				680	if (!static_cast<T *>(this)->isLoweredToCall(F))
				681	return TTI::TCC_Basic; // Give a basic cost if it will be lowered
				682	// directly.
				683
				684	return static_cast<T *>(this)->getCallCost(F->getFunctionType(), NumArgs);
				685	}
				686
				687	unsigned getCallCost(const Function F, ArrayRef<const Value > Arguments) {
				688	// Simply delegate to generic handling of the call.
				689	// FIXME: We should use instsimplify or something else to catch calls which
				690	// will constant fold with these arguments.
				691	return static_cast<T *>(this)->getCallCost(F, Arguments.size());
				692	}
				693
				694	using BaseT::getGEPCost;
				695
				696	int getGEPCost(Type PointeeType, const Value Ptr,
				697	ArrayRef<const Value *> Operands) {
				698	const GlobalValue *BaseGV = nullptr;
				699	if (Ptr != nullptr) {
				700	// TODO: will remove this when pointers have an opaque type.
				701	assert(Ptr->getType()->getScalarType()->getPointerElementType() ==
				702	PointeeType &&
				703	"explicit pointee type doesn't match operand's pointee type");
				704	BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
				705	}
				706	bool HasBaseReg = (BaseGV == nullptr);
				707
				708	auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
				709	APInt BaseOffset(PtrSizeBits, 0);
				710	int64_t Scale = 0;
				711
				712	auto GTI = gep_type_begin(PointeeType, Operands);
				713	Type *TargetType = nullptr;
				714
				715	// Handle the case where the GEP instruction has a single operand,
				716	// the basis, therefore TargetType is a nullptr.
				717	if (Operands.empty())
				718	return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;
				719
				720	for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
				721	TargetType = GTI.getIndexedType();
				722	// We assume that the cost of Scalar GEP with constant index and the
				723	// cost of Vector GEP with splat constant index are the same.
				724	const ConstantInt ConstIdx = dyn_cast<ConstantInt>(I);
				725	if (!ConstIdx)
				726	if (auto Splat = getSplatValue(*I))
				727	ConstIdx = dyn_cast<ConstantInt>(Splat);
				728	if (StructType *STy = GTI.getStructTypeOrNull()) {
				729	// For structures the index is always splat or scalar constant
				730	assert(ConstIdx && "Unexpected GEP index");
				731	uint64_t Field = ConstIdx->getZExtValue();
				732	BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
				733	} else {
				734	int64_t ElementSize = DL.getTypeAllocSize(GTI.getIndexedType());
				735	if (ConstIdx) {
				736	BaseOffset +=
				737	ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
				738	} else {
				739	// Needs scale register.
				740	if (Scale != 0)
				741	// No addressing mode takes two scale registers.
				742	return TTI::TCC_Basic;
				743	Scale = ElementSize;
				744	}
				745	}
				746	}
				747
				748	// Assumes the address space is 0 when Ptr is nullptr.
				749	unsigned AS =
				750	(Ptr == nullptr ? 0 : Ptr->getType()->getPointerAddressSpace());
				751
				752	if (static_cast<T *>(this)->isLegalAddressingMode(
				753	TargetType, const_cast<GlobalValue *>(BaseGV),
				754	BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale, AS))
				755	return TTI::TCC_Free;
				756	return TTI::TCC_Basic;
				757	}
				758
				759	using BaseT::getIntrinsicCost;
				760
				761	unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
				762	ArrayRef<const Value *> Arguments) {
				763	// Delegate to the generic intrinsic handling code. This mostly provides an
				764	// opportunity for targets to (for example) special case the cost of
				765	// certain intrinsics based on constants used as arguments.
				766	SmallVector<Type *, 8> ParamTys;
				767	ParamTys.reserve(Arguments.size());
				768	for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx)
				769	ParamTys.push_back(Arguments[Idx]->getType());
				770	return static_cast<T *>(this)->getIntrinsicCost(IID, RetTy, ParamTys);
				771	}
				772
				773	unsigned getUserCost(const User U, ArrayRef<const Value > Operands) {
				774	if (isa<PHINode>(U))
				775	return TTI::TCC_Free; // Model all PHI nodes as free.
				776
				777	// Static alloca doesn't generate target instructions.
				778	if (auto *A = dyn_cast<AllocaInst>(U))
				779	if (A->isStaticAlloca())
				780	return TTI::TCC_Free;
				781
				782	if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
				783	return static_cast<T *>(this)->getGEPCost(GEP->getSourceElementType(),
				784	GEP->getPointerOperand(),
				785	Operands.drop_front());
				786	}
				787
				788	if (auto CS = ImmutableCallSite(U)) {
				789	const Function *F = CS.getCalledFunction();
				790	if (!F) {
				791	// Just use the called value type.
				792	Type *FTy = CS.getCalledValue()->getType()->getPointerElementType();
				793	return static_cast<T *>(this)
				794	->getCallCost(cast<FunctionType>(FTy), CS.arg_size());
				795	}
				796
				797	SmallVector<const Value *, 8> Arguments(CS.arg_begin(), CS.arg_end());
				798	return static_cast<T *>(this)->getCallCost(F, Arguments);
				799	}
				800
				801	if (const CastInst *CI = dyn_cast<CastInst>(U)) {
				802	// Result of a cmp instruction is often extended (to be used by other
				803	// cmp instructions, logical or return instructions). These are usually
				804	// nop on most sane targets.
				805	if (isa<CmpInst>(CI->getOperand(0)))
				806	return TTI::TCC_Free;
				807	if (isa<SExtInst>(CI) \|\| isa<ZExtInst>(CI) \|\| isa<FPExtInst>(CI))
				808	return static_cast<T *>(this)->getExtCost(CI, Operands.back());
				809	}
				810
				811	return static_cast<T *>(this)->getOperationCost(
				812	Operator::getOpcode(U), U->getType(),
				813	U->getNumOperands() == 1 ? U->getOperand(0)->getType() : nullptr);
				814	}
				815
				816	int getInstructionLatency(const Instruction *I) {
				817	SmallVector<const Value *, 4> Operands(I->value_op_begin(),
				818	I->value_op_end());
				819	if (getUserCost(I, Operands) == TTI::TCC_Free)
				820	return 0;
				821
				822	if (isa<LoadInst>(I))
				823	return 4;
				824
				825	Type *DstTy = I->getType();
				826
				827	// Usually an intrinsic is a simple instruction.
				828	// A real function call is much slower.
				829	if (auto *CI = dyn_cast<CallInst>(I)) {
				830	const Function *F = CI->getCalledFunction();
				831	if (!F \|\| static_cast<T *>(this)->isLoweredToCall(F))
				832	return 40;
				833	// Some intrinsics return a value and a flag, we use the value type
				834	// to decide its latency.
				835	if (StructType* StructTy = dyn_cast<StructType>(DstTy))
				836	DstTy = StructTy->getElementType(0);
				837	// Fall through to simple instructions.
				838	}
				839
				840	if (VectorType *VectorTy = dyn_cast<VectorType>(DstTy))
				841	DstTy = VectorTy->getElementType();
				842	if (DstTy->isFloatingPointTy())
				843	return 3;
				844
				845	return 1;
				846	}
				847	};
				848	}
				849
				850	#endif