Blame - linux-x64/clang/include/llvm/Analysis/TargetTransformInfoImpl.h - hafnium/prebuilts

blob: d80ae1d6845d5eb7975d5bd54d74ac8ddffc5ec5 [file] [log] [blame]

Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	1	//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	/// \file
				10	/// This file provides helpers for the implementation of
				11	/// a TargetTransformInfo-conforming class.
				12	///
				13	//===----------------------------------------------------------------------===//
				14
				15	#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
				16	#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
				17
				18	#include "llvm/Analysis/ScalarEvolutionExpressions.h"
				19	#include "llvm/Analysis/TargetTransformInfo.h"
				20	#include "llvm/Analysis/VectorUtils.h"
				21	#include "llvm/IR/CallSite.h"
				22	#include "llvm/IR/DataLayout.h"
				23	#include "llvm/IR/Function.h"
				24	#include "llvm/IR/GetElementPtrTypeIterator.h"
				25	#include "llvm/IR/Operator.h"
				26	#include "llvm/IR/Type.h"
				27
				28	namespace llvm {
				29
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame^]	30	/// Base class for use as a mix-in that aids implementing
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	31	/// a TargetTransformInfo-compatible class.
				32	class TargetTransformInfoImplBase {
				33	protected:
				34	typedef TargetTransformInfo TTI;
				35
				36	const DataLayout &DL;
				37
				38	explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}
				39
				40	public:
				41	// Provide value semantics. MSVC requires that we spell all of these out.
				42	TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg)
				43	: DL(Arg.DL) {}
				44	TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}
				45
				46	const DataLayout &getDataLayout() const { return DL; }
				47
				48	unsigned getOperationCost(unsigned Opcode, Type Ty, Type OpTy) {
				49	switch (Opcode) {
				50	default:
				51	// By default, just classify everything as 'basic'.
				52	return TTI::TCC_Basic;
				53
				54	case Instruction::GetElementPtr:
				55	llvm_unreachable("Use getGEPCost for GEP operations!");
				56
				57	case Instruction::BitCast:
				58	assert(OpTy && "Cast instructions must provide the operand type");
				59	if (Ty == OpTy \|\| (Ty->isPointerTy() && OpTy->isPointerTy()))
				60	// Identity and pointer-to-pointer casts are free.
				61	return TTI::TCC_Free;
				62
				63	// Otherwise, the default basic cost is used.
				64	return TTI::TCC_Basic;
				65
				66	case Instruction::FDiv:
				67	case Instruction::FRem:
				68	case Instruction::SDiv:
				69	case Instruction::SRem:
				70	case Instruction::UDiv:
				71	case Instruction::URem:
				72	return TTI::TCC_Expensive;
				73
				74	case Instruction::IntToPtr: {
				75	// An inttoptr cast is free so long as the input is a legal integer type
				76	// which doesn't contain values outside the range of a pointer.
				77	unsigned OpSize = OpTy->getScalarSizeInBits();
				78	if (DL.isLegalInteger(OpSize) &&
				79	OpSize <= DL.getPointerTypeSizeInBits(Ty))
				80	return TTI::TCC_Free;
				81
				82	// Otherwise it's not a no-op.
				83	return TTI::TCC_Basic;
				84	}
				85	case Instruction::PtrToInt: {
				86	// A ptrtoint cast is free so long as the result is large enough to store
				87	// the pointer, and a legal integer type.
				88	unsigned DestSize = Ty->getScalarSizeInBits();
				89	if (DL.isLegalInteger(DestSize) &&
				90	DestSize >= DL.getPointerTypeSizeInBits(OpTy))
				91	return TTI::TCC_Free;
				92
				93	// Otherwise it's not a no-op.
				94	return TTI::TCC_Basic;
				95	}
				96	case Instruction::Trunc:
				97	// trunc to a native type is free (assuming the target has compare and
				98	// shift-right of the same width).
				99	if (DL.isLegalInteger(DL.getTypeSizeInBits(Ty)))
				100	return TTI::TCC_Free;
				101
				102	return TTI::TCC_Basic;
				103	}
				104	}
				105
				106	int getGEPCost(Type PointeeType, const Value Ptr,
				107	ArrayRef<const Value *> Operands) {
				108	// In the basic model, we just assume that all-constant GEPs will be folded
				109	// into their uses via addressing modes.
				110	for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
				111	if (!isa<Constant>(Operands[Idx]))
				112	return TTI::TCC_Basic;
				113
				114	return TTI::TCC_Free;
				115	}
				116
				117	unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
				118	unsigned &JTSize) {
				119	JTSize = 0;
				120	return SI.getNumCases();
				121	}
				122
				123	int getExtCost(const Instruction I, const Value Src) {
				124	return TTI::TCC_Basic;
				125	}
				126
				127	unsigned getCallCost(FunctionType *FTy, int NumArgs) {
				128	assert(FTy && "FunctionType must be provided to this routine.");
				129
				130	// The target-independent implementation just measures the size of the
				131	// function by approximating that each argument will take on average one
				132	// instruction to prepare.
				133
				134	if (NumArgs < 0)
				135	// Set the argument number to the number of explicit arguments in the
				136	// function.
				137	NumArgs = FTy->getNumParams();
				138
				139	return TTI::TCC_Basic * (NumArgs + 1);
				140	}
				141
				142	unsigned getInliningThresholdMultiplier() { return 1; }
				143
				144	unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
				145	ArrayRef<Type *> ParamTys) {
				146	switch (IID) {
				147	default:
				148	// Intrinsics rarely (if ever) have normal argument setup constraints.
				149	// Model them as having a basic instruction cost.
				150	// FIXME: This is wrong for libc intrinsics.
				151	return TTI::TCC_Basic;
				152
				153	case Intrinsic::annotation:
				154	case Intrinsic::assume:
				155	case Intrinsic::sideeffect:
				156	case Intrinsic::dbg_declare:
				157	case Intrinsic::dbg_value:
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame^]	158	case Intrinsic::dbg_label:
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	159	case Intrinsic::invariant_start:
				160	case Intrinsic::invariant_end:
				161	case Intrinsic::lifetime_start:
				162	case Intrinsic::lifetime_end:
				163	case Intrinsic::objectsize:
				164	case Intrinsic::ptr_annotation:
				165	case Intrinsic::var_annotation:
				166	case Intrinsic::experimental_gc_result:
				167	case Intrinsic::experimental_gc_relocate:
				168	case Intrinsic::coro_alloc:
				169	case Intrinsic::coro_begin:
				170	case Intrinsic::coro_free:
				171	case Intrinsic::coro_end:
				172	case Intrinsic::coro_frame:
				173	case Intrinsic::coro_size:
				174	case Intrinsic::coro_suspend:
				175	case Intrinsic::coro_param:
				176	case Intrinsic::coro_subfn_addr:
				177	// These intrinsics don't actually represent code after lowering.
				178	return TTI::TCC_Free;
				179	}
				180	}
				181
				182	bool hasBranchDivergence() { return false; }
				183
				184	bool isSourceOfDivergence(const Value *V) { return false; }
				185
				186	bool isAlwaysUniform(const Value *V) { return false; }
				187
				188	unsigned getFlatAddressSpace () {
				189	return -1;
				190	}
				191
				192	bool isLoweredToCall(const Function *F) {
				193	assert(F && "A concrete function must be provided to this routine.");
				194
				195	// FIXME: These should almost certainly not be handled here, and instead
				196	// handled with the help of TLI or the target itself. This was largely
				197	// ported from existing analysis heuristics here so that such refactorings
				198	// can take place in the future.
				199
				200	if (F->isIntrinsic())
				201	return false;
				202
				203	if (F->hasLocalLinkage() \|\| !F->hasName())
				204	return true;
				205
				206	StringRef Name = F->getName();
				207
				208	// These will all likely lower to a single selection DAG node.
				209	if (Name == "copysign" \|\| Name == "copysignf" \|\| Name == "copysignl" \|\|
				210	Name == "fabs" \|\| Name == "fabsf" \|\| Name == "fabsl" \|\| Name == "sin" \|\|
				211	Name == "fmin" \|\| Name == "fminf" \|\| Name == "fminl" \|\|
				212	Name == "fmax" \|\| Name == "fmaxf" \|\| Name == "fmaxl" \|\|
				213	Name == "sinf" \|\| Name == "sinl" \|\| Name == "cos" \|\| Name == "cosf" \|\|
				214	Name == "cosl" \|\| Name == "sqrt" \|\| Name == "sqrtf" \|\| Name == "sqrtl")
				215	return false;
				216
				217	// These are all likely to be optimized into something smaller.
				218	if (Name == "pow" \|\| Name == "powf" \|\| Name == "powl" \|\| Name == "exp2" \|\|
				219	Name == "exp2l" \|\| Name == "exp2f" \|\| Name == "floor" \|\|
				220	Name == "floorf" \|\| Name == "ceil" \|\| Name == "round" \|\|
				221	Name == "ffs" \|\| Name == "ffsl" \|\| Name == "abs" \|\| Name == "labs" \|\|
				222	Name == "llabs")
				223	return false;
				224
				225	return true;
				226	}
				227
				228	void getUnrollingPreferences(Loop *, ScalarEvolution &,
				229	TTI::UnrollingPreferences &) {}
				230
				231	bool isLegalAddImmediate(int64_t Imm) { return false; }
				232
				233	bool isLegalICmpImmediate(int64_t Imm) { return false; }
				234
				235	bool isLegalAddressingMode(Type Ty, GlobalValue BaseGV, int64_t BaseOffset,
				236	bool HasBaseReg, int64_t Scale,
				237	unsigned AddrSpace, Instruction *I = nullptr) {
				238	// Guess that only reg and reg+reg addressing is allowed. This heuristic is
				239	// taken from the implementation of LSR.
				240	return !BaseGV && BaseOffset == 0 && (Scale == 0 \|\| Scale == 1);
				241	}
				242
				243	bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) {
				244	return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
				245	C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
				246	std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
				247	C2.ScaleCost, C2.ImmCost, C2.SetupCost);
				248	}
				249
				250	bool canMacroFuseCmp() { return false; }
				251
				252	bool shouldFavorPostInc() const { return false; }
				253
				254	bool isLegalMaskedStore(Type *DataType) { return false; }
				255
				256	bool isLegalMaskedLoad(Type *DataType) { return false; }
				257
				258	bool isLegalMaskedScatter(Type *DataType) { return false; }
				259
				260	bool isLegalMaskedGather(Type *DataType) { return false; }
				261
				262	bool hasDivRemOp(Type *DataType, bool IsSigned) { return false; }
				263
				264	bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) { return false; }
				265
				266	bool prefersVectorizedAddressing() { return true; }
				267
				268	int getScalingFactorCost(Type Ty, GlobalValue BaseGV, int64_t BaseOffset,
				269	bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
				270	// Guess that all legal addressing mode are free.
				271	if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
				272	Scale, AddrSpace))
				273	return 0;
				274	return -1;
				275	}
				276
				277	bool LSRWithInstrQueries() { return false; }
				278
				279	bool isTruncateFree(Type Ty1, Type Ty2) { return false; }
				280
				281	bool isProfitableToHoist(Instruction *I) { return true; }
				282
				283	bool useAA() { return false; }
				284
				285	bool isTypeLegal(Type *Ty) { return false; }
				286
				287	unsigned getJumpBufAlignment() { return 0; }
				288
				289	unsigned getJumpBufSize() { return 0; }
				290
				291	bool shouldBuildLookupTables() { return true; }
				292	bool shouldBuildLookupTablesForConstant(Constant *C) { return true; }
				293
				294	bool useColdCCForColdCall(Function &F) { return false; }
				295
				296	unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
				297	return 0;
				298	}
				299
				300	unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
				301	unsigned VF) { return 0; }
				302
				303	bool supportsEfficientVectorElementLoadStore() { return false; }
				304
				305	bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; }
				306
				307	const TTI::MemCmpExpansionOptions *enableMemCmpExpansion(
				308	bool IsZeroCmp) const {
				309	return nullptr;
				310	}
				311
				312	bool enableInterleavedAccessVectorization() { return false; }
				313
				314	bool isFPVectorizationPotentiallyUnsafe() { return false; }
				315
				316	bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
				317	unsigned BitWidth,
				318	unsigned AddressSpace,
				319	unsigned Alignment,
				320	bool *Fast) { return false; }
				321
				322	TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) {
				323	return TTI::PSK_Software;
				324	}
				325
				326	bool haveFastSqrt(Type *Ty) { return false; }
				327
				328	bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) { return true; }
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame^]	329
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	330	unsigned getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Basic; }
				331
				332	int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
				333	Type *Ty) {
				334	return 0;
				335	}
				336
				337	unsigned getIntImmCost(const APInt &Imm, Type *Ty) { return TTI::TCC_Basic; }
				338
				339	unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
				340	Type *Ty) {
				341	return TTI::TCC_Free;
				342	}
				343
				344	unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
				345	Type *Ty) {
				346	return TTI::TCC_Free;
				347	}
				348
				349	unsigned getNumberOfRegisters(bool Vector) { return 8; }
				350
				351	unsigned getRegisterBitWidth(bool Vector) const { return 32; }
				352
				353	unsigned getMinVectorRegisterBitWidth() { return 128; }
				354
				355	bool shouldMaximizeVectorBandwidth(bool OptSize) const { return false; }
				356
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame^]	357	unsigned getMinimumVF(unsigned ElemWidth) const { return 0; }
				358
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	359	bool
				360	shouldConsiderAddressTypePromotion(const Instruction &I,
				361	bool &AllowPromotionWithoutCommonHeader) {
				362	AllowPromotionWithoutCommonHeader = false;
				363	return false;
				364	}
				365
				366	unsigned getCacheLineSize() { return 0; }
				367
				368	llvm::Optional<unsigned> getCacheSize(TargetTransformInfo::CacheLevel Level) {
				369	switch (Level) {
				370	case TargetTransformInfo::CacheLevel::L1D:
				371	LLVM_FALLTHROUGH;
				372	case TargetTransformInfo::CacheLevel::L2D:
				373	return llvm::Optional<unsigned>();
				374	}
				375
				376	llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
				377	}
				378
				379	llvm::Optional<unsigned> getCacheAssociativity(
				380	TargetTransformInfo::CacheLevel Level) {
				381	switch (Level) {
				382	case TargetTransformInfo::CacheLevel::L1D:
				383	LLVM_FALLTHROUGH;
				384	case TargetTransformInfo::CacheLevel::L2D:
				385	return llvm::Optional<unsigned>();
				386	}
				387
				388	llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
				389	}
				390
				391	unsigned getPrefetchDistance() { return 0; }
				392
				393	unsigned getMinPrefetchStride() { return 1; }
				394
				395	unsigned getMaxPrefetchIterationsAhead() { return UINT_MAX; }
				396
				397	unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
				398
				399	unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
				400	TTI::OperandValueKind Opd1Info,
				401	TTI::OperandValueKind Opd2Info,
				402	TTI::OperandValueProperties Opd1PropInfo,
				403	TTI::OperandValueProperties Opd2PropInfo,
				404	ArrayRef<const Value *> Args) {
				405	return 1;
				406	}
				407
				408	unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Ty, int Index,
				409	Type *SubTp) {
				410	return 1;
				411	}
				412
				413	unsigned getCastInstrCost(unsigned Opcode, Type Dst, Type Src,
				414	const Instruction *I) { return 1; }
				415
				416	unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
				417	VectorType *VecTy, unsigned Index) {
				418	return 1;
				419	}
				420
				421	unsigned getCFInstrCost(unsigned Opcode) { return 1; }
				422
				423	unsigned getCmpSelInstrCost(unsigned Opcode, Type ValTy, Type CondTy,
				424	const Instruction *I) {
				425	return 1;
				426	}
				427
				428	unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
				429	return 1;
				430	}
				431
				432	unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
				433	unsigned AddressSpace, const Instruction *I) {
				434	return 1;
				435	}
				436
				437	unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
				438	unsigned AddressSpace) {
				439	return 1;
				440	}
				441
				442	unsigned getGatherScatterOpCost(unsigned Opcode, Type DataTy, Value Ptr,
				443	bool VariableMask,
				444	unsigned Alignment) {
				445	return 1;
				446	}
				447
				448	unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
				449	unsigned Factor,
				450	ArrayRef<unsigned> Indices,
				451	unsigned Alignment,
				452	unsigned AddressSpace) {
				453	return 1;
				454	}
				455
				456	unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
				457	ArrayRef<Type *> Tys, FastMathFlags FMF,
				458	unsigned ScalarizationCostPassed) {
				459	return 1;
				460	}
				461	unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
				462	ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) {
				463	return 1;
				464	}
				465
				466	unsigned getCallInstrCost(Function F, Type RetTy, ArrayRef<Type *> Tys) {
				467	return 1;
				468	}
				469
				470	unsigned getNumberOfParts(Type *Tp) { return 0; }
				471
				472	unsigned getAddressComputationCost(Type Tp, ScalarEvolution ,
				473	const SCEV *) {
				474	return 0;
				475	}
				476
				477	unsigned getArithmeticReductionCost(unsigned, Type *, bool) { return 1; }
				478
				479	unsigned getMinMaxReductionCost(Type , Type , bool, bool) { return 1; }
				480
				481	unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) { return 0; }
				482
				483	bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) {
				484	return false;
				485	}
				486
				487	unsigned getAtomicMemIntrinsicMaxElementSize() const {
				488	// Note for overrides: You must ensure for all element unordered-atomic
				489	// memory intrinsics that all power-of-2 element sizes up to, and
				490	// including, the return value of this method have a corresponding
				491	// runtime lib call. These runtime lib call definitions can be found
				492	// in RuntimeLibcalls.h
				493	return 0;
				494	}
				495
				496	Value getOrCreateResultFromMemIntrinsic(IntrinsicInst Inst,
				497	Type *ExpectedType) {
				498	return nullptr;
				499	}
				500
				501	Type getMemcpyLoopLoweringType(LLVMContext &Context, Value Length,
				502	unsigned SrcAlign, unsigned DestAlign) const {
				503	return Type::getInt8Ty(Context);
				504	}
				505
				506	void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
				507	LLVMContext &Context,
				508	unsigned RemainingBytes,
				509	unsigned SrcAlign,
				510	unsigned DestAlign) const {
				511	for (unsigned i = 0; i != RemainingBytes; ++i)
				512	OpsOut.push_back(Type::getInt8Ty(Context));
				513	}
				514
				515	bool areInlineCompatible(const Function *Caller,
				516	const Function *Callee) const {
				517	return (Caller->getFnAttribute("target-cpu") ==
				518	Callee->getFnAttribute("target-cpu")) &&
				519	(Caller->getFnAttribute("target-features") ==
				520	Callee->getFnAttribute("target-features"));
				521	}
				522
				523	bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
				524	const DataLayout &DL) const {
				525	return false;
				526	}
				527
				528	bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty,
				529	const DataLayout &DL) const {
				530	return false;
				531	}
				532
				533	unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }
				534
				535	bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }
				536
				537	bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }
				538
				539	bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
				540	unsigned Alignment,
				541	unsigned AddrSpace) const {
				542	return true;
				543	}
				544
				545	bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
				546	unsigned Alignment,
				547	unsigned AddrSpace) const {
				548	return true;
				549	}
				550
				551	unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
				552	unsigned ChainSizeInBytes,
				553	VectorType *VecTy) const {
				554	return VF;
				555	}
				556
				557	unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
				558	unsigned ChainSizeInBytes,
				559	VectorType *VecTy) const {
				560	return VF;
				561	}
				562
				563	bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
				564	TTI::ReductionFlags Flags) const {
				565	return false;
				566	}
				567
				568	bool shouldExpandReduction(const IntrinsicInst *II) const {
				569	return true;
				570	}
				571
				572	protected:
				573	// Obtain the minimum required size to hold the value (without the sign)
				574	// In case of a vector it returns the min required size for one element.
				575	unsigned minRequiredElementSize(const Value* Val, bool &isSigned) {
				576	if (isa<ConstantDataVector>(Val) \|\| isa<ConstantVector>(Val)) {
				577	const auto* VectorValue = cast<Constant>(Val);
				578
				579	// In case of a vector need to pick the max between the min
				580	// required size for each element
				581	auto *VT = cast<VectorType>(Val->getType());
				582
				583	// Assume unsigned elements
				584	isSigned = false;
				585
				586	// The max required size is the total vector width divided by num
				587	// of elements in the vector
				588	unsigned MaxRequiredSize = VT->getBitWidth() / VT->getNumElements();
				589
				590	unsigned MinRequiredSize = 0;
				591	for(unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
				592	if (auto* IntElement =
				593	dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
				594	bool signedElement = IntElement->getValue().isNegative();
				595	// Get the element min required size.
				596	unsigned ElementMinRequiredSize =
				597	IntElement->getValue().getMinSignedBits() - 1;
				598	// In case one element is signed then all the vector is signed.
				599	isSigned \|= signedElement;
				600	// Save the max required bit size between all the elements.
				601	MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
				602	}
				603	else {
				604	// not an int constant element
				605	return MaxRequiredSize;
				606	}
				607	}
				608	return MinRequiredSize;
				609	}
				610
				611	if (const auto* CI = dyn_cast<ConstantInt>(Val)) {
				612	isSigned = CI->getValue().isNegative();
				613	return CI->getValue().getMinSignedBits() - 1;
				614	}
				615
				616	if (const auto* Cast = dyn_cast<SExtInst>(Val)) {
				617	isSigned = true;
				618	return Cast->getSrcTy()->getScalarSizeInBits() - 1;
				619	}
				620
				621	if (const auto* Cast = dyn_cast<ZExtInst>(Val)) {
				622	isSigned = false;
				623	return Cast->getSrcTy()->getScalarSizeInBits();
				624	}
				625
				626	isSigned = false;
				627	return Val->getType()->getScalarSizeInBits();
				628	}
				629
				630	bool isStridedAccess(const SCEV *Ptr) {
				631	return Ptr && isa<SCEVAddRecExpr>(Ptr);
				632	}
				633
				634	const SCEVConstant getConstantStrideStep(ScalarEvolution SE,
				635	const SCEV *Ptr) {
				636	if (!isStridedAccess(Ptr))
				637	return nullptr;
				638	const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
				639	return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
				640	}
				641
				642	bool isConstantStridedAccessLessThan(ScalarEvolution SE, const SCEV Ptr,
				643	int64_t MergeDistance) {
				644	const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
				645	if (!Step)
				646	return false;
				647	APInt StrideVal = Step->getAPInt();
				648	if (StrideVal.getBitWidth() > 64)
				649	return false;
				650	// FIXME: Need to take absolute value for negative stride case.
				651	return StrideVal.getSExtValue() < MergeDistance;
				652	}
				653	};
				654
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame^]	655	/// CRTP base class for use as a mix-in that aids implementing
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	656	/// a TargetTransformInfo-compatible class.
				657	template <typename T>
				658	class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
				659	private:
				660	typedef TargetTransformInfoImplBase BaseT;
				661
				662	protected:
				663	explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}
				664
				665	public:
				666	using BaseT::getCallCost;
				667
				668	unsigned getCallCost(const Function *F, int NumArgs) {
				669	assert(F && "A concrete function must be provided to this routine.");
				670
				671	if (NumArgs < 0)
				672	// Set the argument number to the number of explicit arguments in the
				673	// function.
				674	NumArgs = F->arg_size();
				675
				676	if (Intrinsic::ID IID = F->getIntrinsicID()) {
				677	FunctionType *FTy = F->getFunctionType();
				678	SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end());
				679	return static_cast<T *>(this)
				680	->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys);
				681	}
				682
				683	if (!static_cast<T *>(this)->isLoweredToCall(F))
				684	return TTI::TCC_Basic; // Give a basic cost if it will be lowered
				685	// directly.
				686
				687	return static_cast<T *>(this)->getCallCost(F->getFunctionType(), NumArgs);
				688	}
				689
				690	unsigned getCallCost(const Function F, ArrayRef<const Value > Arguments) {
				691	// Simply delegate to generic handling of the call.
				692	// FIXME: We should use instsimplify or something else to catch calls which
				693	// will constant fold with these arguments.
				694	return static_cast<T *>(this)->getCallCost(F, Arguments.size());
				695	}
				696
				697	using BaseT::getGEPCost;
				698
				699	int getGEPCost(Type PointeeType, const Value Ptr,
				700	ArrayRef<const Value *> Operands) {
				701	const GlobalValue *BaseGV = nullptr;
				702	if (Ptr != nullptr) {
				703	// TODO: will remove this when pointers have an opaque type.
				704	assert(Ptr->getType()->getScalarType()->getPointerElementType() ==
				705	PointeeType &&
				706	"explicit pointee type doesn't match operand's pointee type");
				707	BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
				708	}
				709	bool HasBaseReg = (BaseGV == nullptr);
				710
				711	auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
				712	APInt BaseOffset(PtrSizeBits, 0);
				713	int64_t Scale = 0;
				714
				715	auto GTI = gep_type_begin(PointeeType, Operands);
				716	Type *TargetType = nullptr;
				717
				718	// Handle the case where the GEP instruction has a single operand,
				719	// the basis, therefore TargetType is a nullptr.
				720	if (Operands.empty())
				721	return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;
				722
				723	for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
				724	TargetType = GTI.getIndexedType();
				725	// We assume that the cost of Scalar GEP with constant index and the
				726	// cost of Vector GEP with splat constant index are the same.
				727	const ConstantInt ConstIdx = dyn_cast<ConstantInt>(I);
				728	if (!ConstIdx)
				729	if (auto Splat = getSplatValue(*I))
				730	ConstIdx = dyn_cast<ConstantInt>(Splat);
				731	if (StructType *STy = GTI.getStructTypeOrNull()) {
				732	// For structures the index is always splat or scalar constant
				733	assert(ConstIdx && "Unexpected GEP index");
				734	uint64_t Field = ConstIdx->getZExtValue();
				735	BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
				736	} else {
				737	int64_t ElementSize = DL.getTypeAllocSize(GTI.getIndexedType());
				738	if (ConstIdx) {
				739	BaseOffset +=
				740	ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
				741	} else {
				742	// Needs scale register.
				743	if (Scale != 0)
				744	// No addressing mode takes two scale registers.
				745	return TTI::TCC_Basic;
				746	Scale = ElementSize;
				747	}
				748	}
				749	}
				750
				751	// Assumes the address space is 0 when Ptr is nullptr.
				752	unsigned AS =
				753	(Ptr == nullptr ? 0 : Ptr->getType()->getPointerAddressSpace());
				754
				755	if (static_cast<T *>(this)->isLegalAddressingMode(
				756	TargetType, const_cast<GlobalValue *>(BaseGV),
				757	BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale, AS))
				758	return TTI::TCC_Free;
				759	return TTI::TCC_Basic;
				760	}
				761
				762	using BaseT::getIntrinsicCost;
				763
				764	unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
				765	ArrayRef<const Value *> Arguments) {
				766	// Delegate to the generic intrinsic handling code. This mostly provides an
				767	// opportunity for targets to (for example) special case the cost of
				768	// certain intrinsics based on constants used as arguments.
				769	SmallVector<Type *, 8> ParamTys;
				770	ParamTys.reserve(Arguments.size());
				771	for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx)
				772	ParamTys.push_back(Arguments[Idx]->getType());
				773	return static_cast<T *>(this)->getIntrinsicCost(IID, RetTy, ParamTys);
				774	}
				775
				776	unsigned getUserCost(const User U, ArrayRef<const Value > Operands) {
				777	if (isa<PHINode>(U))
				778	return TTI::TCC_Free; // Model all PHI nodes as free.
				779
				780	// Static alloca doesn't generate target instructions.
				781	if (auto *A = dyn_cast<AllocaInst>(U))
				782	if (A->isStaticAlloca())
				783	return TTI::TCC_Free;
				784
				785	if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
				786	return static_cast<T *>(this)->getGEPCost(GEP->getSourceElementType(),
				787	GEP->getPointerOperand(),
				788	Operands.drop_front());
				789	}
				790
				791	if (auto CS = ImmutableCallSite(U)) {
				792	const Function *F = CS.getCalledFunction();
				793	if (!F) {
				794	// Just use the called value type.
				795	Type *FTy = CS.getCalledValue()->getType()->getPointerElementType();
				796	return static_cast<T *>(this)
				797	->getCallCost(cast<FunctionType>(FTy), CS.arg_size());
				798	}
				799
				800	SmallVector<const Value *, 8> Arguments(CS.arg_begin(), CS.arg_end());
				801	return static_cast<T *>(this)->getCallCost(F, Arguments);
				802	}
				803
				804	if (const CastInst *CI = dyn_cast<CastInst>(U)) {
				805	// Result of a cmp instruction is often extended (to be used by other
				806	// cmp instructions, logical or return instructions). These are usually
				807	// nop on most sane targets.
				808	if (isa<CmpInst>(CI->getOperand(0)))
				809	return TTI::TCC_Free;
				810	if (isa<SExtInst>(CI) \|\| isa<ZExtInst>(CI) \|\| isa<FPExtInst>(CI))
				811	return static_cast<T *>(this)->getExtCost(CI, Operands.back());
				812	}
				813
				814	return static_cast<T *>(this)->getOperationCost(
				815	Operator::getOpcode(U), U->getType(),
				816	U->getNumOperands() == 1 ? U->getOperand(0)->getType() : nullptr);
				817	}
				818
				819	int getInstructionLatency(const Instruction *I) {
				820	SmallVector<const Value *, 4> Operands(I->value_op_begin(),
				821	I->value_op_end());
				822	if (getUserCost(I, Operands) == TTI::TCC_Free)
				823	return 0;
				824
				825	if (isa<LoadInst>(I))
				826	return 4;
				827
				828	Type *DstTy = I->getType();
				829
				830	// Usually an intrinsic is a simple instruction.
				831	// A real function call is much slower.
				832	if (auto *CI = dyn_cast<CallInst>(I)) {
				833	const Function *F = CI->getCalledFunction();
				834	if (!F \|\| static_cast<T *>(this)->isLoweredToCall(F))
				835	return 40;
				836	// Some intrinsics return a value and a flag, we use the value type
				837	// to decide its latency.
				838	if (StructType* StructTy = dyn_cast<StructType>(DstTy))
				839	DstTy = StructTy->getElementType(0);
				840	// Fall through to simple instructions.
				841	}
				842
				843	if (VectorType *VectorTy = dyn_cast<VectorType>(DstTy))
				844	DstTy = VectorTy->getElementType();
				845	if (DstTy->isFloatingPointTy())
				846	return 3;
				847
				848	return 1;
				849	}
				850	};
				851	}
				852
				853	#endif