Blame - linux-x64/clang/include/llvm/Analysis/TargetTransformInfoImpl.h - hafnium/prebuilts

blob: e39fe66c0a479536a514cdbddc07fa24aebcd3e2 [file] [log] [blame]

Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	1	//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
				2	//
				3	// The LLVM Compiler Infrastructure
				4	//
				5	// This file is distributed under the University of Illinois Open Source
				6	// License. See LICENSE.TXT for details.
				7	//
				8	//===----------------------------------------------------------------------===//
				9	/// \file
				10	/// This file provides helpers for the implementation of
				11	/// a TargetTransformInfo-conforming class.
				12	///
				13	//===----------------------------------------------------------------------===//
				14
				15	#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
				16	#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
				17
				18	#include "llvm/Analysis/ScalarEvolutionExpressions.h"
				19	#include "llvm/Analysis/TargetTransformInfo.h"
				20	#include "llvm/Analysis/VectorUtils.h"
				21	#include "llvm/IR/CallSite.h"
				22	#include "llvm/IR/DataLayout.h"
				23	#include "llvm/IR/Function.h"
				24	#include "llvm/IR/GetElementPtrTypeIterator.h"
				25	#include "llvm/IR/Operator.h"
				26	#include "llvm/IR/Type.h"
				27
				28	namespace llvm {
				29
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	30	/// Base class for use as a mix-in that aids implementing
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	31	/// a TargetTransformInfo-compatible class.
				32	class TargetTransformInfoImplBase {
				33	protected:
				34	typedef TargetTransformInfo TTI;
				35
				36	const DataLayout &DL;
				37
				38	explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}
				39
				40	public:
				41	// Provide value semantics. MSVC requires that we spell all of these out.
				42	TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg)
				43	: DL(Arg.DL) {}
				44	TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}
				45
				46	const DataLayout &getDataLayout() const { return DL; }
				47
				48	unsigned getOperationCost(unsigned Opcode, Type Ty, Type OpTy) {
				49	switch (Opcode) {
				50	default:
				51	// By default, just classify everything as 'basic'.
				52	return TTI::TCC_Basic;
				53
				54	case Instruction::GetElementPtr:
				55	llvm_unreachable("Use getGEPCost for GEP operations!");
				56
				57	case Instruction::BitCast:
				58	assert(OpTy && "Cast instructions must provide the operand type");
				59	if (Ty == OpTy \|\| (Ty->isPointerTy() && OpTy->isPointerTy()))
				60	// Identity and pointer-to-pointer casts are free.
				61	return TTI::TCC_Free;
				62
				63	// Otherwise, the default basic cost is used.
				64	return TTI::TCC_Basic;
				65
				66	case Instruction::FDiv:
				67	case Instruction::FRem:
				68	case Instruction::SDiv:
				69	case Instruction::SRem:
				70	case Instruction::UDiv:
				71	case Instruction::URem:
				72	return TTI::TCC_Expensive;
				73
				74	case Instruction::IntToPtr: {
				75	// An inttoptr cast is free so long as the input is a legal integer type
				76	// which doesn't contain values outside the range of a pointer.
				77	unsigned OpSize = OpTy->getScalarSizeInBits();
				78	if (DL.isLegalInteger(OpSize) &&
				79	OpSize <= DL.getPointerTypeSizeInBits(Ty))
				80	return TTI::TCC_Free;
				81
				82	// Otherwise it's not a no-op.
				83	return TTI::TCC_Basic;
				84	}
				85	case Instruction::PtrToInt: {
				86	// A ptrtoint cast is free so long as the result is large enough to store
				87	// the pointer, and a legal integer type.
				88	unsigned DestSize = Ty->getScalarSizeInBits();
				89	if (DL.isLegalInteger(DestSize) &&
				90	DestSize >= DL.getPointerTypeSizeInBits(OpTy))
				91	return TTI::TCC_Free;
				92
				93	// Otherwise it's not a no-op.
				94	return TTI::TCC_Basic;
				95	}
				96	case Instruction::Trunc:
				97	// trunc to a native type is free (assuming the target has compare and
				98	// shift-right of the same width).
				99	if (DL.isLegalInteger(DL.getTypeSizeInBits(Ty)))
				100	return TTI::TCC_Free;
				101
				102	return TTI::TCC_Basic;
				103	}
				104	}
				105
				106	int getGEPCost(Type PointeeType, const Value Ptr,
				107	ArrayRef<const Value *> Operands) {
				108	// In the basic model, we just assume that all-constant GEPs will be folded
				109	// into their uses via addressing modes.
				110	for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
				111	if (!isa<Constant>(Operands[Idx]))
				112	return TTI::TCC_Basic;
				113
				114	return TTI::TCC_Free;
				115	}
				116
				117	unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
				118	unsigned &JTSize) {
				119	JTSize = 0;
				120	return SI.getNumCases();
				121	}
				122
				123	int getExtCost(const Instruction I, const Value Src) {
				124	return TTI::TCC_Basic;
				125	}
				126
				127	unsigned getCallCost(FunctionType *FTy, int NumArgs) {
				128	assert(FTy && "FunctionType must be provided to this routine.");
				129
				130	// The target-independent implementation just measures the size of the
				131	// function by approximating that each argument will take on average one
				132	// instruction to prepare.
				133
				134	if (NumArgs < 0)
				135	// Set the argument number to the number of explicit arguments in the
				136	// function.
				137	NumArgs = FTy->getNumParams();
				138
				139	return TTI::TCC_Basic * (NumArgs + 1);
				140	}
				141
				142	unsigned getInliningThresholdMultiplier() { return 1; }
				143
				144	unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
				145	ArrayRef<Type *> ParamTys) {
				146	switch (IID) {
				147	default:
				148	// Intrinsics rarely (if ever) have normal argument setup constraints.
				149	// Model them as having a basic instruction cost.
				150	// FIXME: This is wrong for libc intrinsics.
				151	return TTI::TCC_Basic;
				152
				153	case Intrinsic::annotation:
				154	case Intrinsic::assume:
				155	case Intrinsic::sideeffect:
				156	case Intrinsic::dbg_declare:
				157	case Intrinsic::dbg_value:
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	158	case Intrinsic::dbg_label:
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	159	case Intrinsic::invariant_start:
				160	case Intrinsic::invariant_end:
Andrew Scull	0372a57	2018-11-16 15:47:06 +0000	[diff] [blame^]	161	case Intrinsic::launder_invariant_group:
				162	case Intrinsic::strip_invariant_group:
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	163	case Intrinsic::lifetime_start:
				164	case Intrinsic::lifetime_end:
				165	case Intrinsic::objectsize:
				166	case Intrinsic::ptr_annotation:
				167	case Intrinsic::var_annotation:
				168	case Intrinsic::experimental_gc_result:
				169	case Intrinsic::experimental_gc_relocate:
				170	case Intrinsic::coro_alloc:
				171	case Intrinsic::coro_begin:
				172	case Intrinsic::coro_free:
				173	case Intrinsic::coro_end:
				174	case Intrinsic::coro_frame:
				175	case Intrinsic::coro_size:
				176	case Intrinsic::coro_suspend:
				177	case Intrinsic::coro_param:
				178	case Intrinsic::coro_subfn_addr:
				179	// These intrinsics don't actually represent code after lowering.
				180	return TTI::TCC_Free;
				181	}
				182	}
				183
				184	bool hasBranchDivergence() { return false; }
				185
				186	bool isSourceOfDivergence(const Value *V) { return false; }
				187
				188	bool isAlwaysUniform(const Value *V) { return false; }
				189
				190	unsigned getFlatAddressSpace () {
				191	return -1;
				192	}
				193
				194	bool isLoweredToCall(const Function *F) {
				195	assert(F && "A concrete function must be provided to this routine.");
				196
				197	// FIXME: These should almost certainly not be handled here, and instead
				198	// handled with the help of TLI or the target itself. This was largely
				199	// ported from existing analysis heuristics here so that such refactorings
				200	// can take place in the future.
				201
				202	if (F->isIntrinsic())
				203	return false;
				204
				205	if (F->hasLocalLinkage() \|\| !F->hasName())
				206	return true;
				207
				208	StringRef Name = F->getName();
				209
				210	// These will all likely lower to a single selection DAG node.
				211	if (Name == "copysign" \|\| Name == "copysignf" \|\| Name == "copysignl" \|\|
				212	Name == "fabs" \|\| Name == "fabsf" \|\| Name == "fabsl" \|\| Name == "sin" \|\|
				213	Name == "fmin" \|\| Name == "fminf" \|\| Name == "fminl" \|\|
				214	Name == "fmax" \|\| Name == "fmaxf" \|\| Name == "fmaxl" \|\|
				215	Name == "sinf" \|\| Name == "sinl" \|\| Name == "cos" \|\| Name == "cosf" \|\|
				216	Name == "cosl" \|\| Name == "sqrt" \|\| Name == "sqrtf" \|\| Name == "sqrtl")
				217	return false;
				218
				219	// These are all likely to be optimized into something smaller.
				220	if (Name == "pow" \|\| Name == "powf" \|\| Name == "powl" \|\| Name == "exp2" \|\|
				221	Name == "exp2l" \|\| Name == "exp2f" \|\| Name == "floor" \|\|
				222	Name == "floorf" \|\| Name == "ceil" \|\| Name == "round" \|\|
				223	Name == "ffs" \|\| Name == "ffsl" \|\| Name == "abs" \|\| Name == "labs" \|\|
				224	Name == "llabs")
				225	return false;
				226
				227	return true;
				228	}
				229
				230	void getUnrollingPreferences(Loop *, ScalarEvolution &,
				231	TTI::UnrollingPreferences &) {}
				232
				233	bool isLegalAddImmediate(int64_t Imm) { return false; }
				234
				235	bool isLegalICmpImmediate(int64_t Imm) { return false; }
				236
				237	bool isLegalAddressingMode(Type Ty, GlobalValue BaseGV, int64_t BaseOffset,
				238	bool HasBaseReg, int64_t Scale,
				239	unsigned AddrSpace, Instruction *I = nullptr) {
				240	// Guess that only reg and reg+reg addressing is allowed. This heuristic is
				241	// taken from the implementation of LSR.
				242	return !BaseGV && BaseOffset == 0 && (Scale == 0 \|\| Scale == 1);
				243	}
				244
				245	bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) {
				246	return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
				247	C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
				248	std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
				249	C2.ScaleCost, C2.ImmCost, C2.SetupCost);
				250	}
				251
				252	bool canMacroFuseCmp() { return false; }
				253
				254	bool shouldFavorPostInc() const { return false; }
				255
				256	bool isLegalMaskedStore(Type *DataType) { return false; }
				257
				258	bool isLegalMaskedLoad(Type *DataType) { return false; }
				259
				260	bool isLegalMaskedScatter(Type *DataType) { return false; }
				261
				262	bool isLegalMaskedGather(Type *DataType) { return false; }
				263
				264	bool hasDivRemOp(Type *DataType, bool IsSigned) { return false; }
				265
				266	bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) { return false; }
				267
				268	bool prefersVectorizedAddressing() { return true; }
				269
				270	int getScalingFactorCost(Type Ty, GlobalValue BaseGV, int64_t BaseOffset,
				271	bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
				272	// Guess that all legal addressing mode are free.
				273	if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
				274	Scale, AddrSpace))
				275	return 0;
				276	return -1;
				277	}
				278
				279	bool LSRWithInstrQueries() { return false; }
				280
				281	bool isTruncateFree(Type Ty1, Type Ty2) { return false; }
				282
				283	bool isProfitableToHoist(Instruction *I) { return true; }
				284
				285	bool useAA() { return false; }
				286
				287	bool isTypeLegal(Type *Ty) { return false; }
				288
				289	unsigned getJumpBufAlignment() { return 0; }
				290
				291	unsigned getJumpBufSize() { return 0; }
				292
				293	bool shouldBuildLookupTables() { return true; }
				294	bool shouldBuildLookupTablesForConstant(Constant *C) { return true; }
				295
				296	bool useColdCCForColdCall(Function &F) { return false; }
				297
				298	unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
				299	return 0;
				300	}
				301
				302	unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
				303	unsigned VF) { return 0; }
				304
				305	bool supportsEfficientVectorElementLoadStore() { return false; }
				306
				307	bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; }
				308
				309	const TTI::MemCmpExpansionOptions *enableMemCmpExpansion(
				310	bool IsZeroCmp) const {
				311	return nullptr;
				312	}
				313
				314	bool enableInterleavedAccessVectorization() { return false; }
				315
				316	bool isFPVectorizationPotentiallyUnsafe() { return false; }
				317
				318	bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
				319	unsigned BitWidth,
				320	unsigned AddressSpace,
				321	unsigned Alignment,
				322	bool *Fast) { return false; }
				323
				324	TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) {
				325	return TTI::PSK_Software;
				326	}
				327
				328	bool haveFastSqrt(Type *Ty) { return false; }
				329
				330	bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) { return true; }
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	331
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	332	unsigned getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Basic; }
				333
				334	int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
				335	Type *Ty) {
				336	return 0;
				337	}
				338
				339	unsigned getIntImmCost(const APInt &Imm, Type *Ty) { return TTI::TCC_Basic; }
				340
				341	unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
				342	Type *Ty) {
				343	return TTI::TCC_Free;
				344	}
				345
				346	unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
				347	Type *Ty) {
				348	return TTI::TCC_Free;
				349	}
				350
				351	unsigned getNumberOfRegisters(bool Vector) { return 8; }
				352
				353	unsigned getRegisterBitWidth(bool Vector) const { return 32; }
				354
				355	unsigned getMinVectorRegisterBitWidth() { return 128; }
				356
				357	bool shouldMaximizeVectorBandwidth(bool OptSize) const { return false; }
				358
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	359	unsigned getMinimumVF(unsigned ElemWidth) const { return 0; }
				360
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	361	bool
				362	shouldConsiderAddressTypePromotion(const Instruction &I,
				363	bool &AllowPromotionWithoutCommonHeader) {
				364	AllowPromotionWithoutCommonHeader = false;
				365	return false;
				366	}
				367
				368	unsigned getCacheLineSize() { return 0; }
				369
				370	llvm::Optional<unsigned> getCacheSize(TargetTransformInfo::CacheLevel Level) {
				371	switch (Level) {
				372	case TargetTransformInfo::CacheLevel::L1D:
				373	LLVM_FALLTHROUGH;
				374	case TargetTransformInfo::CacheLevel::L2D:
				375	return llvm::Optional<unsigned>();
				376	}
				377
				378	llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
				379	}
				380
				381	llvm::Optional<unsigned> getCacheAssociativity(
				382	TargetTransformInfo::CacheLevel Level) {
				383	switch (Level) {
				384	case TargetTransformInfo::CacheLevel::L1D:
				385	LLVM_FALLTHROUGH;
				386	case TargetTransformInfo::CacheLevel::L2D:
				387	return llvm::Optional<unsigned>();
				388	}
				389
				390	llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
				391	}
				392
				393	unsigned getPrefetchDistance() { return 0; }
				394
				395	unsigned getMinPrefetchStride() { return 1; }
				396
				397	unsigned getMaxPrefetchIterationsAhead() { return UINT_MAX; }
				398
				399	unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }
				400
				401	unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
				402	TTI::OperandValueKind Opd1Info,
				403	TTI::OperandValueKind Opd2Info,
				404	TTI::OperandValueProperties Opd1PropInfo,
				405	TTI::OperandValueProperties Opd2PropInfo,
				406	ArrayRef<const Value *> Args) {
				407	return 1;
				408	}
				409
				410	unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Ty, int Index,
				411	Type *SubTp) {
				412	return 1;
				413	}
				414
				415	unsigned getCastInstrCost(unsigned Opcode, Type Dst, Type Src,
				416	const Instruction *I) { return 1; }
				417
				418	unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
				419	VectorType *VecTy, unsigned Index) {
				420	return 1;
				421	}
				422
				423	unsigned getCFInstrCost(unsigned Opcode) { return 1; }
				424
				425	unsigned getCmpSelInstrCost(unsigned Opcode, Type ValTy, Type CondTy,
				426	const Instruction *I) {
				427	return 1;
				428	}
				429
				430	unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
				431	return 1;
				432	}
				433
				434	unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
				435	unsigned AddressSpace, const Instruction *I) {
				436	return 1;
				437	}
				438
				439	unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
				440	unsigned AddressSpace) {
				441	return 1;
				442	}
				443
				444	unsigned getGatherScatterOpCost(unsigned Opcode, Type DataTy, Value Ptr,
				445	bool VariableMask,
				446	unsigned Alignment) {
				447	return 1;
				448	}
				449
				450	unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
				451	unsigned Factor,
				452	ArrayRef<unsigned> Indices,
				453	unsigned Alignment,
				454	unsigned AddressSpace) {
				455	return 1;
				456	}
				457
				458	unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
				459	ArrayRef<Type *> Tys, FastMathFlags FMF,
				460	unsigned ScalarizationCostPassed) {
				461	return 1;
				462	}
				463	unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
				464	ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) {
				465	return 1;
				466	}
				467
				468	unsigned getCallInstrCost(Function F, Type RetTy, ArrayRef<Type *> Tys) {
				469	return 1;
				470	}
				471
				472	unsigned getNumberOfParts(Type *Tp) { return 0; }
				473
				474	unsigned getAddressComputationCost(Type Tp, ScalarEvolution ,
				475	const SCEV *) {
				476	return 0;
				477	}
				478
				479	unsigned getArithmeticReductionCost(unsigned, Type *, bool) { return 1; }
				480
				481	unsigned getMinMaxReductionCost(Type , Type , bool, bool) { return 1; }
				482
				483	unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) { return 0; }
				484
				485	bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) {
				486	return false;
				487	}
				488
				489	unsigned getAtomicMemIntrinsicMaxElementSize() const {
				490	// Note for overrides: You must ensure for all element unordered-atomic
				491	// memory intrinsics that all power-of-2 element sizes up to, and
				492	// including, the return value of this method have a corresponding
				493	// runtime lib call. These runtime lib call definitions can be found
				494	// in RuntimeLibcalls.h
				495	return 0;
				496	}
				497
				498	Value getOrCreateResultFromMemIntrinsic(IntrinsicInst Inst,
				499	Type *ExpectedType) {
				500	return nullptr;
				501	}
				502
				503	Type getMemcpyLoopLoweringType(LLVMContext &Context, Value Length,
				504	unsigned SrcAlign, unsigned DestAlign) const {
				505	return Type::getInt8Ty(Context);
				506	}
				507
				508	void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
				509	LLVMContext &Context,
				510	unsigned RemainingBytes,
				511	unsigned SrcAlign,
				512	unsigned DestAlign) const {
				513	for (unsigned i = 0; i != RemainingBytes; ++i)
				514	OpsOut.push_back(Type::getInt8Ty(Context));
				515	}
				516
				517	bool areInlineCompatible(const Function *Caller,
				518	const Function *Callee) const {
				519	return (Caller->getFnAttribute("target-cpu") ==
				520	Callee->getFnAttribute("target-cpu")) &&
				521	(Caller->getFnAttribute("target-features") ==
				522	Callee->getFnAttribute("target-features"));
				523	}
				524
				525	bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
				526	const DataLayout &DL) const {
				527	return false;
				528	}
				529
				530	bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty,
				531	const DataLayout &DL) const {
				532	return false;
				533	}
				534
				535	unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }
				536
				537	bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }
				538
				539	bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }
				540
				541	bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
				542	unsigned Alignment,
				543	unsigned AddrSpace) const {
				544	return true;
				545	}
				546
				547	bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
				548	unsigned Alignment,
				549	unsigned AddrSpace) const {
				550	return true;
				551	}
				552
				553	unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
				554	unsigned ChainSizeInBytes,
				555	VectorType *VecTy) const {
				556	return VF;
				557	}
				558
				559	unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
				560	unsigned ChainSizeInBytes,
				561	VectorType *VecTy) const {
				562	return VF;
				563	}
				564
				565	bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
				566	TTI::ReductionFlags Flags) const {
				567	return false;
				568	}
				569
				570	bool shouldExpandReduction(const IntrinsicInst *II) const {
				571	return true;
				572	}
				573
				574	protected:
				575	// Obtain the minimum required size to hold the value (without the sign)
				576	// In case of a vector it returns the min required size for one element.
				577	unsigned minRequiredElementSize(const Value* Val, bool &isSigned) {
				578	if (isa<ConstantDataVector>(Val) \|\| isa<ConstantVector>(Val)) {
				579	const auto* VectorValue = cast<Constant>(Val);
				580
				581	// In case of a vector need to pick the max between the min
				582	// required size for each element
				583	auto *VT = cast<VectorType>(Val->getType());
				584
				585	// Assume unsigned elements
				586	isSigned = false;
				587
				588	// The max required size is the total vector width divided by num
				589	// of elements in the vector
				590	unsigned MaxRequiredSize = VT->getBitWidth() / VT->getNumElements();
				591
				592	unsigned MinRequiredSize = 0;
				593	for(unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
				594	if (auto* IntElement =
				595	dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
				596	bool signedElement = IntElement->getValue().isNegative();
				597	// Get the element min required size.
				598	unsigned ElementMinRequiredSize =
				599	IntElement->getValue().getMinSignedBits() - 1;
				600	// In case one element is signed then all the vector is signed.
				601	isSigned \|= signedElement;
				602	// Save the max required bit size between all the elements.
				603	MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
				604	}
				605	else {
				606	// not an int constant element
				607	return MaxRequiredSize;
				608	}
				609	}
				610	return MinRequiredSize;
				611	}
				612
				613	if (const auto* CI = dyn_cast<ConstantInt>(Val)) {
				614	isSigned = CI->getValue().isNegative();
				615	return CI->getValue().getMinSignedBits() - 1;
				616	}
				617
				618	if (const auto* Cast = dyn_cast<SExtInst>(Val)) {
				619	isSigned = true;
				620	return Cast->getSrcTy()->getScalarSizeInBits() - 1;
				621	}
				622
				623	if (const auto* Cast = dyn_cast<ZExtInst>(Val)) {
				624	isSigned = false;
				625	return Cast->getSrcTy()->getScalarSizeInBits();
				626	}
				627
				628	isSigned = false;
				629	return Val->getType()->getScalarSizeInBits();
				630	}
				631
				632	bool isStridedAccess(const SCEV *Ptr) {
				633	return Ptr && isa<SCEVAddRecExpr>(Ptr);
				634	}
				635
				636	const SCEVConstant getConstantStrideStep(ScalarEvolution SE,
				637	const SCEV *Ptr) {
				638	if (!isStridedAccess(Ptr))
				639	return nullptr;
				640	const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
				641	return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
				642	}
				643
				644	bool isConstantStridedAccessLessThan(ScalarEvolution SE, const SCEV Ptr,
				645	int64_t MergeDistance) {
				646	const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
				647	if (!Step)
				648	return false;
				649	APInt StrideVal = Step->getAPInt();
				650	if (StrideVal.getBitWidth() > 64)
				651	return false;
				652	// FIXME: Need to take absolute value for negative stride case.
				653	return StrideVal.getSExtValue() < MergeDistance;
				654	}
				655	};
				656
Andrew Scull	cdfcccc	2018-10-05 20:58:37 +0100	[diff] [blame]	657	/// CRTP base class for use as a mix-in that aids implementing
Andrew Scull	5e1ddfa	2018-08-14 10:06:54 +0100	[diff] [blame]	658	/// a TargetTransformInfo-compatible class.
				659	template <typename T>
				660	class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
				661	private:
				662	typedef TargetTransformInfoImplBase BaseT;
				663
				664	protected:
				665	explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}
				666
				667	public:
				668	using BaseT::getCallCost;
				669
				670	unsigned getCallCost(const Function *F, int NumArgs) {
				671	assert(F && "A concrete function must be provided to this routine.");
				672
				673	if (NumArgs < 0)
				674	// Set the argument number to the number of explicit arguments in the
				675	// function.
				676	NumArgs = F->arg_size();
				677
				678	if (Intrinsic::ID IID = F->getIntrinsicID()) {
				679	FunctionType *FTy = F->getFunctionType();
				680	SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end());
				681	return static_cast<T *>(this)
				682	->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys);
				683	}
				684
				685	if (!static_cast<T *>(this)->isLoweredToCall(F))
				686	return TTI::TCC_Basic; // Give a basic cost if it will be lowered
				687	// directly.
				688
				689	return static_cast<T *>(this)->getCallCost(F->getFunctionType(), NumArgs);
				690	}
				691
				692	unsigned getCallCost(const Function F, ArrayRef<const Value > Arguments) {
				693	// Simply delegate to generic handling of the call.
				694	// FIXME: We should use instsimplify or something else to catch calls which
				695	// will constant fold with these arguments.
				696	return static_cast<T *>(this)->getCallCost(F, Arguments.size());
				697	}
				698
				699	using BaseT::getGEPCost;
				700
				701	int getGEPCost(Type PointeeType, const Value Ptr,
				702	ArrayRef<const Value *> Operands) {
				703	const GlobalValue *BaseGV = nullptr;
				704	if (Ptr != nullptr) {
				705	// TODO: will remove this when pointers have an opaque type.
				706	assert(Ptr->getType()->getScalarType()->getPointerElementType() ==
				707	PointeeType &&
				708	"explicit pointee type doesn't match operand's pointee type");
				709	BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
				710	}
				711	bool HasBaseReg = (BaseGV == nullptr);
				712
				713	auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
				714	APInt BaseOffset(PtrSizeBits, 0);
				715	int64_t Scale = 0;
				716
				717	auto GTI = gep_type_begin(PointeeType, Operands);
				718	Type *TargetType = nullptr;
				719
				720	// Handle the case where the GEP instruction has a single operand,
				721	// the basis, therefore TargetType is a nullptr.
				722	if (Operands.empty())
				723	return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;
				724
				725	for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
				726	TargetType = GTI.getIndexedType();
				727	// We assume that the cost of Scalar GEP with constant index and the
				728	// cost of Vector GEP with splat constant index are the same.
				729	const ConstantInt ConstIdx = dyn_cast<ConstantInt>(I);
				730	if (!ConstIdx)
				731	if (auto Splat = getSplatValue(*I))
				732	ConstIdx = dyn_cast<ConstantInt>(Splat);
				733	if (StructType *STy = GTI.getStructTypeOrNull()) {
				734	// For structures the index is always splat or scalar constant
				735	assert(ConstIdx && "Unexpected GEP index");
				736	uint64_t Field = ConstIdx->getZExtValue();
				737	BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
				738	} else {
				739	int64_t ElementSize = DL.getTypeAllocSize(GTI.getIndexedType());
				740	if (ConstIdx) {
				741	BaseOffset +=
				742	ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
				743	} else {
				744	// Needs scale register.
				745	if (Scale != 0)
				746	// No addressing mode takes two scale registers.
				747	return TTI::TCC_Basic;
				748	Scale = ElementSize;
				749	}
				750	}
				751	}
				752
				753	// Assumes the address space is 0 when Ptr is nullptr.
				754	unsigned AS =
				755	(Ptr == nullptr ? 0 : Ptr->getType()->getPointerAddressSpace());
				756
				757	if (static_cast<T *>(this)->isLegalAddressingMode(
				758	TargetType, const_cast<GlobalValue *>(BaseGV),
				759	BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale, AS))
				760	return TTI::TCC_Free;
				761	return TTI::TCC_Basic;
				762	}
				763
				764	using BaseT::getIntrinsicCost;
				765
				766	unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
				767	ArrayRef<const Value *> Arguments) {
				768	// Delegate to the generic intrinsic handling code. This mostly provides an
				769	// opportunity for targets to (for example) special case the cost of
				770	// certain intrinsics based on constants used as arguments.
				771	SmallVector<Type *, 8> ParamTys;
				772	ParamTys.reserve(Arguments.size());
				773	for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx)
				774	ParamTys.push_back(Arguments[Idx]->getType());
				775	return static_cast<T *>(this)->getIntrinsicCost(IID, RetTy, ParamTys);
				776	}
				777
				778	unsigned getUserCost(const User U, ArrayRef<const Value > Operands) {
				779	if (isa<PHINode>(U))
				780	return TTI::TCC_Free; // Model all PHI nodes as free.
				781
				782	// Static alloca doesn't generate target instructions.
				783	if (auto *A = dyn_cast<AllocaInst>(U))
				784	if (A->isStaticAlloca())
				785	return TTI::TCC_Free;
				786
				787	if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
				788	return static_cast<T *>(this)->getGEPCost(GEP->getSourceElementType(),
				789	GEP->getPointerOperand(),
				790	Operands.drop_front());
				791	}
				792
				793	if (auto CS = ImmutableCallSite(U)) {
				794	const Function *F = CS.getCalledFunction();
				795	if (!F) {
				796	// Just use the called value type.
				797	Type *FTy = CS.getCalledValue()->getType()->getPointerElementType();
				798	return static_cast<T *>(this)
				799	->getCallCost(cast<FunctionType>(FTy), CS.arg_size());
				800	}
				801
				802	SmallVector<const Value *, 8> Arguments(CS.arg_begin(), CS.arg_end());
				803	return static_cast<T *>(this)->getCallCost(F, Arguments);
				804	}
				805
				806	if (const CastInst *CI = dyn_cast<CastInst>(U)) {
				807	// Result of a cmp instruction is often extended (to be used by other
				808	// cmp instructions, logical or return instructions). These are usually
				809	// nop on most sane targets.
				810	if (isa<CmpInst>(CI->getOperand(0)))
				811	return TTI::TCC_Free;
				812	if (isa<SExtInst>(CI) \|\| isa<ZExtInst>(CI) \|\| isa<FPExtInst>(CI))
				813	return static_cast<T *>(this)->getExtCost(CI, Operands.back());
				814	}
				815
				816	return static_cast<T *>(this)->getOperationCost(
				817	Operator::getOpcode(U), U->getType(),
				818	U->getNumOperands() == 1 ? U->getOperand(0)->getType() : nullptr);
				819	}
				820
				821	int getInstructionLatency(const Instruction *I) {
				822	SmallVector<const Value *, 4> Operands(I->value_op_begin(),
				823	I->value_op_end());
				824	if (getUserCost(I, Operands) == TTI::TCC_Free)
				825	return 0;
				826
				827	if (isa<LoadInst>(I))
				828	return 4;
				829
				830	Type *DstTy = I->getType();
				831
				832	// Usually an intrinsic is a simple instruction.
				833	// A real function call is much slower.
				834	if (auto *CI = dyn_cast<CallInst>(I)) {
				835	const Function *F = CI->getCalledFunction();
				836	if (!F \|\| static_cast<T *>(this)->isLoweredToCall(F))
				837	return 40;
				838	// Some intrinsics return a value and a flag, we use the value type
				839	// to decide its latency.
				840	if (StructType* StructTy = dyn_cast<StructType>(DstTy))
				841	DstTy = StructTy->getElementType(0);
				842	// Fall through to simple instructions.
				843	}
				844
				845	if (VectorType *VectorTy = dyn_cast<VectorType>(DstTy))
				846	DstTy = VectorTy->getElementType();
				847	if (DstTy->isFloatingPointTy())
				848	return 3;
				849
				850	return 1;
				851	}
				852	};
				853	}
				854
				855	#endif