//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file provides helpers for the implementation of
/// a TargetTransformInfo-conforming class.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H

#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"

namespace llvm {

/// Base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
class TargetTransformInfoImplBase {
protected:
  typedef TargetTransformInfo TTI;

  const DataLayout &DL;

  explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}

public:
  // Provide value semantics. MSVC requires that we spell all of these out.
  TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg)
      : DL(Arg.DL) {}
  TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}

  const DataLayout &getDataLayout() const { return DL; }

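  // Default cost model for a single IR operation: classify the opcode into one
  // of the TCC_* buckets (free, basic, or expensive). GEPs are handled
  // separately by getGEPCost.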
  unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
    switch (Opcode) {
    default:
      // By default, just classify everything as 'basic'.
      return TTI::TCC_Basic;

    case Instruction::GetElementPtr:
      llvm_unreachable("Use getGEPCost for GEP operations!");

    case Instruction::BitCast:
      assert(OpTy && "Cast instructions must provide the operand type");
      if (Ty == OpTy || (Ty->isPointerTy() && OpTy->isPointerTy()))
        // Identity and pointer-to-pointer casts are free.
        return TTI::TCC_Free;

      // Otherwise, the default basic cost is used.
      return TTI::TCC_Basic;

    case Instruction::FDiv:
    case Instruction::FRem:
    case Instruction::SDiv:
    case Instruction::SRem:
    case Instruction::UDiv:
    case Instruction::URem:
      return TTI::TCC_Expensive;

    case Instruction::IntToPtr: {
      // An inttoptr cast is free so long as the input is a legal integer type
      // which doesn't contain values outside the range of a pointer.
      unsigned OpSize = OpTy->getScalarSizeInBits();
      if (DL.isLegalInteger(OpSize) &&
          OpSize <= DL.getPointerTypeSizeInBits(Ty))
        return TTI::TCC_Free;

      // Otherwise it's not a no-op.
      return TTI::TCC_Basic;
    }
    case Instruction::PtrToInt: {
      // A ptrtoint cast is free so long as the result is a legal integer type
      // that is large enough to store the pointer.
      unsigned DestSize = Ty->getScalarSizeInBits();
      if (DL.isLegalInteger(DestSize) &&
          DestSize >= DL.getPointerTypeSizeInBits(OpTy))
        return TTI::TCC_Free;

      // Otherwise it's not a no-op.
      return TTI::TCC_Basic;
    }
    case Instruction::Trunc:
      // trunc to a native type is free (assuming the target has compare and
      // shift-right of the same width).
      if (DL.isLegalInteger(DL.getTypeSizeInBits(Ty)))
        return TTI::TCC_Free;

      return TTI::TCC_Basic;
    }
  }

  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands) {
    // In the basic model, we just assume that all-constant GEPs will be folded
    // into their uses via addressing modes.
    for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
      if (!isa<Constant>(Operands[Idx]))
        return TTI::TCC_Basic;

    return TTI::TCC_Free;
  }

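  // By default, report that no jump table will be built (JTSize = 0) and that
  // every switch case forms its own cluster.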
  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize) {
    JTSize = 0;
    return SI.getNumCases();
  }

  int getExtCost(const Instruction *I, const Value *Src) {
    return TTI::TCC_Basic;
  }

  unsigned getCallCost(FunctionType *FTy, int NumArgs) {
    assert(FTy && "FunctionType must be provided to this routine.");

    // The target-independent implementation just measures the size of the
    // function by approximating that each argument will take on average one
    // instruction to prepare.

    if (NumArgs < 0)
      // Set the argument number to the number of explicit arguments in the
      // function.
      NumArgs = FTy->getNumParams();

    return TTI::TCC_Basic * (NumArgs + 1);
  }

  unsigned getInliningThresholdMultiplier() { return 1; }

  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                            ArrayRef<Type *> ParamTys) {
    switch (IID) {
    default:
      // Intrinsics rarely (if ever) have normal argument setup constraints.
      // Model them as having a basic instruction cost.
      // FIXME: This is wrong for libc intrinsics.
      return TTI::TCC_Basic;

    case Intrinsic::annotation:
    case Intrinsic::assume:
    case Intrinsic::sideeffect:
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::launder_invariant_group:
    case Intrinsic::strip_invariant_group:
    case Intrinsic::is_constant:
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::objectsize:
    case Intrinsic::ptr_annotation:
    case Intrinsic::var_annotation:
    case Intrinsic::experimental_gc_result:
    case Intrinsic::experimental_gc_relocate:
    case Intrinsic::coro_alloc:
    case Intrinsic::coro_begin:
    case Intrinsic::coro_free:
    case Intrinsic::coro_end:
    case Intrinsic::coro_frame:
    case Intrinsic::coro_size:
    case Intrinsic::coro_suspend:
    case Intrinsic::coro_param:
    case Intrinsic::coro_subfn_addr:
      // These intrinsics don't actually represent code after lowering.
      return TTI::TCC_Free;
    }
  }

  bool hasBranchDivergence() { return false; }

  bool isSourceOfDivergence(const Value *V) { return false; }

  bool isAlwaysUniform(const Value *V) { return false; }

  unsigned getFlatAddressSpace() {
    return -1;
  }

  bool isLoweredToCall(const Function *F) {
    assert(F && "A concrete function must be provided to this routine.");

    // FIXME: These should almost certainly not be handled here, and instead
    // handled with the help of TLI or the target itself. This was largely
    // ported from existing analysis heuristics here so that such refactorings
    // can take place in the future.

    if (F->isIntrinsic())
      return false;

    if (F->hasLocalLinkage() || !F->hasName())
      return true;

    StringRef Name = F->getName();

    // These will all likely lower to a single selection DAG node.
    if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
        Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
        Name == "fmin" || Name == "fminf" || Name == "fminl" ||
        Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
        Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
        Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
      return false;

    // These are all likely to be optimized into something smaller.
    if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
        Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
        Name == "floorf" || Name == "ceil" || Name == "round" ||
        Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" ||
        Name == "llabs")
      return false;

    return true;
  }

  void getUnrollingPreferences(Loop *, ScalarEvolution &,
                               TTI::UnrollingPreferences &) {}

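  // The hooks below are conservative defaults: they claim no support for
  // target-specific immediates, addressing modes, masked memory operations,
  // and similar features. Concrete TTI implementations override the ones
  // their target actually supports.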
  bool isLegalAddImmediate(int64_t Imm) { return false; }

  bool isLegalICmpImmediate(int64_t Imm) { return false; }

  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace, Instruction *I = nullptr) {
    // Guess that only reg and reg+reg addressing is allowed. This heuristic is
    // taken from the implementation of LSR.
    return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
  }

  bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) {
    return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
                    C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
           std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
                    C2.ScaleCost, C2.ImmCost, C2.SetupCost);
  }

  bool canMacroFuseCmp() { return false; }

  bool shouldFavorPostInc() const { return false; }

  bool shouldFavorBackedgeIndex(const Loop *L) const { return false; }

  bool isLegalMaskedStore(Type *DataType) { return false; }

  bool isLegalMaskedLoad(Type *DataType) { return false; }

  bool isLegalMaskedScatter(Type *DataType) { return false; }

  bool isLegalMaskedGather(Type *DataType) { return false; }

  bool hasDivRemOp(Type *DataType, bool IsSigned) { return false; }

  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) { return false; }

  bool prefersVectorizedAddressing() { return true; }

  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
    // Guess that all legal addressing modes are free.
    if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
                              Scale, AddrSpace))
      return 0;
    return -1;
  }

  bool LSRWithInstrQueries() { return false; }

  bool isTruncateFree(Type *Ty1, Type *Ty2) { return false; }

  bool isProfitableToHoist(Instruction *I) { return true; }

  bool useAA() { return false; }

  bool isTypeLegal(Type *Ty) { return false; }

  unsigned getJumpBufAlignment() { return 0; }

  unsigned getJumpBufSize() { return 0; }

  bool shouldBuildLookupTables() { return true; }
  bool shouldBuildLookupTablesForConstant(Constant *C) { return true; }

  bool useColdCCForColdCall(Function &F) { return false; }

  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
    return 0;
  }

  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                            unsigned VF) { return 0; }

  bool supportsEfficientVectorElementLoadStore() { return false; }

  bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; }

  const TTI::MemCmpExpansionOptions *enableMemCmpExpansion(
      bool IsZeroCmp) const {
    return nullptr;
  }

  bool enableInterleavedAccessVectorization() { return false; }

  bool enableMaskedInterleavedAccessVectorization() { return false; }

  bool isFPVectorizationPotentiallyUnsafe() { return false; }

  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                      unsigned BitWidth,
                                      unsigned AddressSpace,
                                      unsigned Alignment,
                                      bool *Fast) { return false; }

  TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) {
    return TTI::PSK_Software;
  }

  bool haveFastSqrt(Type *Ty) { return false; }

  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) { return true; }

  unsigned getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Basic; }

  int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                            Type *Ty) {
    return 0;
  }

  unsigned getIntImmCost(const APInt &Imm, Type *Ty) { return TTI::TCC_Basic; }

  unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                         Type *Ty) {
    return TTI::TCC_Free;
  }

  unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                         Type *Ty) {
    return TTI::TCC_Free;
  }

  unsigned getNumberOfRegisters(bool Vector) { return 8; }

  unsigned getRegisterBitWidth(bool Vector) const { return 32; }

  unsigned getMinVectorRegisterBitWidth() { return 128; }

  bool shouldMaximizeVectorBandwidth(bool OptSize) const { return false; }

  unsigned getMinimumVF(unsigned ElemWidth) const { return 0; }

  bool
  shouldConsiderAddressTypePromotion(const Instruction &I,
                                     bool &AllowPromotionWithoutCommonHeader) {
    AllowPromotionWithoutCommonHeader = false;
    return false;
  }

  unsigned getCacheLineSize() { return 0; }

  llvm::Optional<unsigned> getCacheSize(TargetTransformInfo::CacheLevel Level) {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      LLVM_FALLTHROUGH;
    case TargetTransformInfo::CacheLevel::L2D:
      return llvm::Optional<unsigned>();
    }

    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  llvm::Optional<unsigned> getCacheAssociativity(
      TargetTransformInfo::CacheLevel Level) {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      LLVM_FALLTHROUGH;
    case TargetTransformInfo::CacheLevel::L2D:
      return llvm::Optional<unsigned>();
    }

    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  unsigned getPrefetchDistance() { return 0; }

  unsigned getMinPrefetchStride() { return 1; }

  unsigned getMaxPrefetchIterationsAhead() { return UINT_MAX; }

  unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }

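  // The instruction cost hooks below default to trivial flat costs; targets
  // with real cost tables are expected to refine them.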
  unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
                                  TTI::OperandValueKind Opd1Info,
                                  TTI::OperandValueKind Opd2Info,
                                  TTI::OperandValueProperties Opd1PropInfo,
                                  TTI::OperandValueProperties Opd2PropInfo,
                                  ArrayRef<const Value *> Args) {
    return 1;
  }

  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Ty, int Index,
                          Type *SubTp) {
    return 1;
  }

  unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                            const Instruction *I) { return 1; }

  unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                    VectorType *VecTy, unsigned Index) {
    return 1;
  }

  unsigned getCFInstrCost(unsigned Opcode) { return 1; }

  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                              const Instruction *I) {
    return 1;
  }

  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
    return 1;
  }

  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                           unsigned AddressSpace, const Instruction *I) {
    return 1;
  }

  unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                                 unsigned AddressSpace) {
    return 1;
  }

  unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
                                  bool VariableMask,
                                  unsigned Alignment) {
    return 1;
  }

  unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                      unsigned Factor,
                                      ArrayRef<unsigned> Indices,
                                      unsigned Alignment, unsigned AddressSpace,
                                      bool UseMaskForCond = false,
                                      bool UseMaskForGaps = false) {
    return 1;
  }

  unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                 ArrayRef<Type *> Tys, FastMathFlags FMF,
                                 unsigned ScalarizationCostPassed) {
    return 1;
  }
  unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                 ArrayRef<Value *> Args, FastMathFlags FMF,
                                 unsigned VF) {
    return 1;
  }

  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
    return 1;
  }

  unsigned getNumberOfParts(Type *Tp) { return 0; }

  unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *,
                                     const SCEV *) {
    return 0;
  }

  unsigned getArithmeticReductionCost(unsigned, Type *, bool) { return 1; }

  unsigned getMinMaxReductionCost(Type *, Type *, bool, bool) { return 1; }

  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) { return 0; }

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) {
    return false;
  }

  unsigned getAtomicMemIntrinsicMaxElementSize() const {
    // Note for overrides: You must ensure for all element unordered-atomic
    // memory intrinsics that all power-of-2 element sizes up to, and
    // including, the return value of this method have a corresponding
    // runtime lib call. These runtime lib call definitions can be found
    // in RuntimeLibcalls.h
    return 0;
  }

  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) {
    return nullptr;
  }

  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                  unsigned SrcAlign, unsigned DestAlign) const {
    return Type::getInt8Ty(Context);
  }

  void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
                                         LLVMContext &Context,
                                         unsigned RemainingBytes,
                                         unsigned SrcAlign,
                                         unsigned DestAlign) const {
    for (unsigned i = 0; i != RemainingBytes; ++i)
      OpsOut.push_back(Type::getInt8Ty(Context));
  }

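  // By default, treat two functions as inline- and ABI-compatible only when
  // their "target-cpu" and "target-features" attributes match exactly.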
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  bool areFunctionArgsABICompatible(const Function *Caller,
                                    const Function *Callee,
                                    SmallPtrSetImpl<Argument *> &Args) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
                          const DataLayout &DL) const {
    return false;
  }

  bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty,
                           const DataLayout &DL) const {
    return false;
  }

  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }

  bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }

  bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }

  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const {
    return true;
  }

  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const {
    return true;
  }

  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const {
    return VF;
  }

  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const {
    return VF;
  }

  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const {
    return false;
  }

  bool shouldExpandReduction(const IntrinsicInst *II) const {
    return true;
  }

protected:
  // Obtain the minimum required size to hold the value (without the sign).
  // In case of a vector it returns the min required size for one element.
  unsigned minRequiredElementSize(const Value *Val, bool &isSigned) {
    if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
      const auto *VectorValue = cast<Constant>(Val);

      // For a vector we need to pick the max of the min required sizes
      // of its elements.
      auto *VT = cast<VectorType>(Val->getType());

      // Assume unsigned elements.
      isSigned = false;

      // The max required size is the total vector width divided by the
      // number of elements in the vector.
      unsigned MaxRequiredSize = VT->getBitWidth() / VT->getNumElements();

      unsigned MinRequiredSize = 0;
      for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
        if (auto *IntElement =
                dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
          bool signedElement = IntElement->getValue().isNegative();
          // Get the element min required size.
          unsigned ElementMinRequiredSize =
              IntElement->getValue().getMinSignedBits() - 1;
          // In case one element is signed then all the vector is signed.
          isSigned |= signedElement;
          // Save the max required bit size between all the elements.
          MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
        } else {
          // Not an int constant element.
          return MaxRequiredSize;
        }
      }
      return MinRequiredSize;
    }

    if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
      isSigned = CI->getValue().isNegative();
      return CI->getValue().getMinSignedBits() - 1;
    }

    if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
      isSigned = true;
      return Cast->getSrcTy()->getScalarSizeInBits() - 1;
    }

    if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
      isSigned = false;
      return Cast->getSrcTy()->getScalarSizeInBits();
    }

    isSigned = false;
    return Val->getType()->getScalarSizeInBits();
  }

  bool isStridedAccess(const SCEV *Ptr) {
    return Ptr && isa<SCEVAddRecExpr>(Ptr);
  }

  const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE,
                                            const SCEV *Ptr) {
    if (!isStridedAccess(Ptr))
      return nullptr;
    const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
    return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
  }

  bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
                                       int64_t MergeDistance) {
    const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
    if (!Step)
      return false;
    APInt StrideVal = Step->getAPInt();
    if (StrideVal.getBitWidth() > 64)
      return false;
    // FIXME: Need to take absolute value for negative stride case.
    return StrideVal.getSExtValue() < MergeDistance;
  }
};

/// CRTP base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
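///
/// A concrete TTI implementation is expected to derive from this class
/// (directly, or through an intermediate base such as BasicTTIImplBase) and
/// shadow the hooks it wants to specialize; the CRTP parameter lets this base
/// dispatch back to the most derived overrides. A minimal sketch (the name
/// MyTargetTTIImpl is illustrative only, not an in-tree target):
///
///   class MyTargetTTIImpl
///       : public TargetTransformInfoImplCRTPBase<MyTargetTTIImpl> {
///   public:
///     explicit MyTargetTTIImpl(const DataLayout &DL)
///         : TargetTransformInfoImplCRTPBase<MyTargetTTIImpl>(DL) {}
///     // Shadow a hook to customize it; getUserCost and the other generic
///     // helpers below will pick up this override via static_cast<T *>.
///     bool isLoweredToCall(const Function *F) { return true; }
///   };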
template <typename T>
class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
private:
  typedef TargetTransformInfoImplBase BaseT;

protected:
  explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}

public:
  using BaseT::getCallCost;

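  // Cost of a call to a known function: intrinsics are priced via
  // getIntrinsicCost, functions that are not lowered to a real call get a
  // flat TCC_Basic, and everything else is charged per argument.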
  unsigned getCallCost(const Function *F, int NumArgs) {
    assert(F && "A concrete function must be provided to this routine.");

    if (NumArgs < 0)
      // Set the argument number to the number of explicit arguments in the
      // function.
      NumArgs = F->arg_size();

    if (Intrinsic::ID IID = F->getIntrinsicID()) {
      FunctionType *FTy = F->getFunctionType();
      SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end());
      return static_cast<T *>(this)
          ->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys);
    }

    if (!static_cast<T *>(this)->isLoweredToCall(F))
      return TTI::TCC_Basic; // Give a basic cost if it will be lowered
                             // directly.

    return static_cast<T *>(this)->getCallCost(F->getFunctionType(), NumArgs);
  }

  unsigned getCallCost(const Function *F, ArrayRef<const Value *> Arguments) {
    // Simply delegate to generic handling of the call.
    // FIXME: We should use instsimplify or something else to catch calls which
    // will constant fold with these arguments.
    return static_cast<T *>(this)->getCallCost(F, Arguments.size());
  }

  using BaseT::getGEPCost;

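  // Model a GEP as free when the computed base/offset/scale combination folds
  // into a legal addressing mode of the target; otherwise charge TCC_Basic.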
  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands) {
    const GlobalValue *BaseGV = nullptr;
    if (Ptr != nullptr) {
      // TODO: will remove this when pointers have an opaque type.
      assert(Ptr->getType()->getScalarType()->getPointerElementType() ==
                 PointeeType &&
             "explicit pointee type doesn't match operand's pointee type");
      BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
    }
    bool HasBaseReg = (BaseGV == nullptr);

    auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
    APInt BaseOffset(PtrSizeBits, 0);
    int64_t Scale = 0;

    auto GTI = gep_type_begin(PointeeType, Operands);
    Type *TargetType = nullptr;

    // Handle the case where the GEP instruction has a single operand,
    // the base pointer, and therefore TargetType is a nullptr.
    if (Operands.empty())
      return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;

    for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
      TargetType = GTI.getIndexedType();
      // We assume that the cost of Scalar GEP with constant index and the
      // cost of Vector GEP with splat constant index are the same.
      const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
      if (!ConstIdx)
        if (auto Splat = getSplatValue(*I))
          ConstIdx = dyn_cast<ConstantInt>(Splat);
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // For structures the index is always a splat or scalar constant.
        assert(ConstIdx && "Unexpected GEP index");
        uint64_t Field = ConstIdx->getZExtValue();
        BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
      } else {
        int64_t ElementSize = DL.getTypeAllocSize(GTI.getIndexedType());
        if (ConstIdx) {
          BaseOffset +=
              ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
        } else {
          // Needs scale register.
          if (Scale != 0)
            // No addressing mode takes two scale registers.
            return TTI::TCC_Basic;
          Scale = ElementSize;
        }
      }
    }

    // Assumes the address space is 0 when Ptr is nullptr.
    unsigned AS =
        (Ptr == nullptr ? 0 : Ptr->getType()->getPointerAddressSpace());

    if (static_cast<T *>(this)->isLegalAddressingMode(
            TargetType, const_cast<GlobalValue *>(BaseGV),
            BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale, AS))
      return TTI::TCC_Free;
    return TTI::TCC_Basic;
  }

  using BaseT::getIntrinsicCost;

  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                            ArrayRef<const Value *> Arguments) {
    // Delegate to the generic intrinsic handling code. This mostly provides an
    // opportunity for targets to (for example) special case the cost of
    // certain intrinsics based on constants used as arguments.
    SmallVector<Type *, 8> ParamTys;
    ParamTys.reserve(Arguments.size());
    for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx)
      ParamTys.push_back(Arguments[Idx]->getType());
    return static_cast<T *>(this)->getIntrinsicCost(IID, RetTy, ParamTys);
  }

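  // Generic cost of an arbitrary IR user: dispatch PHIs, static allocas, GEPs,
  // calls and casts to the specific hooks above, and fall back to
  // getOperationCost for everything else.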
  unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands) {
    if (isa<PHINode>(U))
      return TTI::TCC_Free; // Model all PHI nodes as free.

    // Static alloca doesn't generate target instructions.
    if (auto *A = dyn_cast<AllocaInst>(U))
      if (A->isStaticAlloca())
        return TTI::TCC_Free;

    if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
      return static_cast<T *>(this)->getGEPCost(GEP->getSourceElementType(),
                                                GEP->getPointerOperand(),
                                                Operands.drop_front());
    }

    if (auto CS = ImmutableCallSite(U)) {
      const Function *F = CS.getCalledFunction();
      if (!F) {
        // Just use the called value type.
        Type *FTy = CS.getCalledValue()->getType()->getPointerElementType();
        return static_cast<T *>(this)
            ->getCallCost(cast<FunctionType>(FTy), CS.arg_size());
      }

      SmallVector<const Value *, 8> Arguments(CS.arg_begin(), CS.arg_end());
      return static_cast<T *>(this)->getCallCost(F, Arguments);
    }

    if (const CastInst *CI = dyn_cast<CastInst>(U)) {
      // The result of a cmp instruction is often extended (to be used by other
      // cmp instructions, logical or return instructions). These are usually
      // no-ops on most targets.
      if (isa<CmpInst>(CI->getOperand(0)))
        return TTI::TCC_Free;
      if (isa<SExtInst>(CI) || isa<ZExtInst>(CI) || isa<FPExtInst>(CI))
        return static_cast<T *>(this)->getExtCost(CI, Operands.back());
    }

    return static_cast<T *>(this)->getOperationCost(
        Operator::getOpcode(U), U->getType(),
        U->getNumOperands() == 1 ? U->getOperand(0)->getType() : nullptr);
  }

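  // Crude default latency model: 0 for anything that is free, 4 for loads,
  // 40 for real calls, 3 for floating-point results, and 1 otherwise.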
  int getInstructionLatency(const Instruction *I) {
    SmallVector<const Value *, 4> Operands(I->value_op_begin(),
                                           I->value_op_end());
    if (getUserCost(I, Operands) == TTI::TCC_Free)
      return 0;

    if (isa<LoadInst>(I))
      return 4;

    Type *DstTy = I->getType();

    // Usually an intrinsic is a simple instruction.
    // A real function call is much slower.
    if (auto *CI = dyn_cast<CallInst>(I)) {
      const Function *F = CI->getCalledFunction();
      if (!F || static_cast<T *>(this)->isLoweredToCall(F))
        return 40;
      // Some intrinsics return a value and a flag; we use the value type
      // to decide the latency.
      if (StructType *StructTy = dyn_cast<StructType>(DstTy))
        DstTy = StructTy->getElementType(0);
      // Fall through to simple instructions.
    }

    if (VectorType *VectorTy = dyn_cast<VectorType>(DstTy))
      DstTy = VectorTy->getElementType();
    if (DstTy->isFloatingPointTy())
      return 3;

    return 1;
  }
};
} // namespace llvm

#endif