1//===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9/// \file
10/// This pass exposes codegen information to IR-level passes. Every
11/// transformation that uses codegen information is broken into three parts:
12/// 1. The IR-level analysis pass.
13/// 2. The IR-level transformation interface which provides the needed
14/// information.
15/// 3. Codegen-level implementation which uses target-specific hooks.
16///
17/// This file defines #2, which is the interface that IR-level transformations
18/// use for querying the codegen.
19///
20//===----------------------------------------------------------------------===//
21
22#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
23#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
24
25#include "llvm/ADT/Optional.h"
26#include "llvm/IR/Operator.h"
27#include "llvm/IR/PassManager.h"
28#include "llvm/Pass.h"
29#include "llvm/Support/AtomicOrdering.h"
30#include "llvm/Support/DataTypes.h"
31#include <functional>
32
33namespace llvm {
34
35namespace Intrinsic {
36enum ID : unsigned;
37}
38
39class Function;
40class GlobalValue;
41class IntrinsicInst;
42class LoadInst;
43class Loop;
44class SCEV;
45class ScalarEvolution;
46class StoreInst;
47class SwitchInst;
48class Type;
49class User;
50class Value;
51
52/// \brief Information about a load/store intrinsic defined by the target.
53struct MemIntrinsicInfo {
54 /// This is the pointer that the intrinsic is loading from or storing to.
55 /// If this is non-null, then analysis/optimization passes can assume that
56 /// this intrinsic is functionally equivalent to a load/store from this
57 /// pointer.
58 Value *PtrVal = nullptr;
59
60 // Ordering for atomic operations.
61 AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
62
63 // The same Id is set by the target for corresponding load/store intrinsics.
64 unsigned short MatchingId = 0;
65
66 bool ReadMem = false;
67 bool WriteMem = false;
68 bool IsVolatile = false;
69
70 bool isUnordered() const {
71 return (Ordering == AtomicOrdering::NotAtomic ||
72 Ordering == AtomicOrdering::Unordered) && !IsVolatile;
73 }
74};
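// Illustrative sketch (not from the original header): a hypothetical target's
// getTgtMemIntrinsic() override might describe a simple, non-atomic,
// load-like intrinsic roughly as follows, where 'II' is the IntrinsicInst:
//   MemIntrinsicInfo Info;
//   Info.PtrVal = II->getArgOperand(0); // assumes operand 0 is the pointer
//   Info.ReadMem = true;
//   Info.WriteMem = false;
//   Info.IsVolatile = false;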
75
76/// \brief This pass provides access to the codegen interfaces that are needed
77/// for IR-level transformations.
78class TargetTransformInfo {
79public:
80 /// \brief Construct a TTI object using a type implementing the \c Concept
81 /// API below.
82 ///
83 /// This is used by targets to construct a TTI wrapping their target-specific
84 /// implementation that encodes appropriate costs for their target.
85 template <typename T> TargetTransformInfo(T Impl);
86
87 /// \brief Construct a baseline TTI object using a minimal implementation of
88 /// the \c Concept API below.
89 ///
90 /// The TTI implementation will reflect the information in the DataLayout
91 /// provided if non-null.
92 explicit TargetTransformInfo(const DataLayout &DL);
93
94 // Provide move semantics.
95 TargetTransformInfo(TargetTransformInfo &&Arg);
96 TargetTransformInfo &operator=(TargetTransformInfo &&RHS);
97
98 // We need to define the destructor out-of-line to define our sub-classes
99 // out-of-line.
100 ~TargetTransformInfo();
101
102 /// \brief Handle the invalidation of this information.
103 ///
104 /// When used as a result of \c TargetIRAnalysis this method will be called
105 /// when the function this was computed for changes. When it returns false,
106 /// the information is preserved across those changes.
107 bool invalidate(Function &, const PreservedAnalyses &,
108 FunctionAnalysisManager::Invalidator &) {
109 // FIXME: We should probably in some way ensure that the subtarget
110 // information for a function hasn't changed.
111 return false;
112 }
113
114 /// \name Generic Target Information
115 /// @{
116
117 /// \brief The kind of cost model.
118 ///
119 /// There are several different cost models that can be customized by the
120 /// target. The normalization of each cost model may be target specific.
121 enum TargetCostKind {
122 TCK_RecipThroughput, ///< Reciprocal throughput.
123 TCK_Latency, ///< The latency of the instruction.
124 TCK_CodeSize ///< Instruction code size.
125 };
126
127 /// \brief Query the cost of a specified instruction.
128 ///
129 /// Clients should use this interface to query the cost of an existing
130 /// instruction. The instruction must have a valid parent (basic block).
131 ///
132 /// Note, this method does not cache the cost calculation and it
133 /// can be expensive in some cases.
134 int getInstructionCost(const Instruction *I, enum TargetCostKind kind) const {
135 switch (kind) {
136 case TCK_RecipThroughput:
137 return getInstructionThroughput(I);
138
139 case TCK_Latency:
140 return getInstructionLatency(I);
141
142 case TCK_CodeSize:
143 return getUserCost(I);
144 }
145 llvm_unreachable("Unknown instruction cost kind");
146 }
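  // Illustrative sketch (not part of the original header): given a TTI
  // reference 'TTI' and an instruction 'I' with a valid parent block, a pass
  // could compare cost models like so:
  //   int SizeCost = TTI.getInstructionCost(I, TargetTransformInfo::TCK_CodeSize);
  //   int TputCost =
  //       TTI.getInstructionCost(I, TargetTransformInfo::TCK_RecipThroughput);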
147
148 /// \brief Underlying constants for 'cost' values in this interface.
149 ///
150 /// Many APIs in this interface return a cost. This enum defines the
151 /// fundamental values that should be used to interpret (and produce) those
152 /// costs. The costs are returned as an int rather than a member of this
153 /// enumeration because it is expected that the cost of one IR instruction
154 /// may have a multiplicative factor to it or otherwise won't fit directly
155 /// into the enum. Moreover, it is common to sum or average costs which works
156 /// better as simple integral values. Thus this enum only provides constants.
157 /// Also note that the returned costs are signed integers to make it natural
158 /// to add, subtract, and test with zero (a common boundary condition). It is
159 /// not expected that 2^32 is a realistic cost to be modeling at any point.
160 ///
161 /// Note that these costs should usually reflect the intersection of code-size
162 /// cost and execution cost. A free instruction is typically one that folds
163 /// into another instruction. For example, reg-to-reg moves can often be
164 /// skipped by renaming the registers in the CPU, but they still are encoded
165 /// and thus wouldn't be considered 'free' here.
166 enum TargetCostConstants {
167 TCC_Free = 0, ///< Expected to fold away in lowering.
168 TCC_Basic = 1, ///< The cost of a typical 'add' instruction.
169 TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
170 };
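  // Illustrative sketch (assumed names 'TTI' and 'BB'): costs are plain ints,
  // so they can be accumulated and compared against budgets expressed in terms
  // of these constants:
  //   int Cost = 0;
  //   for (const Instruction &I : BB)
  //     Cost += TTI.getUserCost(&I);
  //   bool Cheap = Cost <= 4 * TargetTransformInfo::TCC_Basic;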
171
172 /// \brief Estimate the cost of a specific operation when lowered.
173 ///
174 /// Note that this is designed to work on an arbitrary synthetic opcode, and
175 /// thus work for hypothetical queries before an instruction has even been
176 /// formed. However, this does *not* work for GEPs, and must not be called
177 /// for a GEP instruction. Instead, use the dedicated getGEPCost interface as
178 /// analyzing a GEP's cost required more information.
179 ///
180 /// Typically only the result type is required, and the operand type can be
181 /// omitted. However, if the opcode is one of the cast instructions, the
182 /// operand type is required.
183 ///
184 /// The returned cost is defined in terms of \c TargetCostConstants, see its
185 /// comments for a detailed explanation of the cost values.
186 int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy = nullptr) const;
187
188 /// \brief Estimate the cost of a GEP operation when lowered.
189 ///
190 /// The contract for this function is the same as \c getOperationCost except
191 /// that it supports an interface that provides extra information specific to
192 /// the GEP operation.
193 int getGEPCost(Type *PointeeType, const Value *Ptr,
194 ArrayRef<const Value *> Operands) const;
195
196 /// \brief Estimate the cost of an EXT operation when lowered.
197 ///
198 /// The contract for this function is the same as \c getOperationCost except
199 /// that it supports an interface that provides extra information specific to
200 /// the EXT operation.
201 int getExtCost(const Instruction *I, const Value *Src) const;
202
203 /// \brief Estimate the cost of a function call when lowered.
204 ///
205 /// The contract for this is the same as \c getOperationCost except that it
206 /// supports an interface that provides extra information specific to call
207 /// instructions.
208 ///
209 /// This is the most basic query for estimating call cost: it only knows the
210 /// function type and (potentially) the number of arguments at the call site.
211 /// The latter is only interesting for varargs function types.
212 int getCallCost(FunctionType *FTy, int NumArgs = -1) const;
213
214 /// \brief Estimate the cost of calling a specific function when lowered.
215 ///
216 /// This overload adds the ability to reason about the particular function
217 /// being called in the event it is a library call with special lowering.
218 int getCallCost(const Function *F, int NumArgs = -1) const;
219
220 /// \brief Estimate the cost of calling a specific function when lowered.
221 ///
222 /// This overload allows specifying a set of candidate argument values.
223 int getCallCost(const Function *F, ArrayRef<const Value *> Arguments) const;
224
225 /// \returns A value by which our inlining threshold should be multiplied.
226 /// This is primarily used to bump up the inlining threshold wholesale on
227 /// targets where calls are unusually expensive.
228 ///
229 /// TODO: This is a rather blunt instrument. Perhaps altering the costs of
230 /// individual classes of instructions would be better.
231 unsigned getInliningThresholdMultiplier() const;
232
233 /// \brief Estimate the cost of an intrinsic when lowered.
234 ///
235 /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
236 int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
237 ArrayRef<Type *> ParamTys) const;
238
239 /// \brief Estimate the cost of an intrinsic when lowered.
240 ///
241 /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
242 int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
243 ArrayRef<const Value *> Arguments) const;
244
245 /// \return The estimated number of case clusters when lowering \p 'SI'.
246 /// \p JTSize is set to the jump table size only when \p SI is suitable for a
247 /// jump table.
248 unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
249 unsigned &JTSize) const;
250
251 /// \brief Estimate the cost of a given IR user when lowered.
252 ///
253 /// This can estimate the cost of either a ConstantExpr or Instruction when
254 /// lowered. It has two primary advantages over the \c getOperationCost and
255 /// \c getGEPCost above, and one significant disadvantage: it can only be
256 /// used when the IR construct has already been formed.
257 ///
258 /// The advantages are that it can inspect the SSA use graph to reason more
259 /// accurately about the cost. For example, all-constant-GEPs can often be
260 /// folded into a load or other instruction, but if they are used in some
261 /// other context they may not be folded. This routine can distinguish such
262 /// cases.
263 ///
264 /// \p Operands is a list of operands which can be a result of transformations
265 /// of the current operands. The number of operands on the list must equal the
266 /// number of current operands the IR user has. Their order on the
267 /// list must be the same as the order of the current operands the IR user
268 /// has.
269 ///
270 /// The returned cost is defined in terms of \c TargetCostConstants, see its
271 /// comments for a detailed explanation of the cost values.
272 int getUserCost(const User *U, ArrayRef<const Value *> Operands) const;
273
274 /// \brief This is a helper function which calls the two-argument getUserCost
275 /// with \p Operands which are the current operands U has.
276 int getUserCost(const User *U) const {
277 SmallVector<const Value *, 4> Operands(U->value_op_begin(),
278 U->value_op_end());
279 return getUserCost(U, Operands);
280 }
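  // Illustrative sketch: the two-argument form lets a transformation ask what
  // the user would cost if its operands were replaced, e.g. with 'NewOps'
  // (assumed) holding simplified operands:
  //   int CostNow = TTI.getUserCost(U);
  //   int CostAfter = TTI.getUserCost(U, NewOps);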
281
282 /// \brief Return true if branch divergence exists.
283 ///
284 /// Branch divergence has a significantly negative impact on GPU performance
285 /// when threads in the same wavefront take different paths due to conditional
286 /// branches.
287 bool hasBranchDivergence() const;
288
289 /// \brief Returns whether V is a source of divergence.
290 ///
291 /// This function provides the target-dependent information for
292 /// the target-independent DivergenceAnalysis. DivergenceAnalysis first
293 /// builds the dependency graph, and then runs the reachability algorithm
294 /// starting with the sources of divergence.
295 bool isSourceOfDivergence(const Value *V) const;
296
297 /// \brief Returns true for the target-specific set of operations
298 /// which produce a uniform result even when taking
299 /// non-uniform arguments.
300 bool isAlwaysUniform(const Value *V) const;
301
302 /// Returns the address space ID for a target's 'flat' address space. Note
303 /// this is not necessarily the same as addrspace(0), which LLVM sometimes
304 /// refers to as the generic address space. The flat address space is a
305 /// generic address space that can be used to access multiple segments of memory
306 /// with different address spaces. Access of a memory location through a
307 /// pointer with this address space is expected to be legal but slower
308 /// compared to the same memory location accessed through a pointer with a
309 /// different address space.
310 ///
311 /// This is for targets with different pointer representations which can
312 /// be converted with the addrspacecast instruction. If a pointer is converted
313 /// to this address space, optimizations should attempt to replace the access
314 /// with the source address space.
315 ///
316 /// \returns ~0u if the target does not have such a flat address space to
317 /// optimize away.
318 unsigned getFlatAddressSpace() const;
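  // Illustrative sketch: an address-space inference pass could bail out when
  // no flat address space exists:
  //   unsigned FlatAS = TTI.getFlatAddressSpace();
  //   if (FlatAS == ~0u)
  //     return false; // nothing to rewrite on this target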
319
320 /// \brief Test whether calls to a function lower to actual program function
321 /// calls.
322 ///
323 /// The idea is to test whether the program is likely to require a 'call'
324 /// instruction or equivalent in order to call the given function.
325 ///
326 /// FIXME: It's not clear that this is a good or useful query API. Clients
327 /// should probably move to simpler cost metrics using the above.
328 /// Alternatively, we could split the cost interface into distinct code-size
329 /// and execution-speed costs. This would allow modelling the core of this
330 /// query more accurately as a call is a single small instruction, but
331 /// incurs significant execution cost.
332 bool isLoweredToCall(const Function *F) const;
333
334 struct LSRCost {
335 /// TODO: Some of these could be merged. Also, a lexical ordering
336 /// isn't always optimal.
337 unsigned Insns;
338 unsigned NumRegs;
339 unsigned AddRecCost;
340 unsigned NumIVMuls;
341 unsigned NumBaseAdds;
342 unsigned ImmCost;
343 unsigned SetupCost;
344 unsigned ScaleCost;
345 };
346
347 /// Parameters that control the generic loop unrolling transformation.
348 struct UnrollingPreferences {
349 /// The cost threshold for the unrolled loop. Should be relative to the
350 /// getUserCost values returned by this API, and the expectation is that
351 /// the unrolled loop's instructions when run through that interface should
352 /// not exceed this cost. However, this is only an estimate. Also, specific
353 /// loops may be unrolled even with a cost above this threshold if deemed
354 /// profitable. Set this to UINT_MAX to disable the loop body cost
355 /// restriction.
356 unsigned Threshold;
357 /// If complete unrolling will reduce the cost of the loop, we will boost
358 /// the Threshold by a certain percent to allow more aggressive complete
359 /// unrolling. This value provides the maximum boost percentage that we
360 /// can apply to Threshold (The value should be no less than 100).
361 /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
362 /// MaxPercentThresholdBoost / 100)
363 /// E.g. if complete unrolling reduces the loop execution time by 50%
364 /// then we boost the threshold by the factor of 2x. If unrolling is not
365 /// expected to reduce the running time, then we do not increase the
366 /// threshold.
367 unsigned MaxPercentThresholdBoost;
368 /// The cost threshold for the unrolled loop when optimizing for size (set
369 /// to UINT_MAX to disable).
370 unsigned OptSizeThreshold;
371 /// The cost threshold for the unrolled loop, like Threshold, but used
372 /// for partial/runtime unrolling (set to UINT_MAX to disable).
373 unsigned PartialThreshold;
374 /// The cost threshold for the unrolled loop when optimizing for size, like
375 /// OptSizeThreshold, but used for partial/runtime unrolling (set to
376 /// UINT_MAX to disable).
377 unsigned PartialOptSizeThreshold;
378 /// A forced unrolling factor (the number of concatenated bodies of the
379 /// original loop in the unrolled loop body). When set to 0, the unrolling
380 /// transformation will select an unrolling factor based on the current cost
381 /// threshold and other factors.
382 unsigned Count;
383 /// A forced peeling factor (the number of bodies of the original loop
384 /// that should be peeled off before the loop body). When set to 0, the
385 /// unrolling transformation will select a peeling factor based on profile
386 /// information and other factors.
387 unsigned PeelCount;
388 /// Default unroll count for loops with run-time trip count.
389 unsigned DefaultUnrollRuntimeCount;
390 // Set the maximum unrolling factor. The unrolling factor may be selected
391 // using the appropriate cost threshold, but may not exceed this number
392 // (set to UINT_MAX to disable). This does not apply in cases where the
393 // loop is being fully unrolled.
394 unsigned MaxCount;
395 /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
396 /// applies even if full unrolling is selected. This allows a target to fall
397 /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
398 unsigned FullUnrollMaxCount;
399 // Represents number of instructions optimized when "back edge"
400 // becomes "fall through" in unrolled loop.
401 // For now we count a conditional branch on a backedge and a comparison
402 // feeding it.
403 unsigned BEInsns;
404 /// Allow partial unrolling (unrolling of loops to expand the size of the
405 /// loop body, not only to eliminate small constant-trip-count loops).
406 bool Partial;
407 /// Allow runtime unrolling (unrolling of loops to expand the size of the
408 /// loop body even when the number of loop iterations is not known at
409 /// compile time).
410 bool Runtime;
411 /// Allow generation of a loop remainder (extra iterations after unroll).
412 bool AllowRemainder;
413 /// Allow emitting expensive instructions (such as divisions) when computing
414 /// the trip count of a loop for runtime unrolling.
415 bool AllowExpensiveTripCount;
416 /// Apply loop unroll on any kind of loop
417 /// (mainly to loops that fail runtime unrolling).
418 bool Force;
419 /// Allow using trip count upper bound to unroll loops.
420 bool UpperBound;
421 /// Allow peeling off loop iterations for loops with low dynamic tripcount.
422 bool AllowPeeling;
423 /// Allow unrolling of all the iterations of the runtime loop remainder.
424 bool UnrollRemainder;
425 };
426
427 /// \brief Get target-customized preferences for the generic loop unrolling
428 /// transformation. The caller will initialize UP with the current
429 /// target-independent defaults.
430 void getUnrollingPreferences(Loop *L, ScalarEvolution &,
431 UnrollingPreferences &UP) const;
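  // Illustrative sketch of a hypothetical target override ('MyTTIImpl' is an
  // assumed name); the caller hands in target-independent defaults which the
  // target then adjusts:
  //   void MyTTIImpl::getUnrollingPreferences(
  //       Loop *L, ScalarEvolution &SE,
  //       TargetTransformInfo::UnrollingPreferences &UP) {
  //     UP.Partial = true; // allow partial unrolling on this target
  //     UP.MaxCount = 4;   // but never unroll by more than 4x
  //   }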
432
433 /// @}
434
435 /// \name Scalar Target Information
436 /// @{
437
438 /// \brief Flags indicating the kind of support for population count.
439 ///
440 /// Compared to the SW implementation, HW support is supposed to
441 /// significantly boost the performance when the population is dense, and it
442 /// may or may not degrade performance if the population is sparse. HW
443 /// support is considered "Fast" if it can outperform, or is on a par
444 /// with, the SW implementation when the population is sparse; otherwise, it
445 /// is considered "Slow".
446 enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };
447
448 /// \brief Return true if the specified immediate is a legal add immediate,
449 /// that is, the target has add instructions which can add a register with the
450 /// immediate without having to materialize the immediate into a register.
451 bool isLegalAddImmediate(int64_t Imm) const;
452
453 /// \brief Return true if the specified immediate is a legal icmp immediate,
454 /// that is, the target has icmp instructions which can compare a register
455 /// against the immediate without having to materialize the immediate into a
456 /// register.
457 bool isLegalICmpImmediate(int64_t Imm) const;
458
459 /// \brief Return true if the addressing mode represented by AM is legal for
460 /// this target, for a load/store of the specified type.
461 /// The type may be VoidTy, in which case only return true if the addressing
462 /// mode is legal for a load/store of any legal type.
463 /// If the target returns true in LSRWithInstrQueries(), I may be valid.
464 /// TODO: Handle pre/postinc as well.
465 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
466 bool HasBaseReg, int64_t Scale,
467 unsigned AddrSpace = 0,
468 Instruction *I = nullptr) const;
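  // Illustrative sketch ('TTI' and 'Int32Ty' assumed): checking whether a
  // base-register-plus-16 mode is legal for an i32 access:
  //   bool Legal = TTI.isLegalAddressingMode(Int32Ty, /*BaseGV=*/nullptr,
  //                                          /*BaseOffset=*/16,
  //                                          /*HasBaseReg=*/true, /*Scale=*/0);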
469
470 /// \brief Return true if the LSR cost of C1 is lower than that of C2.
471 bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
472 TargetTransformInfo::LSRCost &C2) const;
473
474 /// Return true if the target can fuse a compare and branch.
475 /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
476 /// calculation for the instructions in a loop.
477 bool canMacroFuseCmp() const;
478
479 /// \return True if LSR should make efforts to create/preserve post-inc
480 /// addressing mode expressions.
481 bool shouldFavorPostInc() const;
482
483 /// \brief Return true if the target supports masked load/store.
484 /// AVX2 and AVX-512 targets allow masks for consecutive loads and stores.
485 bool isLegalMaskedStore(Type *DataType) const;
486 bool isLegalMaskedLoad(Type *DataType) const;
487
488 /// \brief Return true if the target supports masked gather/scatter.
489 /// AVX-512 fully supports gather and scatter for vectors with 32- and
490 /// 64-bit scalar types.
491 bool isLegalMaskedScatter(Type *DataType) const;
492 bool isLegalMaskedGather(Type *DataType) const;
493
494 /// Return true if the target has a unified operation to calculate division
495 /// and remainder. If so, the additional implicit multiplication and
496 /// subtraction required to calculate a remainder from division are free. This
497 /// can enable more aggressive transformations for division and remainder than
498 /// would typically be allowed using throughput or size cost models.
499 bool hasDivRemOp(Type *DataType, bool IsSigned) const;
500
501 /// Return true if the given instruction (assumed to be a memory access
502 /// instruction) has a volatile variant. If that's the case then we can avoid
503 /// addrspacecast to generic AS for volatile loads/stores. Default
504 /// implementation returns false, which prevents address space inference for
505 /// volatile loads/stores.
506 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;
507
508 /// Return true if the target doesn't mind addresses in vectors.
509 bool prefersVectorizedAddressing() const;
510
511 /// \brief Return the cost of the scaling factor used in the addressing
512 /// mode represented by AM for this target, for a load/store
513 /// of the specified type.
514 /// If the AM is supported, the return value must be >= 0.
515 /// If the AM is not supported, it returns a negative value.
516 /// TODO: Handle pre/postinc as well.
517 int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
518 bool HasBaseReg, int64_t Scale,
519 unsigned AddrSpace = 0) const;
520
521 /// \brief Return true if the loop strength reduce pass should make
522 /// Instruction* based TTI queries to isLegalAddressingMode(). This is
523 /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
524 /// immediate offset and no index register.
525 bool LSRWithInstrQueries() const;
526
527 /// \brief Return true if it's free to truncate a value of type Ty1 to type
528 /// Ty2. E.g., on x86 it's free to truncate an i32 value in register EAX to i16
529 /// by referencing its sub-register AX.
530 bool isTruncateFree(Type *Ty1, Type *Ty2) const;
531
532 /// \brief Return true if it is profitable to hoist instructions in the
533 /// then/else blocks to before the if.
534 bool isProfitableToHoist(Instruction *I) const;
535
536 bool useAA() const;
537
538 /// \brief Return true if this type is legal.
539 bool isTypeLegal(Type *Ty) const;
540
541 /// \brief Returns the target's jmp_buf alignment in bytes.
542 unsigned getJumpBufAlignment() const;
543
544 /// \brief Returns the target's jmp_buf size in bytes.
545 unsigned getJumpBufSize() const;
546
547 /// \brief Return true if switches should be turned into lookup tables for the
548 /// target.
549 bool shouldBuildLookupTables() const;
550
551 /// \brief Return true if switches should be turned into lookup tables
552 /// containing this constant value for the target.
553 bool shouldBuildLookupTablesForConstant(Constant *C) const;
554
555 /// \brief Return true if the input function, which is cold at all call sites,
556 /// should use the coldcc calling convention.
557 bool useColdCCForColdCall(Function &F) const;
558
559 unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
560
561 unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
562 unsigned VF) const;
563
564 /// If the target has efficient vector element load/store instructions, it can
565 /// return true here so that insertion/extraction costs are not added to
566 /// the scalarization cost of a load/store.
567 bool supportsEfficientVectorElementLoadStore() const;
568
569 /// \brief Don't restrict interleaved unrolling to small loops.
570 bool enableAggressiveInterleaving(bool LoopHasReductions) const;
571
572 /// \brief If not nullptr, enable inline expansion of memcmp. IsZeroCmp is
573 /// true if this is the expansion of memcmp(p1, p2, s) == 0.
574 struct MemCmpExpansionOptions {
575 // The list of available load sizes (in bytes), sorted in decreasing order.
576 SmallVector<unsigned, 8> LoadSizes;
577 };
578 const MemCmpExpansionOptions *enableMemCmpExpansion(bool IsZeroCmp) const;
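  // Illustrative sketch of a hypothetical target override ('MyTTIImpl'
  // assumed) that advertises 8/4/2/1-byte loads for memcmp expansion:
  //   const TargetTransformInfo::MemCmpExpansionOptions *
  //   MyTTIImpl::enableMemCmpExpansion(bool IsZeroCmp) const {
  //     static const auto Options = [] {
  //       TargetTransformInfo::MemCmpExpansionOptions O;
  //       O.LoadSizes.append({8, 4, 2, 1}); // widest loads first, as required
  //       return O;
  //     }();
  //     return &Options;
  //   }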
579
580 /// \brief Enable matching of interleaved access groups.
581 bool enableInterleavedAccessVectorization() const;
582
583 /// \brief Indicate that it is potentially unsafe to automatically vectorize
584 /// floating-point operations because the semantics of vector and scalar
585 /// floating-point operations may differ. For example, ARM NEON v7 SIMD math
586 /// does not support IEEE-754 denormal numbers, while depending on the
587 /// platform, scalar floating-point math does.
588 /// This applies to floating-point math operations and calls, not memory
589 /// operations, shuffles, or casts.
590 bool isFPVectorizationPotentiallyUnsafe() const;
591
592 /// \brief Determine if the target supports unaligned memory accesses.
593 bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
594 unsigned BitWidth, unsigned AddressSpace = 0,
595 unsigned Alignment = 1,
596 bool *Fast = nullptr) const;
597
598 /// \brief Return hardware support for population count.
599 PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
600
601 /// \brief Return true if the hardware has a fast square-root instruction.
602 bool haveFastSqrt(Type *Ty) const;
603
604 /// Return true if it is faster to check if a floating-point value is NaN
605 /// (or not-NaN) versus a comparison against a constant FP zero value.
606 /// Targets should override this if materializing a 0.0 for comparison is
607 /// generally as cheap as checking for ordered/unordered.
608 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;
609
610 /// \brief Return the expected cost of supporting the floating point operation
611 /// of the specified type.
612 int getFPOpCost(Type *Ty) const;
613
614 /// \brief Return the expected cost of materializing the given integer
615 /// immediate of the specified type.
616 int getIntImmCost(const APInt &Imm, Type *Ty) const;
617
618 /// \brief Return the expected cost of materializing the given integer
619 /// immediate of the specified type for a given instruction. The cost can be
620 /// zero if the immediate can be folded into the specified instruction.
621 int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
622 Type *Ty) const;
623 int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
624 Type *Ty) const;
625
626 /// \brief Return the expected cost for the given integer when optimising
627 /// for size. This is different than the other integer immediate cost
628 /// functions in that it is subtarget agnostic. This is useful when you e.g.
629 /// target one ISA such as AArch32 but smaller encodings could be possible
630 /// with another such as Thumb. This return value is used as a penalty when
631 /// the total cost for a constant is calculated (the bigger the cost, the
632 /// more beneficial constant hoisting is).
633 int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
634 Type *Ty) const;
635 /// @}
636
637 /// \name Vector Target Information
638 /// @{
639
640 /// \brief The various kinds of shuffle patterns for vector queries.
641 enum ShuffleKind {
642 SK_Broadcast, ///< Broadcast element 0 to all other elements.
643 SK_Reverse, ///< Reverse the order of the vector.
644 SK_Alternate, ///< Choose alternate elements from vector.
645 SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
646 SK_ExtractSubvector, ///< ExtractSubvector. Index indicates start offset.
647 SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one
648 ///< with any shuffle mask.
649 SK_PermuteSingleSrc ///< Shuffle elements of single source vector with any
650 ///< shuffle mask.
651 };
652
653 /// \brief Additional information about an operand's possible values.
654 enum OperandValueKind {
655 OK_AnyValue, // Operand can have any value.
656 OK_UniformValue, // Operand is uniform (splat of a value).
657 OK_UniformConstantValue, // Operand is a uniform constant.
658 OK_NonUniformConstantValue // Operand is a non-uniform constant value.
659 };
660
661 /// \brief Additional properties of an operand's values.
662 enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };
663
664 /// \return The number of scalar or vector registers that the target has.
665 /// If 'Vectors' is true, it returns the number of vector registers. If it is
666 /// set to false, it returns the number of scalar registers.
667 unsigned getNumberOfRegisters(bool Vector) const;
668
669 /// \return The width of the largest scalar or vector register type.
670 unsigned getRegisterBitWidth(bool Vector) const;
671
672 /// \return The width of the smallest vector register type.
673 unsigned getMinVectorRegisterBitWidth() const;
674
675 /// \return True if the vectorization factor should be chosen to
676 /// make the vector of the smallest element type match the size of a
677 /// vector register. For wider element types, this could result in
678 /// creating vectors that span multiple vector registers.
679 /// If false, the vectorization factor will be chosen based on the
680 /// size of the widest element type.
681 bool shouldMaximizeVectorBandwidth(bool OptSize) const;
682
683 /// \return True if \p I should be considered for address type promotion.
684 /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
685 /// profitable without finding other extensions fed by the same input.
686 bool shouldConsiderAddressTypePromotion(
687 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
688
689 /// \return The size of a cache line in bytes.
690 unsigned getCacheLineSize() const;
691
692 /// The possible cache levels
693 enum class CacheLevel {
694 L1D, // The L1 data cache
695 L2D, // The L2 data cache
696
697 // We currently do not model L3 caches, as their sizes differ widely between
698 // microarchitectures. Also, we currently do not have a use for L3 cache
699 // size modeling yet.
700 };
701
702 /// \return The size of the cache level in bytes, if available.
703 llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const;
704
705 /// \return The associativity of the cache level, if available.
706 llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
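  // Illustrative sketch: a loop transformation sizing its working set could
  // consult the L1 data cache when the target reports it:
  //   if (llvm::Optional<unsigned> L1Size =
  //           TTI.getCacheSize(TargetTransformInfo::CacheLevel::L1D))
  //     Footprint = std::min(Footprint, *L1Size); // 'Footprint' assumed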
707
708 /// \return How much before a load we should place the prefetch instruction.
709 /// This is currently measured in number of instructions.
710 unsigned getPrefetchDistance() const;
711
712 /// \return Some HW prefetchers can handle accesses up to a certain constant
713 /// stride. This is the minimum stride in bytes where it makes sense to start
714 /// adding SW prefetches. The default is 1, i.e. prefetch with any stride.
715 unsigned getMinPrefetchStride() const;
716
717 /// \return The maximum number of iterations to prefetch ahead. If the
718 /// required number of iterations is more than this number, no prefetching is
719 /// performed.
720 unsigned getMaxPrefetchIterationsAhead() const;
721
722 /// \return The maximum interleave factor that any transform should try to
723 /// perform for this target. This number depends on the level of parallelism
724 /// and the number of execution units in the CPU.
725 unsigned getMaxInterleaveFactor(unsigned VF) const;
726
727 /// This is an approximation of reciprocal throughput of a math/logic op.
728 /// A higher cost indicates less expected throughput.
729 /// From Agner Fog's guides, reciprocal throughput is "the average number of
730 /// clock cycles per instruction when the instructions are not part of a
731 /// limiting dependency chain."
732 /// Therefore, costs should be scaled to account for multiple execution units
733 /// on the target that can process this type of instruction. For example, if
734 /// there are 5 scalar integer units and 2 vector integer units that can
735 /// calculate an 'add' in a single cycle, this model should indicate that the
736 /// cost of the vector add instruction is 2.5 times the cost of the scalar
737 /// add instruction.
738 /// \p Args is an optional argument which holds the instruction operands
739 /// values so the TTI can analyze those values searching for special
740 /// cases or optimizations based on those values.
741 int getArithmeticInstrCost(
742 unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
743 OperandValueKind Opd2Info = OK_AnyValue,
744 OperandValueProperties Opd1PropInfo = OP_None,
745 OperandValueProperties Opd2PropInfo = OP_None,
746 ArrayRef<const Value *> Args = ArrayRef<const Value *>()) const;
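  // Illustrative sketch ('V4I32Ty' assumed to be <4 x i32>): querying the cost
  // of a vector shift whose second operand is a uniform power-of-two constant:
  //   int Cost = TTI.getArithmeticInstrCost(
  //       Instruction::Shl, V4I32Ty, TargetTransformInfo::OK_AnyValue,
  //       TargetTransformInfo::OK_UniformConstantValue,
  //       TargetTransformInfo::OP_None, TargetTransformInfo::OP_PowerOf2);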
747
748 /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
749 /// The index and subtype parameters are used by the subvector insertion and
750 /// extraction shuffle kinds.
751 int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0,
752 Type *SubTp = nullptr) const;
753
754 /// \return The expected cost of cast instructions, such as bitcast, trunc,
755 /// zext, etc. If there is an existing instruction that holds Opcode, it
756 /// may be passed in the 'I' parameter.
757 int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
758 const Instruction *I = nullptr) const;
759
760 /// \return The expected cost of a sign- or zero-extended vector extract. Use
761 /// -1 to indicate that there is no information about the index value.
762 int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
763 unsigned Index = -1) const;
764
765 /// \return The expected cost of control-flow related instructions such as
766 /// Phi, Ret, Br.
767 int getCFInstrCost(unsigned Opcode) const;
768
769 /// \returns The expected cost of compare and select instructions. If there
770 /// is an existing instruction that holds Opcode, it may be passed in the
771 /// 'I' parameter.
772 int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
773 Type *CondTy = nullptr, const Instruction *I = nullptr) const;
774
775 /// \return The expected cost of vector Insert and Extract.
776 /// Use -1 to indicate that there is no information on the index value.
777 int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;
778
779 /// \return The cost of Load and Store instructions.
780 int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
781 unsigned AddressSpace, const Instruction *I = nullptr) const;
782
783 /// \return The cost of masked Load and Store instructions.
784 int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
785 unsigned AddressSpace) const;
786
787 /// \return The cost of Gather or Scatter operation
788 /// \p Opcode - the type of memory access, Load or Store
789 /// \p DataTy - a vector type of the data to be loaded or stored
790 /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
791 /// \p VariableMask - true when the memory access is predicated with a mask
792 /// that is not a compile-time constant
793 /// \p Alignment - alignment of single element
794 int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
795 bool VariableMask, unsigned Alignment) const;
796
797 /// \return The cost of the interleaved memory operation.
798 /// \p Opcode is the memory operation code
799 /// \p VecTy is the vector type of the interleaved access.
800 /// \p Factor is the interleave factor
801 /// \p Indices is the indices for interleaved load members (as interleaved
802 /// load allows gaps)
803 /// \p Alignment is the alignment of the memory operation
804 /// \p AddressSpace is address space of the pointer.
805 int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
806 ArrayRef<unsigned> Indices, unsigned Alignment,
807 unsigned AddressSpace) const;
808
809 /// \brief Calculate the cost of performing a vector reduction.
810 ///
811 /// This is the cost of reducing the vector value of type \p Ty to a scalar
812 /// value using the operation denoted by \p Opcode. The form of the reduction
813 /// can either be a pairwise reduction or a reduction that splits the vector
814 /// at every reduction level.
815 ///
816 /// Pairwise:
817 /// (v0, v1, v2, v3)
818 /// ((v0+v1), (v2+v3), undef, undef)
819 /// Split:
820 /// (v0, v1, v2, v3)
821 /// ((v0+v2), (v1+v3), undef, undef)
822 int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
823 bool IsPairwiseForm) const;
824 int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm,
825 bool IsUnsigned) const;
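  // Illustrative sketch: a vectorizer choosing a reduction shape for an i32
  // add over a <4 x i32> value ('V4I32Ty' assumed) might compare
  //   TTI.getArithmeticReductionCost(Instruction::Add, V4I32Ty,
  //                                  /*IsPairwiseForm=*/true)
  // against the same query with IsPairwiseForm = false and pick the cheaper
  // form.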
826
827 /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
828 /// Three cases are handled: 1. scalar instruction, 2. vector instruction,
829 /// 3. scalar instruction which is to be vectorized with VF.
830 int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
831 ArrayRef<Value *> Args, FastMathFlags FMF,
832 unsigned VF = 1) const;
833
834 /// \returns The cost of Intrinsic instructions. Types analysis only.
835 /// If ScalarizationCostPassed is UINT_MAX, the cost of scalarizing the
836 /// arguments and the return value will be computed based on types.
837 int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
838 ArrayRef<Type *> Tys, FastMathFlags FMF,
839 unsigned ScalarizationCostPassed = UINT_MAX) const;
840
841 /// \returns The cost of Call instructions.
842 int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const;
843
844 /// \returns The number of pieces into which the provided type must be
845 /// split during legalization. Zero is returned when the answer is unknown.
846 unsigned getNumberOfParts(Type *Tp) const;
847
848 /// \returns The cost of the address computation. For most targets this can be
849 /// merged into the instruction indexing mode. Some targets might want to
850 /// distinguish between address computation for memory operations on vector
851 /// types and scalar types. Such targets should override this function.
852 /// The 'SE' parameter holds a pointer to the scalar evolution object which
853 /// is used to get the Ptr step value in the case of a constant stride.
854 /// The 'Ptr' parameter holds SCEV of the access pointer.
855 int getAddressComputationCost(Type *Ty, ScalarEvolution *SE = nullptr,
856 const SCEV *Ptr = nullptr) const;
857
858 /// \returns The cost, if any, of keeping values of the given types alive
859 /// over a callsite.
860 ///
861 /// Some types may require the use of register classes that do not have
862 /// any callee-saved registers, so would require a spill and fill.
863 unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;
864
865 /// \returns True if the intrinsic is a supported memory intrinsic. Info
866 /// will contain additional information - whether the intrinsic may write
867 /// to or read from memory, its volatility, and the pointer. Info is undefined
868 /// if false is returned.
869 bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
870
871 /// \returns The maximum element size, in bytes, for an element
872 /// unordered-atomic memory intrinsic.
873 unsigned getAtomicMemIntrinsicMaxElementSize() const;
874
875 /// \returns A value which is the result of the given memory intrinsic. New
876 /// instructions may be created to extract the result from the given intrinsic
877 /// memory operation. Returns nullptr if the target cannot create a result
878 /// from the given intrinsic.
879 Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
880 Type *ExpectedType) const;
881
882 /// \returns The type to use in a loop expansion of a memcpy call.
883 Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
884 unsigned SrcAlign, unsigned DestAlign) const;
885
886 /// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
887 /// \param RemainingBytes The number of bytes to copy.
888 ///
889 /// Calculates the operand types to use when copying \p RemainingBytes of
890 /// memory, where source and destination alignments are \p SrcAlign and
891 /// \p DestAlign respectively.
892 void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
893 LLVMContext &Context,
894 unsigned RemainingBytes,
895 unsigned SrcAlign,
896 unsigned DestAlign) const;
897
898 /// \returns True if the two functions have compatible attributes for inlining
899 /// purposes.
900 bool areInlineCompatible(const Function *Caller,
901 const Function *Callee) const;
902
903 /// \brief The type of load/store indexing.
904 enum MemIndexedMode {
905 MIM_Unindexed, ///< No indexing.
906 MIM_PreInc, ///< Pre-incrementing.
907 MIM_PreDec, ///< Pre-decrementing.
908 MIM_PostInc, ///< Post-incrementing.
909 MIM_PostDec ///< Post-decrementing.
910 };
911
912 /// \returns True if the specified indexed load for the given type is legal.
913 bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;
914
915 /// \returns True if the specified indexed store for the given type is legal.
916 bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;
917
918 /// \returns The bitwidth of the largest vector type that should be used to
919 /// load/store in the given address space.
920 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
921
922 /// \returns True if the load instruction is legal to vectorize.
923 bool isLegalToVectorizeLoad(LoadInst *LI) const;
924
925 /// \returns True if the store instruction is legal to vectorize.
926 bool isLegalToVectorizeStore(StoreInst *SI) const;
927
928 /// \returns True if it is legal to vectorize the given load chain.
929 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
930 unsigned Alignment,
931 unsigned AddrSpace) const;
932
933 /// \returns True if it is legal to vectorize the given store chain.
934 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
935 unsigned Alignment,
936 unsigned AddrSpace) const;
937
938 /// \returns The new vector factor value if the target doesn't support \p
939 /// SizeInBytes loads or has a better vector factor.
940 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
941 unsigned ChainSizeInBytes,
942 VectorType *VecTy) const;
943
944 /// \returns The new vector factor value if the target doesn't support \p
945 /// SizeInBytes stores or has a better vector factor.
946 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
947 unsigned ChainSizeInBytes,
948 VectorType *VecTy) const;
949
950 /// Flags describing the kind of vector reduction.
951 struct ReductionFlags {
952 ReductionFlags() : IsMaxOp(false), IsSigned(false), NoNaN(false) {}
953 bool IsMaxOp; ///< If the op is a min/max kind, true if it's a max operation.
954 bool IsSigned; ///< Whether the operation is a signed int reduction.
955 bool NoNaN; ///< If op is an fp min/max, whether NaNs may be present.
956 };
957
958 /// \returns True if the target wants to handle the given reduction idiom in
959 /// the intrinsics form instead of the shuffle form.
960 bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
961 ReductionFlags Flags) const;
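  // Illustrative sketch: asking whether a plain integer add reduction over
  // 'VecTy' (assumed) should be emitted in intrinsic form:
  //   TargetTransformInfo::ReductionFlags Flags; // defaults: not min/max
  //   bool UseIntrinsic =
  //       TTI.useReductionIntrinsic(Instruction::Add, VecTy, Flags);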
962
963 /// \returns True if the target wants to expand the given reduction intrinsic
964 /// into a shuffle sequence.
965 bool shouldExpandReduction(const IntrinsicInst *II) const;
966 /// @}
967
968private:
969 /// \brief Estimate the latency of the specified instruction.
970 /// Returns 1 as the default value.
971 int getInstructionLatency(const Instruction *I) const;
972
973 /// \brief Returns the expected throughput cost of the instruction.
974 /// Returns -1 if the cost is unknown.
975 int getInstructionThroughput(const Instruction *I) const;
976
977 /// \brief The abstract base class used to type erase specific TTI
978 /// implementations.
979 class Concept;
980
981 /// \brief The template model for the base class which wraps a concrete
982 /// implementation in a type erased interface.
983 template <typename T> class Model;
984
985 std::unique_ptr<Concept> TTIImpl;
986};
987
988class TargetTransformInfo::Concept {
989public:
990 virtual ~Concept() = 0;
991 virtual const DataLayout &getDataLayout() const = 0;
992 virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0;
993 virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
994 ArrayRef<const Value *> Operands) = 0;
995 virtual int getExtCost(const Instruction *I, const Value *Src) = 0;
996 virtual int getCallCost(FunctionType *FTy, int NumArgs) = 0;
997 virtual int getCallCost(const Function *F, int NumArgs) = 0;
998 virtual int getCallCost(const Function *F,
999 ArrayRef<const Value *> Arguments) = 0;
1000 virtual unsigned getInliningThresholdMultiplier() = 0;
1001 virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
1002 ArrayRef<Type *> ParamTys) = 0;
1003 virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
1004 ArrayRef<const Value *> Arguments) = 0;
1005 virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
1006 unsigned &JTSize) = 0;
1007 virtual int
1008 getUserCost(const User *U, ArrayRef<const Value *> Operands) = 0;
1009 virtual bool hasBranchDivergence() = 0;
1010 virtual bool isSourceOfDivergence(const Value *V) = 0;
1011 virtual bool isAlwaysUniform(const Value *V) = 0;
1012 virtual unsigned getFlatAddressSpace() = 0;
1013 virtual bool isLoweredToCall(const Function *F) = 0;
1014 virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
1015 UnrollingPreferences &UP) = 0;
1016 virtual bool isLegalAddImmediate(int64_t Imm) = 0;
1017 virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
1018 virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
1019 int64_t BaseOffset, bool HasBaseReg,
1020 int64_t Scale,
1021 unsigned AddrSpace,
1022 Instruction *I) = 0;
1023 virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
1024 TargetTransformInfo::LSRCost &C2) = 0;
1025 virtual bool canMacroFuseCmp() = 0;
1026 virtual bool shouldFavorPostInc() const = 0;
1027 virtual bool isLegalMaskedStore(Type *DataType) = 0;
1028 virtual bool isLegalMaskedLoad(Type *DataType) = 0;
1029 virtual bool isLegalMaskedScatter(Type *DataType) = 0;
1030 virtual bool isLegalMaskedGather(Type *DataType) = 0;
1031 virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
1032 virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
1033 virtual bool prefersVectorizedAddressing() = 0;
1034 virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
1035 int64_t BaseOffset, bool HasBaseReg,
1036 int64_t Scale, unsigned AddrSpace) = 0;
1037 virtual bool LSRWithInstrQueries() = 0;
1038 virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
1039 virtual bool isProfitableToHoist(Instruction *I) = 0;
1040 virtual bool useAA() = 0;
1041 virtual bool isTypeLegal(Type *Ty) = 0;
1042 virtual unsigned getJumpBufAlignment() = 0;
1043 virtual unsigned getJumpBufSize() = 0;
1044 virtual bool shouldBuildLookupTables() = 0;
1045 virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
1046 virtual bool useColdCCForColdCall(Function &F) = 0;
1047 virtual unsigned
1048 getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) = 0;
1049 virtual unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
1050 unsigned VF) = 0;
1051 virtual bool supportsEfficientVectorElementLoadStore() = 0;
1052 virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
1053 virtual const MemCmpExpansionOptions *enableMemCmpExpansion(
1054 bool IsZeroCmp) const = 0;
1055 virtual bool enableInterleavedAccessVectorization() = 0;
1056 virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
1057 virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
1058 unsigned BitWidth,
1059 unsigned AddressSpace,
1060 unsigned Alignment,
1061 bool *Fast) = 0;
1062 virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
1063 virtual bool haveFastSqrt(Type *Ty) = 0;
1064 virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
1065 virtual int getFPOpCost(Type *Ty) = 0;
1066 virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
1067 Type *Ty) = 0;
1068 virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0;
1069 virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
1070 Type *Ty) = 0;
1071 virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
1072 Type *Ty) = 0;
1073 virtual unsigned getNumberOfRegisters(bool Vector) = 0;
1074 virtual unsigned getRegisterBitWidth(bool Vector) const = 0;
1075 virtual unsigned getMinVectorRegisterBitWidth() = 0;
1076 virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0;
1077 virtual bool shouldConsiderAddressTypePromotion(
1078 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
1079 virtual unsigned getCacheLineSize() = 0;
1080 virtual llvm::Optional<unsigned> getCacheSize(CacheLevel Level) = 0;
1081 virtual llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) = 0;
1082 virtual unsigned getPrefetchDistance() = 0;
1083 virtual unsigned getMinPrefetchStride() = 0;
1084 virtual unsigned getMaxPrefetchIterationsAhead() = 0;
1085 virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
1086 virtual unsigned
1087 getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
1088 OperandValueKind Opd2Info,
1089 OperandValueProperties Opd1PropInfo,
1090 OperandValueProperties Opd2PropInfo,
1091 ArrayRef<const Value *> Args) = 0;
1092 virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
1093 Type *SubTp) = 0;
1094 virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1095 const Instruction *I) = 0;
1096 virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1097 VectorType *VecTy, unsigned Index) = 0;
1098 virtual int getCFInstrCost(unsigned Opcode) = 0;
1099 virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
1100 Type *CondTy, const Instruction *I) = 0;
1101 virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
1102 unsigned Index) = 0;
1103 virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
1104 unsigned AddressSpace, const Instruction *I) = 0;
1105 virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
1106 unsigned Alignment,
1107 unsigned AddressSpace) = 0;
1108 virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
1109 Value *Ptr, bool VariableMask,
1110 unsigned Alignment) = 0;
1111 virtual int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
1112 unsigned Factor,
1113 ArrayRef<unsigned> Indices,
1114 unsigned Alignment,
1115 unsigned AddressSpace) = 0;
1116 virtual int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
1117 bool IsPairwiseForm) = 0;
1118 virtual int getMinMaxReductionCost(Type *Ty, Type *CondTy,
1119 bool IsPairwiseForm, bool IsUnsigned) = 0;
1120 virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
1121 ArrayRef<Type *> Tys, FastMathFlags FMF,
1122 unsigned ScalarizationCostPassed) = 0;
1123 virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
1124 ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) = 0;
1125 virtual int getCallInstrCost(Function *F, Type *RetTy,
1126 ArrayRef<Type *> Tys) = 0;
1127 virtual unsigned getNumberOfParts(Type *Tp) = 0;
1128 virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
1129 const SCEV *Ptr) = 0;
1130 virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
1131 virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
1132 MemIntrinsicInfo &Info) = 0;
1133 virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
1134 virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1135 Type *ExpectedType) = 0;
1136 virtual Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
1137 unsigned SrcAlign,
1138 unsigned DestAlign) const = 0;
1139 virtual void getMemcpyLoopResidualLoweringType(
1140 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1141 unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const = 0;
1142 virtual bool areInlineCompatible(const Function *Caller,
1143 const Function *Callee) const = 0;
1144 virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1145 virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1146 virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
1147 virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
1148 virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
1149 virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1150 unsigned Alignment,
1151 unsigned AddrSpace) const = 0;
1152 virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1153 unsigned Alignment,
1154 unsigned AddrSpace) const = 0;
1155 virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1156 unsigned ChainSizeInBytes,
1157 VectorType *VecTy) const = 0;
1158 virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1159 unsigned ChainSizeInBytes,
1160 VectorType *VecTy) const = 0;
1161 virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
1162 ReductionFlags) const = 0;
1163 virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
1164 virtual int getInstructionLatency(const Instruction *I) = 0;
1165};
1166
template <typename T>
class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
  T Impl;

public:
  Model(T Impl) : Impl(std::move(Impl)) {}
  ~Model() override {}

  const DataLayout &getDataLayout() const override {
    return Impl.getDataLayout();
  }

  int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override {
    return Impl.getOperationCost(Opcode, Ty, OpTy);
  }
  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands) override {
    return Impl.getGEPCost(PointeeType, Ptr, Operands);
  }
  int getExtCost(const Instruction *I, const Value *Src) override {
    return Impl.getExtCost(I, Src);
  }
  int getCallCost(FunctionType *FTy, int NumArgs) override {
    return Impl.getCallCost(FTy, NumArgs);
  }
  int getCallCost(const Function *F, int NumArgs) override {
    return Impl.getCallCost(F, NumArgs);
  }
  int getCallCost(const Function *F,
                  ArrayRef<const Value *> Arguments) override {
    return Impl.getCallCost(F, Arguments);
  }
  unsigned getInliningThresholdMultiplier() override {
    return Impl.getInliningThresholdMultiplier();
  }
  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                       ArrayRef<Type *> ParamTys) override {
    return Impl.getIntrinsicCost(IID, RetTy, ParamTys);
  }
  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                       ArrayRef<const Value *> Arguments) override {
    return Impl.getIntrinsicCost(IID, RetTy, Arguments);
  }
  int getUserCost(const User *U, ArrayRef<const Value *> Operands) override {
    return Impl.getUserCost(U, Operands);
  }
  bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
  bool isSourceOfDivergence(const Value *V) override {
    return Impl.isSourceOfDivergence(V);
  }

  bool isAlwaysUniform(const Value *V) override {
    return Impl.isAlwaysUniform(V);
  }

  unsigned getFlatAddressSpace() override {
    return Impl.getFlatAddressSpace();
  }

  bool isLoweredToCall(const Function *F) override {
    return Impl.isLoweredToCall(F);
  }
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               UnrollingPreferences &UP) override {
    return Impl.getUnrollingPreferences(L, SE, UP);
  }
  bool isLegalAddImmediate(int64_t Imm) override {
    return Impl.isLegalAddImmediate(Imm);
  }
  bool isLegalICmpImmediate(int64_t Imm) override {
    return Impl.isLegalICmpImmediate(Imm);
  }
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace,
                             Instruction *I) override {
    return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
                                      Scale, AddrSpace, I);
  }
  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                     TargetTransformInfo::LSRCost &C2) override {
    return Impl.isLSRCostLess(C1, C2);
  }
  bool canMacroFuseCmp() override {
    return Impl.canMacroFuseCmp();
  }
  bool shouldFavorPostInc() const override {
    return Impl.shouldFavorPostInc();
  }
  bool isLegalMaskedStore(Type *DataType) override {
    return Impl.isLegalMaskedStore(DataType);
  }
  bool isLegalMaskedLoad(Type *DataType) override {
    return Impl.isLegalMaskedLoad(DataType);
  }
  bool isLegalMaskedScatter(Type *DataType) override {
    return Impl.isLegalMaskedScatter(DataType);
  }
  bool isLegalMaskedGather(Type *DataType) override {
    return Impl.isLegalMaskedGather(DataType);
  }
  bool hasDivRemOp(Type *DataType, bool IsSigned) override {
    return Impl.hasDivRemOp(DataType, IsSigned);
  }
  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
    return Impl.hasVolatileVariant(I, AddrSpace);
  }
  bool prefersVectorizedAddressing() override {
    return Impl.prefersVectorizedAddressing();
  }
  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale,
                           unsigned AddrSpace) override {
    return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
                                     Scale, AddrSpace);
  }
  bool LSRWithInstrQueries() override {
    return Impl.LSRWithInstrQueries();
  }
  bool isTruncateFree(Type *Ty1, Type *Ty2) override {
    return Impl.isTruncateFree(Ty1, Ty2);
  }
  bool isProfitableToHoist(Instruction *I) override {
    return Impl.isProfitableToHoist(I);
  }
  bool useAA() override { return Impl.useAA(); }
  bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
  unsigned getJumpBufAlignment() override { return Impl.getJumpBufAlignment(); }
  unsigned getJumpBufSize() override { return Impl.getJumpBufSize(); }
  bool shouldBuildLookupTables() override {
    return Impl.shouldBuildLookupTables();
  }
  bool shouldBuildLookupTablesForConstant(Constant *C) override {
    return Impl.shouldBuildLookupTablesForConstant(C);
  }
  bool useColdCCForColdCall(Function &F) override {
    return Impl.useColdCCForColdCall(F);
  }

  unsigned getScalarizationOverhead(Type *Ty, bool Insert,
                                    bool Extract) override {
    return Impl.getScalarizationOverhead(Ty, Insert, Extract);
  }
  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                            unsigned VF) override {
    return Impl.getOperandsScalarizationOverhead(Args, VF);
  }

  bool supportsEfficientVectorElementLoadStore() override {
    return Impl.supportsEfficientVectorElementLoadStore();
  }

  bool enableAggressiveInterleaving(bool LoopHasReductions) override {
    return Impl.enableAggressiveInterleaving(LoopHasReductions);
  }
  const MemCmpExpansionOptions *enableMemCmpExpansion(
      bool IsZeroCmp) const override {
    return Impl.enableMemCmpExpansion(IsZeroCmp);
  }
  bool enableInterleavedAccessVectorization() override {
    return Impl.enableInterleavedAccessVectorization();
  }
  bool isFPVectorizationPotentiallyUnsafe() override {
    return Impl.isFPVectorizationPotentiallyUnsafe();
  }
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                      unsigned BitWidth, unsigned AddressSpace,
                                      unsigned Alignment, bool *Fast) override {
    return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
                                               Alignment, Fast);
  }
  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
    return Impl.getPopcntSupport(IntTyWidthInBit);
  }
  bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }

  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
    return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
  }

  int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }

  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                            Type *Ty) override {
    return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
  }
  int getIntImmCost(const APInt &Imm, Type *Ty) override {
    return Impl.getIntImmCost(Imm, Ty);
  }
  int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                    Type *Ty) override {
    return Impl.getIntImmCost(Opc, Idx, Imm, Ty);
  }
  int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                    Type *Ty) override {
    return Impl.getIntImmCost(IID, Idx, Imm, Ty);
  }
  unsigned getNumberOfRegisters(bool Vector) override {
    return Impl.getNumberOfRegisters(Vector);
  }
  unsigned getRegisterBitWidth(bool Vector) const override {
    return Impl.getRegisterBitWidth(Vector);
  }
  unsigned getMinVectorRegisterBitWidth() override {
    return Impl.getMinVectorRegisterBitWidth();
  }
  bool shouldMaximizeVectorBandwidth(bool OptSize) const override {
    return Impl.shouldMaximizeVectorBandwidth(OptSize);
  }
  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
    return Impl.shouldConsiderAddressTypePromotion(
        I, AllowPromotionWithoutCommonHeader);
  }
  unsigned getCacheLineSize() override {
    return Impl.getCacheLineSize();
  }
  llvm::Optional<unsigned> getCacheSize(CacheLevel Level) override {
    return Impl.getCacheSize(Level);
  }
  llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) override {
    return Impl.getCacheAssociativity(Level);
  }
  unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); }
  unsigned getMinPrefetchStride() override {
    return Impl.getMinPrefetchStride();
  }
  unsigned getMaxPrefetchIterationsAhead() override {
    return Impl.getMaxPrefetchIterationsAhead();
  }
  unsigned getMaxInterleaveFactor(unsigned VF) override {
    return Impl.getMaxInterleaveFactor(VF);
  }
  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize) override {
    return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize);
  }
  unsigned
  getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
                         OperandValueKind Opd2Info,
                         OperandValueProperties Opd1PropInfo,
                         OperandValueProperties Opd2PropInfo,
                         ArrayRef<const Value *> Args) override {
    return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
                                       Opd1PropInfo, Opd2PropInfo, Args);
  }
  int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
                     Type *SubTp) override {
    return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
  }
  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                       const Instruction *I) override {
    return Impl.getCastInstrCost(Opcode, Dst, Src, I);
  }
  int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
                               unsigned Index) override {
    return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
  }
  int getCFInstrCost(unsigned Opcode) override {
    return Impl.getCFInstrCost(Opcode);
  }
  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                         const Instruction *I) override {
    return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
  }
  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
    return Impl.getVectorInstrCost(Opcode, Val, Index);
  }
  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                      unsigned AddressSpace, const Instruction *I) override {
    return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
  }
  int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                            unsigned AddressSpace) override {
    return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
  }
  int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                             Value *Ptr, bool VariableMask,
                             unsigned Alignment) override {
    return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                       Alignment);
  }
  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
                                 ArrayRef<unsigned> Indices, unsigned Alignment,
                                 unsigned AddressSpace) override {
    return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                           Alignment, AddressSpace);
  }
  int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
                                 bool IsPairwiseForm) override {
    return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
  }
  int getMinMaxReductionCost(Type *Ty, Type *CondTy,
                             bool IsPairwiseForm, bool IsUnsigned) override {
    return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
  }
  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
      FastMathFlags FMF, unsigned ScalarizationCostPassed) override {
    return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
                                      ScalarizationCostPassed);
  }
  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
       ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) override {
    return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
  }
  int getCallInstrCost(Function *F, Type *RetTy,
                       ArrayRef<Type *> Tys) override {
    return Impl.getCallInstrCost(F, RetTy, Tys);
  }
  unsigned getNumberOfParts(Type *Tp) override {
    return Impl.getNumberOfParts(Tp);
  }
  int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                const SCEV *Ptr) override {
    return Impl.getAddressComputationCost(Ty, SE, Ptr);
  }
  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
    return Impl.getCostOfKeepingLiveOverCall(Tys);
  }
  bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                          MemIntrinsicInfo &Info) override {
    return Impl.getTgtMemIntrinsic(Inst, Info);
  }
  unsigned getAtomicMemIntrinsicMaxElementSize() const override {
    return Impl.getAtomicMemIntrinsicMaxElementSize();
  }
  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) override {
    return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
  }
  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                  unsigned SrcAlign,
                                  unsigned DestAlign) const override {
    return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAlign, DestAlign);
  }
  void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
                                         LLVMContext &Context,
                                         unsigned RemainingBytes,
                                         unsigned SrcAlign,
                                         unsigned DestAlign) const override {
    Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
                                           SrcAlign, DestAlign);
  }
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const override {
    return Impl.areInlineCompatible(Caller, Callee);
  }
  bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
    return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
  }
  bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
    return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
  }
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
    return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
  }
  bool isLegalToVectorizeLoad(LoadInst *LI) const override {
    return Impl.isLegalToVectorizeLoad(LI);
  }
  bool isLegalToVectorizeStore(StoreInst *SI) const override {
    return Impl.isLegalToVectorizeStore(SI);
  }
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const override {
    return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
                                            AddrSpace);
  }
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const override {
    return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
                                             AddrSpace);
  }
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const override {
    return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
  }
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const override {
    return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
  }
  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                             ReductionFlags Flags) const override {
    return Impl.useReductionIntrinsic(Opcode, Ty, Flags);
  }
  bool shouldExpandReduction(const IntrinsicInst *II) const override {
    return Impl.shouldExpandReduction(II);
  }
  int getInstructionLatency(const Instruction *I) override {
    return Impl.getInstructionLatency(I);
  }
};

template <typename T>
TargetTransformInfo::TargetTransformInfo(T Impl)
    : TTIImpl(new Model<T>(Impl)) {}

/// \brief Analysis pass providing the \c TargetTransformInfo.
///
/// The core idea of the TargetIRAnalysis is to expose an interface through
/// which LLVM targets can analyze and provide information about the middle
/// end's target-independent IR. This supports use cases such as target-aware
/// cost modeling of IR constructs.
///
/// This is a function analysis because much of the cost modeling for targets
/// is done in a subtarget-specific way and LLVM supports compiling different
/// functions targeting different subtargets in order to support runtime
/// dispatch according to the observed subtarget.
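///
/// For example, a pass running under the new pass manager can obtain the
/// result for a function like this (a minimal sketch; \c ExamplePass is a
/// hypothetical pass and \c FAM is the FunctionAnalysisManager passed to its
/// \c run method):
/// \code
///   PreservedAnalyses ExamplePass::run(Function &F,
///                                      FunctionAnalysisManager &FAM) {
///     TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
///     // Query the target, e.g. whether sqrt is cheap on this subtarget.
///     bool FastSqrt = TTI.haveFastSqrt(Type::getFloatTy(F.getContext()));
///     (void)FastSqrt;
///     return PreservedAnalyses::all();
///   }
/// \endcode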
class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
public:
  typedef TargetTransformInfo Result;

  /// \brief Default construct a target IR analysis.
  ///
  /// This will use the module's datalayout to construct a baseline
  /// conservative TTI result.
  TargetIRAnalysis();

  /// \brief Construct an IR analysis pass around a target-provided callback.
  ///
  /// The callback will be called with a particular function for which the TTI
  /// is needed and must return a TTI object for that function.
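  ///
  /// For example, a target would typically wire the callback to its
  /// TargetMachine (a sketch only; it assumes a \c TM object whose
  /// \c getTargetTransformInfo(F) returns a \c TargetTransformInfo):
  /// \code
  ///   TargetIRAnalysis TIRA(
  ///       [&TM](const Function &F) { return TM.getTargetTransformInfo(F); });
  /// \endcode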
  TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);

  // Value semantics. We spell out the constructors for MSVC.
  TargetIRAnalysis(const TargetIRAnalysis &Arg)
      : TTICallback(Arg.TTICallback) {}
  TargetIRAnalysis(TargetIRAnalysis &&Arg)
      : TTICallback(std::move(Arg.TTICallback)) {}
  TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
    TTICallback = RHS.TTICallback;
    return *this;
  }
  TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
    TTICallback = std::move(RHS.TTICallback);
    return *this;
  }

  Result run(const Function &F, FunctionAnalysisManager &);

private:
  friend AnalysisInfoMixin<TargetIRAnalysis>;
  static AnalysisKey Key;

  /// \brief The callback used to produce a result.
  ///
  /// We use a completely opaque callback so that targets can provide whatever
  /// mechanism they desire for constructing the TTI for a given function.
  ///
  /// FIXME: Should we really use std::function? It's relatively inefficient.
  /// It might be possible to arrange for even stateful callbacks to outlive
  /// the analysis and thus use a function_ref which would be lighter weight.
  /// This may also be less error prone as the callback is likely to reference
  /// the external TargetMachine, and that reference needs to never dangle.
  std::function<Result(const Function &)> TTICallback;

  /// \brief Helper function used as the callback in the default constructor.
  static Result getDefaultTTI(const Function &F);
};

/// \brief Wrapper pass for TargetTransformInfo.
///
/// This pass can be constructed from a TTI object which it stores internally
/// and is queried by passes.
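///
/// Under the legacy pass manager a transformation declares a dependency on
/// this wrapper and queries it per function, for example (a minimal sketch
/// for a hypothetical legacy pass \c ExamplePass):
/// \code
///   void ExamplePass::getAnalysisUsage(AnalysisUsage &AU) const {
///     AU.addRequired<TargetTransformInfoWrapperPass>();
///   }
///   bool ExamplePass::runOnFunction(Function &F) {
///     TargetTransformInfo &TTI =
///         getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
///     // ... use TTI to cost-model the transformation ...
///     return false;
///   }
/// \endcode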
class TargetTransformInfoWrapperPass : public ImmutablePass {
  TargetIRAnalysis TIRA;
  Optional<TargetTransformInfo> TTI;

  virtual void anchor();

public:
  static char ID;

  /// \brief We must provide a default constructor for the pass but it should
  /// never be used.
  ///
  /// Use the constructor below or call one of the creation routines.
  TargetTransformInfoWrapperPass();

  explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);

  TargetTransformInfo &getTTI(const Function &F);
};

/// \brief Create an analysis pass wrapper around a TTI object.
///
/// This analysis pass just holds the TTI instance and makes it available to
/// clients.
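///
/// For example, a driver building a legacy pipeline might register it as
/// follows (a sketch only; it assumes a \c TM pointer to the selected
/// TargetMachine and a legacy \c PM pass manager):
/// \code
///   PM.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
/// \endcode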
ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);

} // End llvm namespace

#endif