1//===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//
2//
3// The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9/// \file
10/// This pass exposes codegen information to IR-level passes. Every
11/// transformation that uses codegen information is broken into three parts:
12/// 1. The IR-level analysis pass.
13/// 2. The IR-level transformation interface which provides the needed
14/// information.
15/// 3. Codegen-level implementation which uses target-specific hooks.
16///
17/// This file defines #2, which is the interface that IR-level transformations
18/// use for querying the codegen.
19///
20//===----------------------------------------------------------------------===//
21
22#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
23#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H
24
25#include "llvm/ADT/Optional.h"
26#include "llvm/IR/Operator.h"
27#include "llvm/IR/PassManager.h"
28#include "llvm/Pass.h"
29#include "llvm/Support/AtomicOrdering.h"
30#include "llvm/Support/DataTypes.h"
31#include <functional>
32
33namespace llvm {
34
35namespace Intrinsic {
36enum ID : unsigned;
37}
38
39class Function;
40class GlobalValue;
41class IntrinsicInst;
42class LoadInst;
43class Loop;
44class SCEV;
45class ScalarEvolution;
46class StoreInst;
47class SwitchInst;
48class Type;
49class User;
50class Value;
51
52/// \brief Information about a load/store intrinsic defined by the target.
53struct MemIntrinsicInfo {
54 /// This is the pointer that the intrinsic is loading from or storing to.
55 /// If this is non-null, then analysis/optimization passes can assume that
56 /// this intrinsic is functionally equivalent to a load/store from this
57 /// pointer.
58 Value *PtrVal = nullptr;
59
60 // Ordering for atomic operations.
61 AtomicOrdering Ordering = AtomicOrdering::NotAtomic;
62
63 // The same Id is set by the target for corresponding load/store intrinsics.
64 unsigned short MatchingId = 0;
65
66 bool ReadMem = false;
67 bool WriteMem = false;
68 bool IsVolatile = false;
69
70 bool isUnordered() const {
71 return (Ordering == AtomicOrdering::NotAtomic ||
72 Ordering == AtomicOrdering::Unordered) && !IsVolatile;
73 }
74};
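// Illustrative sketch (not from the original header): a hypothetical target's
// getTgtMemIntrinsic() override might describe a simple, non-atomic,
// load-like intrinsic roughly as follows, where 'II' is the IntrinsicInst:
//   MemIntrinsicInfo Info;
//   Info.PtrVal = II->getArgOperand(0); // assumes operand 0 is the pointer
//   Info.ReadMem = true;
//   Info.WriteMem = false;
//   Info.IsVolatile = false;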
75
76/// \brief This pass provides access to the codegen interfaces that are needed
77/// for IR-level transformations.
78class TargetTransformInfo {
79public:
80 /// \brief Construct a TTI object using a type implementing the \c Concept
81 /// API below.
82 ///
83 /// This is used by targets to construct a TTI wrapping their target-specific
84 /// implementation that encodes appropriate costs for their target.
85 template <typename T> TargetTransformInfo(T Impl);
86
87 /// \brief Construct a baseline TTI object using a minimal implementation of
88 /// the \c Concept API below.
89 ///
90 /// The TTI implementation will reflect the information in the DataLayout
91 /// provided if non-null.
92 explicit TargetTransformInfo(const DataLayout &DL);
93
94 // Provide move semantics.
95 TargetTransformInfo(TargetTransformInfo &&Arg);
96 TargetTransformInfo &operator=(TargetTransformInfo &&RHS);
97
98 // We need to define the destructor out-of-line to define our sub-classes
99 // out-of-line.
100 ~TargetTransformInfo();
101
102 /// \brief Handle the invalidation of this information.
103 ///
104 /// When used as a result of \c TargetIRAnalysis this method will be called
105 /// when the function this was computed for changes. When it returns false,
106 /// the information is preserved across those changes.
107 bool invalidate(Function &, const PreservedAnalyses &,
108 FunctionAnalysisManager::Invalidator &) {
109 // FIXME: We should probably in some way ensure that the subtarget
110 // information for a function hasn't changed.
111 return false;
112 }
113
114 /// \name Generic Target Information
115 /// @{
116
117 /// \brief The kind of cost model.
118 ///
119 /// There are several different cost models that can be customized by the
120 /// target. The normalization of each cost model may be target specific.
121 enum TargetCostKind {
122 TCK_RecipThroughput, ///< Reciprocal throughput.
123 TCK_Latency, ///< The latency of the instruction.
124 TCK_CodeSize ///< Instruction code size.
125 };
126
127 /// \brief Query the cost of a specified instruction.
128 ///
129 /// Clients should use this interface to query the cost of an existing
130 /// instruction. The instruction must have a valid parent (basic block).
131 ///
132 /// Note, this method does not cache the cost calculation and it
133 /// can be expensive in some cases.
134 int getInstructionCost(const Instruction *I, enum TargetCostKind kind) const {
135 switch (kind) {
136 case TCK_RecipThroughput:
137 return getInstructionThroughput(I);
138
139 case TCK_Latency:
140 return getInstructionLatency(I);
141
142 case TCK_CodeSize:
143 return getUserCost(I);
144 }
145 llvm_unreachable("Unknown instruction cost kind");
146 }
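  // Illustrative sketch (not part of the original header): given a TTI
  // reference 'TTI' and an instruction 'I' with a valid parent block, a pass
  // could compare cost models like so:
  //   int SizeCost = TTI.getInstructionCost(I, TargetTransformInfo::TCK_CodeSize);
  //   int TputCost =
  //       TTI.getInstructionCost(I, TargetTransformInfo::TCK_RecipThroughput);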
147
148 /// \brief Underlying constants for 'cost' values in this interface.
149 ///
150 /// Many APIs in this interface return a cost. This enum defines the
151 /// fundamental values that should be used to interpret (and produce) those
152 /// costs. The costs are returned as an int rather than a member of this
153 /// enumeration because it is expected that the cost of one IR instruction
154 /// may have a multiplicative factor to it or otherwise won't fit directly
155 /// into the enum. Moreover, it is common to sum or average costs which works
156 /// better as simple integral values. Thus this enum only provides constants.
157 /// Also note that the returned costs are signed integers to make it natural
158 /// to add, subtract, and test with zero (a common boundary condition). It is
159 /// not expected that 2^32 is a realistic cost to be modeling at any point.
160 ///
161 /// Note that these costs should usually reflect the intersection of code-size
162 /// cost and execution cost. A free instruction is typically one that folds
163 /// into another instruction. For example, reg-to-reg moves can often be
164 /// skipped by renaming the registers in the CPU, but they still are encoded
165 /// and thus wouldn't be considered 'free' here.
166 enum TargetCostConstants {
167 TCC_Free = 0, ///< Expected to fold away in lowering.
168 TCC_Basic = 1, ///< The cost of a typical 'add' instruction.
169 TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
170 };
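  // Illustrative sketch (assumed names 'TTI' and 'BB'): costs are plain ints,
  // so they can be accumulated and compared against budgets expressed in terms
  // of these constants:
  //   int Cost = 0;
  //   for (const Instruction &I : BB)
  //     Cost += TTI.getUserCost(&I);
  //   bool Cheap = Cost <= 4 * TargetTransformInfo::TCC_Basic;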
171
172 /// \brief Estimate the cost of a specific operation when lowered.
173 ///
174 /// Note that this is designed to work on an arbitrary synthetic opcode, and
175 /// thus work for hypothetical queries before an instruction has even been
176 /// formed. However, this does *not* work for GEPs, and must not be called
177 /// for a GEP instruction. Instead, use the dedicated getGEPCost interface as
178 /// analyzing a GEP's cost required more information.
179 ///
180 /// Typically only the result type is required, and the operand type can be
181 /// omitted. However, if the opcode is one of the cast instructions, the
182 /// operand type is required.
183 ///
184 /// The returned cost is defined in terms of \c TargetCostConstants, see its
185 /// comments for a detailed explanation of the cost values.
186 int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy = nullptr) const;
187
188 /// \brief Estimate the cost of a GEP operation when lowered.
189 ///
190 /// The contract for this function is the same as \c getOperationCost except
191 /// that it supports an interface that provides extra information specific to
192 /// the GEP operation.
193 int getGEPCost(Type *PointeeType, const Value *Ptr,
194 ArrayRef<const Value *> Operands) const;
195
196 /// \brief Estimate the cost of an EXT operation when lowered.
197 ///
198 /// The contract for this function is the same as \c getOperationCost except
199 /// that it supports an interface that provides extra information specific to
200 /// the EXT operation.
201 int getExtCost(const Instruction *I, const Value *Src) const;
202
203 /// \brief Estimate the cost of a function call when lowered.
204 ///
205 /// The contract for this is the same as \c getOperationCost except that it
206 /// supports an interface that provides extra information specific to call
207 /// instructions.
208 ///
209 /// This is the most basic query for estimating call cost: it only knows the
210 /// function type and (potentially) the number of arguments at the call site.
211 /// The latter is only interesting for varargs function types.
212 int getCallCost(FunctionType *FTy, int NumArgs = -1) const;
213
214 /// \brief Estimate the cost of calling a specific function when lowered.
215 ///
216 /// This overload adds the ability to reason about the particular function
217 /// being called in the event it is a library call with special lowering.
218 int getCallCost(const Function *F, int NumArgs = -1) const;
219
220 /// \brief Estimate the cost of calling a specific function when lowered.
221 ///
222 /// This overload allows specifying a set of candidate argument values.
223 int getCallCost(const Function *F, ArrayRef<const Value *> Arguments) const;
224
225 /// \returns A value by which our inlining threshold should be multiplied.
226 /// This is primarily used to bump up the inlining threshold wholesale on
227 /// targets where calls are unusually expensive.
228 ///
229 /// TODO: This is a rather blunt instrument. Perhaps altering the costs of
230 /// individual classes of instructions would be better.
231 unsigned getInliningThresholdMultiplier() const;
232
233 /// \brief Estimate the cost of an intrinsic when lowered.
234 ///
235 /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
236 int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
237 ArrayRef<Type *> ParamTys) const;
238
239 /// \brief Estimate the cost of an intrinsic when lowered.
240 ///
241 /// Mirrors the \c getCallCost method but uses an intrinsic identifier.
242 int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
243 ArrayRef<const Value *> Arguments) const;
244
245 /// \return The estimated number of case clusters when lowering \p 'SI'.
246 /// \p JTSize is set to the jump table size only when \p SI is suitable for a
247 /// jump table.
248 unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
249 unsigned &JTSize) const;
250
251 /// \brief Estimate the cost of a given IR user when lowered.
252 ///
253 /// This can estimate the cost of either a ConstantExpr or Instruction when
254 /// lowered. It has two primary advantages over the \c getOperationCost and
255 /// \c getGEPCost above, and one significant disadvantage: it can only be
256 /// used when the IR construct has already been formed.
257 ///
258 /// The advantages are that it can inspect the SSA use graph to reason more
259 /// accurately about the cost. For example, all-constant-GEPs can often be
260 /// folded into a load or other instruction, but if they are used in some
261 /// other context they may not be folded. This routine can distinguish such
262 /// cases.
263 ///
264 /// \p Operands is a list of operands which can be a result of transformations
265 /// of the current operands. The number of operands on the list must equal the
266 /// number of current operands the IR user has. Their order on the
267 /// list must be the same as the order of the current operands the IR user
268 /// has.
269 ///
270 /// The returned cost is defined in terms of \c TargetCostConstants, see its
271 /// comments for a detailed explanation of the cost values.
272 int getUserCost(const User *U, ArrayRef<const Value *> Operands) const;
273
274 /// \brief This is a helper function which calls the two-argument getUserCost
275 /// with \p Operands which are the current operands U has.
276 int getUserCost(const User *U) const {
277 SmallVector<const Value *, 4> Operands(U->value_op_begin(),
278 U->value_op_end());
279 return getUserCost(U, Operands);
280 }
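  // Illustrative sketch: the two-argument form lets a transformation ask what
  // the user would cost if its operands were replaced, e.g. with 'NewOps'
  // (assumed) holding simplified operands:
  //   int CostNow = TTI.getUserCost(U);
  //   int CostAfter = TTI.getUserCost(U, NewOps);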
281
282 /// \brief Return true if branch divergence exists.
283 ///
284 /// Branch divergence has a significantly negative impact on GPU performance
285 /// when threads in the same wavefront take different paths due to conditional
286 /// branches.
287 bool hasBranchDivergence() const;
288
289 /// \brief Returns whether V is a source of divergence.
290 ///
291 /// This function provides the target-dependent information for
292 /// the target-independent DivergenceAnalysis. DivergenceAnalysis first
293 /// builds the dependency graph, and then runs the reachability algorithm
294 /// starting with the sources of divergence.
295 bool isSourceOfDivergence(const Value *V) const;
296
297 /// \brief Returns true for the target-specific set of operations
298 /// which produce a uniform result even when taking
299 /// non-uniform arguments.
300 bool isAlwaysUniform(const Value *V) const;
301
302 /// Returns the address space ID for a target's 'flat' address space. Note
303 /// this is not necessarily the same as addrspace(0), which LLVM sometimes
304 /// refers to as the generic address space. The flat address space is a
305 /// generic address space that can be used to access multiple segments of memory
306 /// with different address spaces. Access of a memory location through a
307 /// pointer with this address space is expected to be legal but slower
308 /// compared to the same memory location accessed through a pointer with a
309 /// different address space.
310 ///
311 /// This is for targets with different pointer representations which can
312 /// be converted with the addrspacecast instruction. If a pointer is converted
313 /// to this address space, optimizations should attempt to replace the access
314 /// with the source address space.
315 ///
316 /// \returns ~0u if the target does not have such a flat address space to
317 /// optimize away.
318 unsigned getFlatAddressSpace() const;
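  // Illustrative sketch: an address-space inference pass could bail out when
  // no flat address space exists:
  //   unsigned FlatAS = TTI.getFlatAddressSpace();
  //   if (FlatAS == ~0u)
  //     return false; // nothing to rewrite on this target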
319
320 /// \brief Test whether calls to a function lower to actual program function
321 /// calls.
322 ///
323 /// The idea is to test whether the program is likely to require a 'call'
324 /// instruction or equivalent in order to call the given function.
325 ///
326 /// FIXME: It's not clear that this is a good or useful query API. Clients
327 /// should probably move to simpler cost metrics using the above.
328 /// Alternatively, we could split the cost interface into distinct code-size
329 /// and execution-speed costs. This would allow modelling the core of this
330 /// query more accurately as a call is a single small instruction, but
331 /// incurs significant execution cost.
332 bool isLoweredToCall(const Function *F) const;
333
334 struct LSRCost {
335 /// TODO: Some of these could be merged. Also, a lexical ordering
336 /// isn't always optimal.
337 unsigned Insns;
338 unsigned NumRegs;
339 unsigned AddRecCost;
340 unsigned NumIVMuls;
341 unsigned NumBaseAdds;
342 unsigned ImmCost;
343 unsigned SetupCost;
344 unsigned ScaleCost;
345 };
346
347 /// Parameters that control the generic loop unrolling transformation.
348 struct UnrollingPreferences {
349 /// The cost threshold for the unrolled loop. Should be relative to the
350 /// getUserCost values returned by this API, and the expectation is that
351 /// the unrolled loop's instructions when run through that interface should
352 /// not exceed this cost. However, this is only an estimate. Also, specific
353 /// loops may be unrolled even with a cost above this threshold if deemed
354 /// profitable. Set this to UINT_MAX to disable the loop body cost
355 /// restriction.
356 unsigned Threshold;
357 /// If complete unrolling will reduce the cost of the loop, we will boost
358 /// the Threshold by a certain percent to allow more aggressive complete
359 /// unrolling. This value provides the maximum boost percentage that we
360 /// can apply to Threshold (The value should be no less than 100).
361 /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
362 /// MaxPercentThresholdBoost / 100)
363 /// E.g. if complete unrolling reduces the loop execution time by 50%
364 /// then we boost the threshold by the factor of 2x. If unrolling is not
365 /// expected to reduce the running time, then we do not increase the
366 /// threshold.
367 unsigned MaxPercentThresholdBoost;
368 /// The cost threshold for the unrolled loop when optimizing for size (set
369 /// to UINT_MAX to disable).
370 unsigned OptSizeThreshold;
371 /// The cost threshold for the unrolled loop, like Threshold, but used
372 /// for partial/runtime unrolling (set to UINT_MAX to disable).
373 unsigned PartialThreshold;
374 /// The cost threshold for the unrolled loop when optimizing for size, like
375 /// OptSizeThreshold, but used for partial/runtime unrolling (set to
376 /// UINT_MAX to disable).
377 unsigned PartialOptSizeThreshold;
378 /// A forced unrolling factor (the number of concatenated bodies of the
379 /// original loop in the unrolled loop body). When set to 0, the unrolling
380 /// transformation will select an unrolling factor based on the current cost
381 /// threshold and other factors.
382 unsigned Count;
383 /// A forced peeling factor (the number of bodies of the original loop
384 /// that should be peeled off before the loop body). When set to 0, the
385 /// unrolling transformation will select a peeling factor based on profile
386 /// information and other factors.
387 unsigned PeelCount;
388 /// Default unroll count for loops with run-time trip count.
389 unsigned DefaultUnrollRuntimeCount;
390 // Set the maximum unrolling factor. The unrolling factor may be selected
391 // using the appropriate cost threshold, but may not exceed this number
392 // (set to UINT_MAX to disable). This does not apply in cases where the
393 // loop is being fully unrolled.
394 unsigned MaxCount;
395 /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
396 /// applies even if full unrolling is selected. This allows a target to fall
397 /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
398 unsigned FullUnrollMaxCount;
399 // Represents number of instructions optimized when "back edge"
400 // becomes "fall through" in unrolled loop.
401 // For now we count a conditional branch on a backedge and a comparison
402 // feeding it.
403 unsigned BEInsns;
404 /// Allow partial unrolling (unrolling of loops to expand the size of the
405 /// loop body, not only to eliminate small constant-trip-count loops).
406 bool Partial;
407 /// Allow runtime unrolling (unrolling of loops to expand the size of the
408 /// loop body even when the number of loop iterations is not known at
409 /// compile time).
410 bool Runtime;
411 /// Allow generation of a loop remainder (extra iterations after unroll).
412 bool AllowRemainder;
413 /// Allow emitting expensive instructions (such as divisions) when computing
414 /// the trip count of a loop for runtime unrolling.
415 bool AllowExpensiveTripCount;
416 /// Apply loop unroll on any kind of loop
417 /// (mainly to loops that fail runtime unrolling).
418 bool Force;
419 /// Allow using trip count upper bound to unroll loops.
420 bool UpperBound;
421 /// Allow peeling off loop iterations for loops with low dynamic tripcount.
422 bool AllowPeeling;
423 /// Allow unrolling of all the iterations of the runtime loop remainder.
424 bool UnrollRemainder;
425 };
426
427 /// \brief Get target-customized preferences for the generic loop unrolling
428 /// transformation. The caller will initialize UP with the current
429 /// target-independent defaults.
430 void getUnrollingPreferences(Loop *L, ScalarEvolution &,
431 UnrollingPreferences &UP) const;
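  // Illustrative sketch of a hypothetical target override ('MyTTIImpl' is an
  // assumed name); the caller hands in target-independent defaults which the
  // target then adjusts:
  //   void MyTTIImpl::getUnrollingPreferences(
  //       Loop *L, ScalarEvolution &SE,
  //       TargetTransformInfo::UnrollingPreferences &UP) {
  //     UP.Partial = true; // allow partial unrolling on this target
  //     UP.MaxCount = 4;   // but never unroll by more than 4x
  //   }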
432
433 /// @}
434
435 /// \name Scalar Target Information
436 /// @{
437
438 /// \brief Flags indicating the kind of support for population count.
439 ///
440 /// Compared to the SW implementation, HW support is supposed to
441 /// significantly boost the performance when the population is dense, and it
442 /// may or may not degrade performance if the population is sparse. HW
443 /// support is considered "Fast" if it can outperform, or is on a par
444 /// with, the SW implementation when the population is sparse; otherwise, it
445 /// is considered "Slow".
446 enum PopcntSupportKind { PSK_Software, PSK_SlowHardware, PSK_FastHardware };
447
448 /// \brief Return true if the specified immediate is a legal add immediate,
449 /// that is, the target has add instructions which can add a register with the
450 /// immediate without having to materialize the immediate into a register.
451 bool isLegalAddImmediate(int64_t Imm) const;
452
453 /// \brief Return true if the specified immediate is a legal icmp immediate,
454 /// that is, the target has icmp instructions which can compare a register
455 /// against the immediate without having to materialize the immediate into a
456 /// register.
457 bool isLegalICmpImmediate(int64_t Imm) const;
458
459 /// \brief Return true if the addressing mode represented by AM is legal for
460 /// this target, for a load/store of the specified type.
461 /// The type may be VoidTy, in which case only return true if the addressing
462 /// mode is legal for a load/store of any legal type.
463 /// If the target returns true in LSRWithInstrQueries(), I may be valid.
464 /// TODO: Handle pre/postinc as well.
465 bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
466 bool HasBaseReg, int64_t Scale,
467 unsigned AddrSpace = 0,
468 Instruction *I = nullptr) const;
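  // Illustrative sketch ('TTI' and 'Int32Ty' assumed): checking whether a
  // base-register-plus-16 mode is legal for an i32 access:
  //   bool Legal = TTI.isLegalAddressingMode(Int32Ty, /*BaseGV=*/nullptr,
  //                                          /*BaseOffset=*/16,
  //                                          /*HasBaseReg=*/true, /*Scale=*/0);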
469
470 /// \brief Return true if the LSR cost of C1 is lower than that of C2.
471 bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
472 TargetTransformInfo::LSRCost &C2) const;
473
474 /// Return true if the target can fuse a compare and branch.
475 /// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost
476 /// calculation for the instructions in a loop.
477 bool canMacroFuseCmp() const;
478
479 /// \return True if LSR should make efforts to create/preserve post-inc
480 /// addressing mode expressions.
481 bool shouldFavorPostInc() const;
482
483 /// \brief Return true if the target supports masked load/store.
484 /// AVX2 and AVX-512 targets allow masks for consecutive loads and stores.
485 bool isLegalMaskedStore(Type *DataType) const;
486 bool isLegalMaskedLoad(Type *DataType) const;
487
488 /// \brief Return true if the target supports masked gather/scatter.
489 /// AVX-512 fully supports gather and scatter for vectors with 32- and
490 /// 64-bit scalar types.
491 bool isLegalMaskedScatter(Type *DataType) const;
492 bool isLegalMaskedGather(Type *DataType) const;
493
494 /// Return true if the target has a unified operation to calculate division
495 /// and remainder. If so, the additional implicit multiplication and
496 /// subtraction required to calculate a remainder from division are free. This
497 /// can enable more aggressive transformations for division and remainder than
498 /// would typically be allowed using throughput or size cost models.
499 bool hasDivRemOp(Type *DataType, bool IsSigned) const;
500
501 /// Return true if the given instruction (assumed to be a memory access
502 /// instruction) has a volatile variant. If that's the case then we can avoid
503 /// addrspacecast to generic AS for volatile loads/stores. Default
504 /// implementation returns false, which prevents address space inference for
505 /// volatile loads/stores.
506 bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;
507
508 /// Return true if the target doesn't mind addresses in vectors.
509 bool prefersVectorizedAddressing() const;
510
511 /// \brief Return the cost of the scaling factor used in the addressing
512 /// mode represented by AM for this target, for a load/store
513 /// of the specified type.
514 /// If the AM is supported, the return value must be >= 0.
515 /// If the AM is not supported, it returns a negative value.
516 /// TODO: Handle pre/postinc as well.
517 int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
518 bool HasBaseReg, int64_t Scale,
519 unsigned AddrSpace = 0) const;
520
521 /// \brief Return true if the loop strength reduce pass should make
522 /// Instruction* based TTI queries to isLegalAddressingMode(). This is
523 /// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned
524 /// immediate offset and no index register.
525 bool LSRWithInstrQueries() const;
526
527 /// \brief Return true if it's free to truncate a value of type Ty1 to type
528 /// Ty2. E.g., on x86 it's free to truncate an i32 value in register EAX to i16
529 /// by referencing its sub-register AX.
530 bool isTruncateFree(Type *Ty1, Type *Ty2) const;
531
532 /// \brief Return true if it is profitable to hoist instructions in the
533 /// then/else blocks to before the if.
534 bool isProfitableToHoist(Instruction *I) const;
535
536 bool useAA() const;
537
538 /// \brief Return true if this type is legal.
539 bool isTypeLegal(Type *Ty) const;
540
541 /// \brief Returns the target's jmp_buf alignment in bytes.
542 unsigned getJumpBufAlignment() const;
543
544 /// \brief Returns the target's jmp_buf size in bytes.
545 unsigned getJumpBufSize() const;
546
547 /// \brief Return true if switches should be turned into lookup tables for the
548 /// target.
549 bool shouldBuildLookupTables() const;
550
551 /// \brief Return true if switches should be turned into lookup tables
552 /// containing this constant value for the target.
553 bool shouldBuildLookupTablesForConstant(Constant *C) const;
554
555 /// \brief Return true if the input function, which is cold at all call sites,
556 /// should use the coldcc calling convention.
557 bool useColdCCForColdCall(Function &F) const;
558
559 unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;
560
561 unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
562 unsigned VF) const;
563
564 /// If the target has efficient vector element load/store instructions, it can
565 /// return true here so that insertion/extraction costs are not added to
566 /// the scalarization cost of a load/store.
567 bool supportsEfficientVectorElementLoadStore() const;
568
569 /// \brief Don't restrict interleaved unrolling to small loops.
570 bool enableAggressiveInterleaving(bool LoopHasReductions) const;
571
572 /// \brief If not nullptr, enable inline expansion of memcmp. IsZeroCmp is
573 /// true if this is the expansion of memcmp(p1, p2, s) == 0.
574 struct MemCmpExpansionOptions {
575 // The list of available load sizes (in bytes), sorted in decreasing order.
576 SmallVector<unsigned, 8> LoadSizes;
577 };
578 const MemCmpExpansionOptions *enableMemCmpExpansion(bool IsZeroCmp) const;
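  // Illustrative sketch of a hypothetical target override ('MyTTIImpl'
  // assumed) that advertises 8/4/2/1-byte loads for memcmp expansion:
  //   const TargetTransformInfo::MemCmpExpansionOptions *
  //   MyTTIImpl::enableMemCmpExpansion(bool IsZeroCmp) const {
  //     static const auto Options = [] {
  //       TargetTransformInfo::MemCmpExpansionOptions O;
  //       O.LoadSizes.append({8, 4, 2, 1}); // widest loads first, as required
  //       return O;
  //     }();
  //     return &Options;
  //   }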
579
580 /// \brief Enable matching of interleaved access groups.
581 bool enableInterleavedAccessVectorization() const;
582
583 /// \brief Indicate that it is potentially unsafe to automatically vectorize
584 /// floating-point operations because the semantics of vector and scalar
585 /// floating-point operations may differ. For example, ARM NEON v7 SIMD math
586 /// does not support IEEE-754 denormal numbers, while depending on the
587 /// platform, scalar floating-point math does.
588 /// This applies to floating-point math operations and calls, not memory
589 /// operations, shuffles, or casts.
590 bool isFPVectorizationPotentiallyUnsafe() const;
591
592 /// \brief Determine if the target supports unaligned memory accesses.
593 bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
594 unsigned BitWidth, unsigned AddressSpace = 0,
595 unsigned Alignment = 1,
596 bool *Fast = nullptr) const;
597
598 /// \brief Return hardware support for population count.
599 PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;
600
601 /// \brief Return true if the hardware has a fast square-root instruction.
602 bool haveFastSqrt(Type *Ty) const;
603
604 /// Return true if it is faster to check if a floating-point value is NaN
605 /// (or not-NaN) versus a comparison against a constant FP zero value.
606 /// Targets should override this if materializing a 0.0 for comparison is
607 /// generally as cheap as checking for ordered/unordered.
608 bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;
609
610 /// \brief Return the expected cost of supporting the floating point operation
611 /// of the specified type.
612 int getFPOpCost(Type *Ty) const;
613
614 /// \brief Return the expected cost of materializing the given integer
615 /// immediate of the specified type.
616 int getIntImmCost(const APInt &Imm, Type *Ty) const;
617
618 /// \brief Return the expected cost of materializing the given integer
619 /// immediate of the specified type for a given instruction. The cost can be
620 /// zero if the immediate can be folded into the specified instruction.
621 int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
622 Type *Ty) const;
623 int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
624 Type *Ty) const;
625
626 /// \brief Return the expected cost for the given integer when optimising
627 /// for size. This is different than the other integer immediate cost
628 /// functions in that it is subtarget agnostic. This is useful when you e.g.
629 /// target one ISA such as AArch32 but smaller encodings could be possible
630 /// with another such as Thumb. This return value is used as a penalty when
631 /// the total cost for a constant is calculated (the bigger the cost, the
632 /// more beneficial constant hoisting is).
633 int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
634 Type *Ty) const;
635 /// @}
636
637 /// \name Vector Target Information
638 /// @{
639
640 /// \brief The various kinds of shuffle patterns for vector queries.
641 enum ShuffleKind {
642 SK_Broadcast, ///< Broadcast element 0 to all other elements.
643 SK_Reverse, ///< Reverse the order of the vector.
644 SK_Alternate, ///< Choose alternate elements from vector.
645 SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.
646 SK_ExtractSubvector, ///< ExtractSubvector. Index indicates start offset.
647 SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one
648 ///< with any shuffle mask.
649 SK_PermuteSingleSrc ///< Shuffle elements of single source vector with any
650 ///< shuffle mask.
651 };
652
653 /// \brief Additional information about an operand's possible values.
654 enum OperandValueKind {
655 OK_AnyValue, // Operand can have any value.
656 OK_UniformValue, // Operand is uniform (splat of a value).
657 OK_UniformConstantValue, // Operand is a uniform constant.
658 OK_NonUniformConstantValue // Operand is a non-uniform constant value.
659 };
660
661 /// \brief Additional properties of an operand's values.
662 enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };
663
664 /// \return The number of scalar or vector registers that the target has.
665 /// If 'Vectors' is true, it returns the number of vector registers. If it is
666 /// set to false, it returns the number of scalar registers.
667 unsigned getNumberOfRegisters(bool Vector) const;
668
669 /// \return The width of the largest scalar or vector register type.
670 unsigned getRegisterBitWidth(bool Vector) const;
671
672 /// \return The width of the smallest vector register type.
673 unsigned getMinVectorRegisterBitWidth() const;
674
675 /// \return True if the vectorization factor should be chosen to
676 /// make the vector of the smallest element type match the size of a
677 /// vector register. For wider element types, this could result in
678 /// creating vectors that span multiple vector registers.
679 /// If false, the vectorization factor will be chosen based on the
680 /// size of the widest element type.
681 bool shouldMaximizeVectorBandwidth(bool OptSize) const;
682
683 /// \return True if \p I should be considered for address type promotion.
684 /// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is
685 /// profitable without finding other extensions fed by the same input.
686 bool shouldConsiderAddressTypePromotion(
687 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;
688
689 /// \return The size of a cache line in bytes.
690 unsigned getCacheLineSize() const;
691
692 /// The possible cache levels
693 enum class CacheLevel {
694 L1D, // The L1 data cache
695 L2D, // The L2 data cache
696
697 // We currently do not model L3 caches, as their sizes differ widely between
698 // microarchitectures. Also, we currently do not have a use for L3 cache
699 // size modeling yet.
700 };
701
702 /// \return The size of the cache level in bytes, if available.
703 llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const;
704
705 /// \return The associativity of the cache level, if available.
706 llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const;
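  // Illustrative sketch: a loop transformation sizing its working set could
  // consult the L1 data cache when the target reports it:
  //   if (llvm::Optional<unsigned> L1Size =
  //           TTI.getCacheSize(TargetTransformInfo::CacheLevel::L1D))
  //     Footprint = std::min(Footprint, *L1Size); // 'Footprint' assumed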
707
708 /// \return How much before a load we should place the prefetch instruction.
709 /// This is currently measured in number of instructions.
710 unsigned getPrefetchDistance() const;
711
712 /// \return Some HW prefetchers can handle accesses up to a certain constant
713 /// stride. This is the minimum stride in bytes where it makes sense to start
714 /// adding SW prefetches. The default is 1, i.e. prefetch with any stride.
715 unsigned getMinPrefetchStride() const;
716
717 /// \return The maximum number of iterations to prefetch ahead. If the
718 /// required number of iterations is more than this number, no prefetching is
719 /// performed.
720 unsigned getMaxPrefetchIterationsAhead() const;
721
722 /// \return The maximum interleave factor that any transform should try to
723 /// perform for this target. This number depends on the level of parallelism
724 /// and the number of execution units in the CPU.
725 unsigned getMaxInterleaveFactor(unsigned VF) const;
726
727 /// This is an approximation of reciprocal throughput of a math/logic op.
728 /// A higher cost indicates less expected throughput.
729 /// From Agner Fog's guides, reciprocal throughput is "the average number of
730 /// clock cycles per instruction when the instructions are not part of a
731 /// limiting dependency chain."
732 /// Therefore, costs should be scaled to account for multiple execution units
733 /// on the target that can process this type of instruction. For example, if
734 /// there are 5 scalar integer units and 2 vector integer units that can
735 /// calculate an 'add' in a single cycle, this model should indicate that the
736 /// cost of the vector add instruction is 2.5 times the cost of the scalar
737 /// add instruction.
738 /// \p Args is an optional argument which holds the instruction operands
739 /// values so the TTI can analyze those values searching for special
740 /// cases or optimizations based on those values.
741 int getArithmeticInstrCost(
742 unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,
743 OperandValueKind Opd2Info = OK_AnyValue,
744 OperandValueProperties Opd1PropInfo = OP_None,
745 OperandValueProperties Opd2PropInfo = OP_None,
746 ArrayRef<const Value *> Args = ArrayRef<const Value *>()) const;
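  // Illustrative sketch ('V4I32Ty' assumed to be <4 x i32>): querying the cost
  // of a vector shift whose second operand is a uniform power-of-two constant:
  //   int Cost = TTI.getArithmeticInstrCost(
  //       Instruction::Shl, V4I32Ty, TargetTransformInfo::OK_AnyValue,
  //       TargetTransformInfo::OK_UniformConstantValue,
  //       TargetTransformInfo::OP_None, TargetTransformInfo::OP_PowerOf2);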
747
748 /// \return The cost of a shuffle instruction of kind Kind and of type Tp.
749 /// The index and subtype parameters are used by the subvector insertion and
750 /// extraction shuffle kinds.
751 int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0,
752 Type *SubTp = nullptr) const;
753
754 /// \return The expected cost of cast instructions, such as bitcast, trunc,
755 /// zext, etc. If there is an existing instruction that holds Opcode, it
756 /// may be passed in the 'I' parameter.
757 int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
758 const Instruction *I = nullptr) const;
759
760 /// \return The expected cost of a sign- or zero-extended vector extract. Use
761 /// -1 to indicate that there is no information about the index value.
762 int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
763 unsigned Index = -1) const;
764
765 /// \return The expected cost of control-flow related instructions such as
766 /// Phi, Ret, Br.
767 int getCFInstrCost(unsigned Opcode) const;
768
769 /// \returns The expected cost of compare and select instructions. If there
770 /// is an existing instruction that holds Opcode, it may be passed in the
771 /// 'I' parameter.
772 int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
773 Type *CondTy = nullptr, const Instruction *I = nullptr) const;
774
775 /// \return The expected cost of vector Insert and Extract.
776 /// Use -1 to indicate that there is no information on the index value.
777 int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;
778
779 /// \return The cost of Load and Store instructions.
780 int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
781 unsigned AddressSpace, const Instruction *I = nullptr) const;
782
783 /// \return The cost of masked Load and Store instructions.
784 int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
785 unsigned AddressSpace) const;
786
787 /// \return The cost of Gather or Scatter operation
788 /// \p Opcode - the type of memory access, Load or Store
789 /// \p DataTy - a vector type of the data to be loaded or stored
790 /// \p Ptr - pointer [or vector of pointers] - address[es] in memory
791 /// \p VariableMask - true when the memory access is predicated with a mask
792 /// that is not a compile-time constant
793 /// \p Alignment - alignment of single element
794 int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
795 bool VariableMask, unsigned Alignment) const;
796
797 /// \return The cost of the interleaved memory operation.
798 /// \p Opcode is the memory operation code
799 /// \p VecTy is the vector type of the interleaved access.
800 /// \p Factor is the interleave factor
801 /// \p Indices is the indices for interleaved load members (as interleaved
802 /// load allows gaps)
803 /// \p Alignment is the alignment of the memory operation
804 /// \p AddressSpace is address space of the pointer.
805 int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
806 ArrayRef<unsigned> Indices, unsigned Alignment,
807 unsigned AddressSpace) const;
808
809 /// \brief Calculate the cost of performing a vector reduction.
810 ///
811 /// This is the cost of reducing the vector value of type \p Ty to a scalar
812 /// value using the operation denoted by \p Opcode. The form of the reduction
813 /// can either be a pairwise reduction or a reduction that splits the vector
814 /// at every reduction level.
815 ///
816 /// Pairwise:
817 /// (v0, v1, v2, v3)
818 /// ((v0+v1), (v2+v3), undef, undef)
819 /// Split:
820 /// (v0, v1, v2, v3)
821 /// ((v0+v2), (v1+v3), undef, undef)
822 int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
823 bool IsPairwiseForm) const;
824 int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm,
825 bool IsUnsigned) const;
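  // Illustrative sketch: a vectorizer choosing a reduction shape for an i32
  // add over a <4 x i32> value ('V4I32Ty' assumed) might compare
  //   TTI.getArithmeticReductionCost(Instruction::Add, V4I32Ty,
  //                                  /*IsPairwiseForm=*/true)
  // against the same query with IsPairwiseForm = false and pick the cheaper
  // form.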
826
827 /// \returns The cost of Intrinsic instructions. Analyses the real arguments.
828 /// Three cases are handled: 1. scalar instruction, 2. vector instruction,
829 /// 3. scalar instruction which is to be vectorized with VF.
830 int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
831 ArrayRef<Value *> Args, FastMathFlags FMF,
832 unsigned VF = 1) const;
833
834 /// \returns The cost of Intrinsic instructions. Types analysis only.
835 /// If ScalarizationCostPassed is UINT_MAX, the cost of scalarizing the
836 /// arguments and the return value will be computed based on types.
837 int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
838 ArrayRef<Type *> Tys, FastMathFlags FMF,
839 unsigned ScalarizationCostPassed = UINT_MAX) const;
840
841 /// \returns The cost of Call instructions.
842 int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const;
843
844 /// \returns The number of pieces into which the provided type must be
845 /// split during legalization. Zero is returned when the answer is unknown.
846 unsigned getNumberOfParts(Type *Tp) const;
847
848 /// \returns The cost of the address computation. For most targets this can be
849 /// merged into the instruction indexing mode. Some targets might want to
850 /// distinguish between address computation for memory operations on vector
851 /// types and scalar types. Such targets should override this function.
852 /// The 'SE' parameter holds a pointer to the scalar evolution object which
853 /// is used to get the Ptr step value in the case of a constant stride.
854 /// The 'Ptr' parameter holds SCEV of the access pointer.
855 int getAddressComputationCost(Type *Ty, ScalarEvolution *SE = nullptr,
856 const SCEV *Ptr = nullptr) const;
857
858 /// \returns The cost, if any, of keeping values of the given types alive
859 /// over a callsite.
860 ///
861 /// Some types may require the use of register classes that do not have
862 /// any callee-saved registers, so would require a spill and fill.
863 unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;
864
865 /// \returns True if the intrinsic is a supported memory intrinsic. Info
866 /// will contain additional information - whether the intrinsic may write
867 /// to or read from memory, its volatility, and the pointer. Info is undefined
868 /// if false is returned.
869 bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;
870
871 /// \returns The maximum element size, in bytes, for an element
872 /// unordered-atomic memory intrinsic.
873 unsigned getAtomicMemIntrinsicMaxElementSize() const;
874
875 /// \returns A value which is the result of the given memory intrinsic. New
876 /// instructions may be created to extract the result from the given intrinsic
877 /// memory operation. Returns nullptr if the target cannot create a result
878 /// from the given intrinsic.
879 Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
880 Type *ExpectedType) const;
881
882 /// \returns The type to use in a loop expansion of a memcpy call.
883 Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
884 unsigned SrcAlign, unsigned DestAlign) const;
885
886 /// \param[out] OpsOut The operand types to copy RemainingBytes of memory.
887 /// \param RemainingBytes The number of bytes to copy.
888 ///
889 /// Calculates the operand types to use when copying \p RemainingBytes of
890 /// memory, where source and destination alignments are \p SrcAlign and
891 /// \p DestAlign respectively.
892 void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
893 LLVMContext &Context,
894 unsigned RemainingBytes,
895 unsigned SrcAlign,
896 unsigned DestAlign) const;
897
898 /// \returns True if the two functions have compatible attributes for inlining
899 /// purposes.
900 bool areInlineCompatible(const Function *Caller,
901 const Function *Callee) const;
902
903 /// \brief The type of load/store indexing.
904 enum MemIndexedMode {
905 MIM_Unindexed, ///< No indexing.
906 MIM_PreInc, ///< Pre-incrementing.
907 MIM_PreDec, ///< Pre-decrementing.
908 MIM_PostInc, ///< Post-incrementing.
909 MIM_PostDec ///< Post-decrementing.
910 };
911
912 /// \returns True if the specified indexed load for the given type is legal.
913 bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;
914
915 /// \returns True if the specified indexed store for the given type is legal.
916 bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;
917
918 /// \returns The bitwidth of the largest vector type that should be used to
919 /// load/store in the given address space.
920 unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;
921
922 /// \returns True if the load instruction is legal to vectorize.
923 bool isLegalToVectorizeLoad(LoadInst *LI) const;
924
925 /// \returns True if the store instruction is legal to vectorize.
926 bool isLegalToVectorizeStore(StoreInst *SI) const;
927
928 /// \returns True if it is legal to vectorize the given load chain.
929 bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
930 unsigned Alignment,
931 unsigned AddrSpace) const;
932
933 /// \returns True if it is legal to vectorize the given store chain.
934 bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
935 unsigned Alignment,
936 unsigned AddrSpace) const;
937
938 /// \returns The new vector factor value if the target doesn't support \p
939 /// SizeInBytes loads or has a better vector factor.
940 unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
941 unsigned ChainSizeInBytes,
942 VectorType *VecTy) const;
943
944 /// \returns The new vector factor value if the target doesn't support \p
945 /// SizeInBytes stores or has a better vector factor.
946 unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
947 unsigned ChainSizeInBytes,
948 VectorType *VecTy) const;
949
950 /// Flags describing the kind of vector reduction.
951 struct ReductionFlags {
952 ReductionFlags() : IsMaxOp(false), IsSigned(false), NoNaN(false) {}
953 bool IsMaxOp; ///< If the op is a min/max kind, true if it's a max operation.
954 bool IsSigned; ///< Whether the operation is a signed int reduction.
955 bool NoNaN; ///< If op is an fp min/max, whether NaNs may be present.
956 };
957
958 /// \returns True if the target wants to handle the given reduction idiom in
959 /// the intrinsics form instead of the shuffle form.
960 bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
961 ReductionFlags Flags) const;
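  // Illustrative sketch: asking whether a plain integer add reduction over
  // 'VecTy' (assumed) should be emitted in intrinsic form:
  //   TargetTransformInfo::ReductionFlags Flags; // defaults: not min/max
  //   bool UseIntrinsic =
  //       TTI.useReductionIntrinsic(Instruction::Add, VecTy, Flags);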
962
963 /// \returns True if the target wants to expand the given reduction intrinsic
964 /// into a shuffle sequence.
965 bool shouldExpandReduction(const IntrinsicInst *II) const;
966 /// @}
967
968private:
969 /// \brief Estimate the latency of the specified instruction.
970 /// Returns 1 as the default value.
971 int getInstructionLatency(const Instruction *I) const;
972
973 /// \brief Returns the expected throughput cost of the instruction.
974 /// Returns -1 if the cost is unknown.
975 int getInstructionThroughput(const Instruction *I) const;
976
977 /// \brief The abstract base class used to type erase specific TTI
978 /// implementations.
979 class Concept;
980
981 /// \brief The template model for the base class which wraps a concrete
982 /// implementation in a type erased interface.
983 template <typename T> class Model;
984
985 std::unique_ptr<Concept> TTIImpl;
986};
987
988class TargetTransformInfo::Concept {
989public:
990 virtual ~Concept() = 0;
991 virtual const DataLayout &getDataLayout() const = 0;
992 virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0;
993 virtual int getGEPCost(Type *PointeeType, const Value *Ptr,
994 ArrayRef<const Value *> Operands) = 0;
995 virtual int getExtCost(const Instruction *I, const Value *Src) = 0;
996 virtual int getCallCost(FunctionType *FTy, int NumArgs) = 0;
997 virtual int getCallCost(const Function *F, int NumArgs) = 0;
998 virtual int getCallCost(const Function *F,
999 ArrayRef<const Value *> Arguments) = 0;
1000 virtual unsigned getInliningThresholdMultiplier() = 0;
1001 virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
1002 ArrayRef<Type *> ParamTys) = 0;
1003 virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
1004 ArrayRef<const Value *> Arguments) = 0;
1005 virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
1006 unsigned &JTSize) = 0;
1007 virtual int
1008 getUserCost(const User *U, ArrayRef<const Value *> Operands) = 0;
1009 virtual bool hasBranchDivergence() = 0;
1010 virtual bool isSourceOfDivergence(const Value *V) = 0;
1011 virtual bool isAlwaysUniform(const Value *V) = 0;
1012 virtual unsigned getFlatAddressSpace() = 0;
1013 virtual bool isLoweredToCall(const Function *F) = 0;
1014 virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,
1015 UnrollingPreferences &UP) = 0;
1016 virtual bool isLegalAddImmediate(int64_t Imm) = 0;
1017 virtual bool isLegalICmpImmediate(int64_t Imm) = 0;
1018 virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,
1019 int64_t BaseOffset, bool HasBaseReg,
1020 int64_t Scale,
1021 unsigned AddrSpace,
1022 Instruction *I) = 0;
1023 virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
1024 TargetTransformInfo::LSRCost &C2) = 0;
1025 virtual bool canMacroFuseCmp() = 0;
1026 virtual bool shouldFavorPostInc() const = 0;
1027 virtual bool isLegalMaskedStore(Type *DataType) = 0;
1028 virtual bool isLegalMaskedLoad(Type *DataType) = 0;
1029 virtual bool isLegalMaskedScatter(Type *DataType) = 0;
1030 virtual bool isLegalMaskedGather(Type *DataType) = 0;
1031 virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;
1032 virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;
1033 virtual bool prefersVectorizedAddressing() = 0;
1034 virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,
1035 int64_t BaseOffset, bool HasBaseReg,
1036 int64_t Scale, unsigned AddrSpace) = 0;
1037 virtual bool LSRWithInstrQueries() = 0;
1038 virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;
1039 virtual bool isProfitableToHoist(Instruction *I) = 0;
1040 virtual bool useAA() = 0;
1041 virtual bool isTypeLegal(Type *Ty) = 0;
1042 virtual unsigned getJumpBufAlignment() = 0;
1043 virtual unsigned getJumpBufSize() = 0;
1044 virtual bool shouldBuildLookupTables() = 0;
1045 virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;
1046 virtual bool useColdCCForColdCall(Function &F) = 0;
1047 virtual unsigned
1048 getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) = 0;
1049 virtual unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
1050 unsigned VF) = 0;
1051 virtual bool supportsEfficientVectorElementLoadStore() = 0;
1052 virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;
1053 virtual const MemCmpExpansionOptions *enableMemCmpExpansion(
1054 bool IsZeroCmp) const = 0;
1055 virtual bool enableInterleavedAccessVectorization() = 0;
1056 virtual bool isFPVectorizationPotentiallyUnsafe() = 0;
1057 virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
1058 unsigned BitWidth,
1059 unsigned AddressSpace,
1060 unsigned Alignment,
1061 bool *Fast) = 0;
1062 virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;
1063 virtual bool haveFastSqrt(Type *Ty) = 0;
1064 virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;
1065 virtual int getFPOpCost(Type *Ty) = 0;
1066 virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
1067 Type *Ty) = 0;
1068 virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0;
1069 virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
1070 Type *Ty) = 0;
1071 virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
1072 Type *Ty) = 0;
1073 virtual unsigned getNumberOfRegisters(bool Vector) = 0;
1074 virtual unsigned getRegisterBitWidth(bool Vector) const = 0;
1075 virtual unsigned getMinVectorRegisterBitWidth() = 0;
1076 virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0;
1077 virtual bool shouldConsiderAddressTypePromotion(
1078 const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;
1079 virtual unsigned getCacheLineSize() = 0;
1080 virtual llvm::Optional<unsigned> getCacheSize(CacheLevel Level) = 0;
1081 virtual llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) = 0;
1082 virtual unsigned getPrefetchDistance() = 0;
1083 virtual unsigned getMinPrefetchStride() = 0;
1084 virtual unsigned getMaxPrefetchIterationsAhead() = 0;
1085 virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;
1086 virtual unsigned
1087 getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
1088 OperandValueKind Opd2Info,
1089 OperandValueProperties Opd1PropInfo,
1090 OperandValueProperties Opd2PropInfo,
1091 ArrayRef<const Value *> Args) = 0;
1092 virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
1093 Type *SubTp) = 0;
1094 virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
1095 const Instruction *I) = 0;
1096 virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,
1097 VectorType *VecTy, unsigned Index) = 0;
1098 virtual int getCFInstrCost(unsigned Opcode) = 0;
1099 virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,
1100 Type *CondTy, const Instruction *I) = 0;
1101 virtual int getVectorInstrCost(unsigned Opcode, Type *Val,
1102 unsigned Index) = 0;
1103 virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
1104 unsigned AddressSpace, const Instruction *I) = 0;
1105 virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,
1106 unsigned Alignment,
1107 unsigned AddressSpace) = 0;
1108 virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
1109 Value *Ptr, bool VariableMask,
1110 unsigned Alignment) = 0;
1111 virtual int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
1112 unsigned Factor,
1113 ArrayRef<unsigned> Indices,
1114 unsigned Alignment,
1115 unsigned AddressSpace) = 0;
1116 virtual int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
1117 bool IsPairwiseForm) = 0;
1118 virtual int getMinMaxReductionCost(Type *Ty, Type *CondTy,
1119 bool IsPairwiseForm, bool IsUnsigned) = 0;
1120 virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
1121 ArrayRef<Type *> Tys, FastMathFlags FMF,
1122 unsigned ScalarizationCostPassed) = 0;
1123 virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
1124 ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) = 0;
1125 virtual int getCallInstrCost(Function *F, Type *RetTy,
1126 ArrayRef<Type *> Tys) = 0;
1127 virtual unsigned getNumberOfParts(Type *Tp) = 0;
1128 virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
1129 const SCEV *Ptr) = 0;
1130 virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;
1131 virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,
1132 MemIntrinsicInfo &Info) = 0;
1133 virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;
1134 virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
1135 Type *ExpectedType) = 0;
1136 virtual Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
1137 unsigned SrcAlign,
1138 unsigned DestAlign) const = 0;
1139 virtual void getMemcpyLoopResidualLoweringType(
1140 SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,
1141 unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const = 0;
1142 virtual bool areInlineCompatible(const Function *Caller,
1143 const Function *Callee) const = 0;
1144 virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1145 virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;
1146 virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;
1147 virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;
1148 virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;
1149 virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
1150 unsigned Alignment,
1151 unsigned AddrSpace) const = 0;
1152 virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
1153 unsigned Alignment,
1154 unsigned AddrSpace) const = 0;
1155 virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
1156 unsigned ChainSizeInBytes,
1157 VectorType *VecTy) const = 0;
1158 virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
1159 unsigned ChainSizeInBytes,
1160 VectorType *VecTy) const = 0;
1161 virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
1162 ReductionFlags) const = 0;
1163 virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;
1164 virtual int getInstructionLatency(const Instruction *I) = 0;
1165};
1166
template <typename T>
class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {
  T Impl;

public:
  Model(T Impl) : Impl(std::move(Impl)) {}
  ~Model() override {}

  const DataLayout &getDataLayout() const override {
    return Impl.getDataLayout();
  }

  int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override {
    return Impl.getOperationCost(Opcode, Ty, OpTy);
  }
  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands) override {
    return Impl.getGEPCost(PointeeType, Ptr, Operands);
  }
  int getExtCost(const Instruction *I, const Value *Src) override {
    return Impl.getExtCost(I, Src);
  }
  int getCallCost(FunctionType *FTy, int NumArgs) override {
    return Impl.getCallCost(FTy, NumArgs);
  }
  int getCallCost(const Function *F, int NumArgs) override {
    return Impl.getCallCost(F, NumArgs);
  }
  int getCallCost(const Function *F,
                  ArrayRef<const Value *> Arguments) override {
    return Impl.getCallCost(F, Arguments);
  }
  unsigned getInliningThresholdMultiplier() override {
    return Impl.getInliningThresholdMultiplier();
  }
  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                       ArrayRef<Type *> ParamTys) override {
    return Impl.getIntrinsicCost(IID, RetTy, ParamTys);
  }
  int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                       ArrayRef<const Value *> Arguments) override {
    return Impl.getIntrinsicCost(IID, RetTy, Arguments);
  }
  int getUserCost(const User *U, ArrayRef<const Value *> Operands) override {
    return Impl.getUserCost(U, Operands);
  }
  bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }
  bool isSourceOfDivergence(const Value *V) override {
    return Impl.isSourceOfDivergence(V);
  }

  bool isAlwaysUniform(const Value *V) override {
    return Impl.isAlwaysUniform(V);
  }

  unsigned getFlatAddressSpace() override {
    return Impl.getFlatAddressSpace();
  }

  bool isLoweredToCall(const Function *F) override {
    return Impl.isLoweredToCall(F);
  }
  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               UnrollingPreferences &UP) override {
    return Impl.getUnrollingPreferences(L, SE, UP);
  }
  bool isLegalAddImmediate(int64_t Imm) override {
    return Impl.isLegalAddImmediate(Imm);
  }
  bool isLegalICmpImmediate(int64_t Imm) override {
    return Impl.isLegalICmpImmediate(Imm);
  }
  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace,
                             Instruction *I) override {
    return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
                                      Scale, AddrSpace, I);
  }
  bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,
                     TargetTransformInfo::LSRCost &C2) override {
    return Impl.isLSRCostLess(C1, C2);
  }
  bool canMacroFuseCmp() override {
    return Impl.canMacroFuseCmp();
  }
  bool shouldFavorPostInc() const override {
    return Impl.shouldFavorPostInc();
  }
  bool isLegalMaskedStore(Type *DataType) override {
    return Impl.isLegalMaskedStore(DataType);
  }
  bool isLegalMaskedLoad(Type *DataType) override {
    return Impl.isLegalMaskedLoad(DataType);
  }
  bool isLegalMaskedScatter(Type *DataType) override {
    return Impl.isLegalMaskedScatter(DataType);
  }
  bool isLegalMaskedGather(Type *DataType) override {
    return Impl.isLegalMaskedGather(DataType);
  }
  bool hasDivRemOp(Type *DataType, bool IsSigned) override {
    return Impl.hasDivRemOp(DataType, IsSigned);
  }
  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {
    return Impl.hasVolatileVariant(I, AddrSpace);
  }
  bool prefersVectorizedAddressing() override {
    return Impl.prefersVectorizedAddressing();
  }
  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale,
                           unsigned AddrSpace) override {
    return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,
                                     Scale, AddrSpace);
  }
  bool LSRWithInstrQueries() override {
    return Impl.LSRWithInstrQueries();
  }
  bool isTruncateFree(Type *Ty1, Type *Ty2) override {
    return Impl.isTruncateFree(Ty1, Ty2);
  }
  bool isProfitableToHoist(Instruction *I) override {
    return Impl.isProfitableToHoist(I);
  }
  bool useAA() override { return Impl.useAA(); }
  bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }
  unsigned getJumpBufAlignment() override { return Impl.getJumpBufAlignment(); }
  unsigned getJumpBufSize() override { return Impl.getJumpBufSize(); }
  bool shouldBuildLookupTables() override {
    return Impl.shouldBuildLookupTables();
  }
  bool shouldBuildLookupTablesForConstant(Constant *C) override {
    return Impl.shouldBuildLookupTablesForConstant(C);
  }
  bool useColdCCForColdCall(Function &F) override {
    return Impl.useColdCCForColdCall(F);
  }

  unsigned getScalarizationOverhead(Type *Ty, bool Insert,
                                    bool Extract) override {
    return Impl.getScalarizationOverhead(Ty, Insert, Extract);
  }
  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                            unsigned VF) override {
    return Impl.getOperandsScalarizationOverhead(Args, VF);
  }

  bool supportsEfficientVectorElementLoadStore() override {
    return Impl.supportsEfficientVectorElementLoadStore();
  }

  bool enableAggressiveInterleaving(bool LoopHasReductions) override {
    return Impl.enableAggressiveInterleaving(LoopHasReductions);
  }
  const MemCmpExpansionOptions *enableMemCmpExpansion(
      bool IsZeroCmp) const override {
    return Impl.enableMemCmpExpansion(IsZeroCmp);
  }
  bool enableInterleavedAccessVectorization() override {
    return Impl.enableInterleavedAccessVectorization();
  }
  bool isFPVectorizationPotentiallyUnsafe() override {
    return Impl.isFPVectorizationPotentiallyUnsafe();
  }
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                      unsigned BitWidth, unsigned AddressSpace,
                                      unsigned Alignment, bool *Fast) override {
    return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,
                                               Alignment, Fast);
  }
  PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {
    return Impl.getPopcntSupport(IntTyWidthInBit);
  }
  bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }

  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {
    return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
  }

  int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }

  int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                            Type *Ty) override {
    return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);
  }
  int getIntImmCost(const APInt &Imm, Type *Ty) override {
    return Impl.getIntImmCost(Imm, Ty);
  }
  int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,
                    Type *Ty) override {
    return Impl.getIntImmCost(Opc, Idx, Imm, Ty);
  }
  int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                    Type *Ty) override {
    return Impl.getIntImmCost(IID, Idx, Imm, Ty);
  }
  unsigned getNumberOfRegisters(bool Vector) override {
    return Impl.getNumberOfRegisters(Vector);
  }
  unsigned getRegisterBitWidth(bool Vector) const override {
    return Impl.getRegisterBitWidth(Vector);
  }
  unsigned getMinVectorRegisterBitWidth() override {
    return Impl.getMinVectorRegisterBitWidth();
  }
  bool shouldMaximizeVectorBandwidth(bool OptSize) const override {
    return Impl.shouldMaximizeVectorBandwidth(OptSize);
  }
  bool shouldConsiderAddressTypePromotion(
      const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {
    return Impl.shouldConsiderAddressTypePromotion(
        I, AllowPromotionWithoutCommonHeader);
  }
  unsigned getCacheLineSize() override {
    return Impl.getCacheLineSize();
  }
  llvm::Optional<unsigned> getCacheSize(CacheLevel Level) override {
    return Impl.getCacheSize(Level);
  }
  llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) override {
    return Impl.getCacheAssociativity(Level);
  }
  unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); }
  unsigned getMinPrefetchStride() override {
    return Impl.getMinPrefetchStride();
  }
  unsigned getMaxPrefetchIterationsAhead() override {
    return Impl.getMaxPrefetchIterationsAhead();
  }
  unsigned getMaxInterleaveFactor(unsigned VF) override {
    return Impl.getMaxInterleaveFactor(VF);
  }
  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize) override {
    return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize);
  }
  unsigned
  getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,
                         OperandValueKind Opd2Info,
                         OperandValueProperties Opd1PropInfo,
                         OperandValueProperties Opd2PropInfo,
                         ArrayRef<const Value *> Args) override {
    return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,
                                       Opd1PropInfo, Opd2PropInfo, Args);
  }
  int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,
                     Type *SubTp) override {
    return Impl.getShuffleCost(Kind, Tp, Index, SubTp);
  }
  int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                       const Instruction *I) override {
    return Impl.getCastInstrCost(Opcode, Dst, Src, I);
  }
  int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,
                               unsigned Index) override {
    return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);
  }
  int getCFInstrCost(unsigned Opcode) override {
    return Impl.getCFInstrCost(Opcode);
  }
  int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                         const Instruction *I) override {
    return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, I);
  }
  int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {
    return Impl.getVectorInstrCost(Opcode, Val, Index);
  }
  int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                      unsigned AddressSpace, const Instruction *I) override {
    return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);
  }
  int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                            unsigned AddressSpace) override {
    return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);
  }
  int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                             Value *Ptr, bool VariableMask,
                             unsigned Alignment) override {
    return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,
                                       Alignment);
  }
  int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,
                                 ArrayRef<unsigned> Indices, unsigned Alignment,
                                 unsigned AddressSpace) override {
    return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,
                                           Alignment, AddressSpace);
  }
  int getArithmeticReductionCost(unsigned Opcode, Type *Ty,
                                 bool IsPairwiseForm) override {
    return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);
  }
  int getMinMaxReductionCost(Type *Ty, Type *CondTy,
                             bool IsPairwiseForm, bool IsUnsigned) override {
    return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);
  }
  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,
      FastMathFlags FMF, unsigned ScalarizationCostPassed) override {
    return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF,
                                      ScalarizationCostPassed);
  }
  int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
       ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) override {
    return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);
  }
  int getCallInstrCost(Function *F, Type *RetTy,
                       ArrayRef<Type *> Tys) override {
    return Impl.getCallInstrCost(F, RetTy, Tys);
  }
  unsigned getNumberOfParts(Type *Tp) override {
    return Impl.getNumberOfParts(Tp);
  }
  int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,
                                const SCEV *Ptr) override {
    return Impl.getAddressComputationCost(Ty, SE, Ptr);
  }
  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {
    return Impl.getCostOfKeepingLiveOverCall(Tys);
  }
  bool getTgtMemIntrinsic(IntrinsicInst *Inst,
                          MemIntrinsicInfo &Info) override {
    return Impl.getTgtMemIntrinsic(Inst, Info);
  }
  unsigned getAtomicMemIntrinsicMaxElementSize() const override {
    return Impl.getAtomicMemIntrinsicMaxElementSize();
  }
  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) override {
    return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
  }
  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                  unsigned SrcAlign,
                                  unsigned DestAlign) const override {
    return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAlign, DestAlign);
  }
  void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
                                         LLVMContext &Context,
                                         unsigned RemainingBytes,
                                         unsigned SrcAlign,
                                         unsigned DestAlign) const override {
    Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,
                                           SrcAlign, DestAlign);
  }
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const override {
    return Impl.areInlineCompatible(Caller, Callee);
  }
  bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {
    return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());
  }
  bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {
    return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());
  }
  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {
    return Impl.getLoadStoreVecRegBitWidth(AddrSpace);
  }
  bool isLegalToVectorizeLoad(LoadInst *LI) const override {
    return Impl.isLegalToVectorizeLoad(LI);
  }
  bool isLegalToVectorizeStore(StoreInst *SI) const override {
    return Impl.isLegalToVectorizeStore(SI);
  }
  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const override {
    return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,
                                            AddrSpace);
  }
  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const override {
    return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,
                                             AddrSpace);
  }
  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const override {
    return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);
  }
  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const override {
    return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);
  }
  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                             ReductionFlags Flags) const override {
    return Impl.useReductionIntrinsic(Opcode, Ty, Flags);
  }
  bool shouldExpandReduction(const IntrinsicInst *II) const override {
    return Impl.shouldExpandReduction(II);
  }
  int getInstructionLatency(const Instruction *I) override {
    return Impl.getInstructionLatency(I);
  }
};

template <typename T>
TargetTransformInfo::TargetTransformInfo(T Impl)
    : TTIImpl(new Model<T>(Impl)) {}

/// \brief Analysis pass providing the \c TargetTransformInfo.
///
/// The core idea of the TargetIRAnalysis is to expose an interface through
/// which LLVM targets can analyze and provide information about the middle
/// end's target-independent IR. This supports use cases such as target-aware
/// cost modeling of IR constructs.
///
/// This is a function analysis because much of the cost modeling for targets
/// is done in a subtarget-specific way and LLVM supports compiling different
/// functions targeting different subtargets in order to support runtime
/// dispatch according to the observed subtarget.
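///
/// For example, a pass running under the new pass manager can obtain the
/// result for a function like this (a minimal sketch; \c ExamplePass is a
/// hypothetical pass and \c FAM is the FunctionAnalysisManager passed to its
/// \c run method):
/// \code
///   PreservedAnalyses ExamplePass::run(Function &F,
///                                      FunctionAnalysisManager &FAM) {
///     TargetTransformInfo &TTI = FAM.getResult<TargetIRAnalysis>(F);
///     // Query the target, e.g. whether sqrt is cheap on this subtarget.
///     bool FastSqrt = TTI.haveFastSqrt(Type::getFloatTy(F.getContext()));
///     (void)FastSqrt;
///     return PreservedAnalyses::all();
///   }
/// \endcode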
class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {
public:
  typedef TargetTransformInfo Result;

  /// \brief Default construct a target IR analysis.
  ///
  /// This will use the module's datalayout to construct a baseline
  /// conservative TTI result.
  TargetIRAnalysis();

  /// \brief Construct an IR analysis pass around a target-provided callback.
  ///
  /// The callback will be called with a particular function for which the TTI
  /// is needed and must return a TTI object for that function.
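  ///
  /// For example, a target would typically wire the callback to its
  /// TargetMachine (a sketch only; it assumes a \c TM object whose
  /// \c getTargetTransformInfo(F) returns a \c TargetTransformInfo):
  /// \code
  ///   TargetIRAnalysis TIRA(
  ///       [&TM](const Function &F) { return TM.getTargetTransformInfo(F); });
  /// \endcode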
  TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);

  // Value semantics. We spell out the constructors for MSVC.
  TargetIRAnalysis(const TargetIRAnalysis &Arg)
      : TTICallback(Arg.TTICallback) {}
  TargetIRAnalysis(TargetIRAnalysis &&Arg)
      : TTICallback(std::move(Arg.TTICallback)) {}
  TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {
    TTICallback = RHS.TTICallback;
    return *this;
  }
  TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {
    TTICallback = std::move(RHS.TTICallback);
    return *this;
  }

  Result run(const Function &F, FunctionAnalysisManager &);

private:
  friend AnalysisInfoMixin<TargetIRAnalysis>;
  static AnalysisKey Key;

  /// \brief The callback used to produce a result.
  ///
  /// We use a completely opaque callback so that targets can provide whatever
  /// mechanism they desire for constructing the TTI for a given function.
  ///
  /// FIXME: Should we really use std::function? It's relatively inefficient.
  /// It might be possible to arrange for even stateful callbacks to outlive
  /// the analysis and thus use a function_ref which would be lighter weight.
  /// This may also be less error prone as the callback is likely to reference
  /// the external TargetMachine, and that reference needs to never dangle.
  std::function<Result(const Function &)> TTICallback;

  /// \brief Helper function used as the callback in the default constructor.
  static Result getDefaultTTI(const Function &F);
};

/// \brief Wrapper pass for TargetTransformInfo.
///
/// This pass can be constructed from a TTI object which it stores internally
/// and is queried by passes.
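///
/// Under the legacy pass manager a transformation declares a dependency on
/// this wrapper and queries it per function, for example (a minimal sketch
/// for a hypothetical legacy pass \c ExamplePass):
/// \code
///   void ExamplePass::getAnalysisUsage(AnalysisUsage &AU) const {
///     AU.addRequired<TargetTransformInfoWrapperPass>();
///   }
///   bool ExamplePass::runOnFunction(Function &F) {
///     TargetTransformInfo &TTI =
///         getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
///     // ... use TTI to cost-model the transformation ...
///     return false;
///   }
/// \endcode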
class TargetTransformInfoWrapperPass : public ImmutablePass {
  TargetIRAnalysis TIRA;
  Optional<TargetTransformInfo> TTI;

  virtual void anchor();

public:
  static char ID;

  /// \brief We must provide a default constructor for the pass but it should
  /// never be used.
  ///
  /// Use the constructor below or call one of the creation routines.
  TargetTransformInfoWrapperPass();

  explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);

  TargetTransformInfo &getTTI(const Function &F);
};

/// \brief Create an analysis pass wrapper around a TTI object.
///
/// This analysis pass just holds the TTI instance and makes it available to
/// clients.
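///
/// For example, a driver building a legacy pipeline might register it as
/// follows (a sketch only; it assumes a \c TM pointer to the selected
/// TargetMachine and a legacy \c PM pass manager):
/// \code
///   PM.add(createTargetTransformInfoWrapperPass(TM->getTargetIRAnalysis()));
/// \endcode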
ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);

} // End llvm namespace

#endif