//===- TargetTransformInfoImpl.h --------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
/// \file
/// This file provides helpers for the implementation of
/// a TargetTransformInfo-conforming class.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H
#define LLVM_ANALYSIS_TARGETTRANSFORMINFOIMPL_H

#include "llvm/Analysis/ScalarEvolutionExpressions.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/VectorUtils.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/GetElementPtrTypeIterator.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"

namespace llvm {

/// Base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
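///
/// Every hook here returns a conservative, target-independent default;
/// target-specific implementations are expected to override only the
/// queries they can answer more precisely.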
class TargetTransformInfoImplBase {
protected:
  typedef TargetTransformInfo TTI;

  const DataLayout &DL;

  explicit TargetTransformInfoImplBase(const DataLayout &DL) : DL(DL) {}

public:
  // Provide value semantics. MSVC requires that we spell all of these out.
  TargetTransformInfoImplBase(const TargetTransformInfoImplBase &Arg)
      : DL(Arg.DL) {}
  TargetTransformInfoImplBase(TargetTransformInfoImplBase &&Arg) : DL(Arg.DL) {}

  const DataLayout &getDataLayout() const { return DL; }

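  // The TTI::TCC_* values used below are the generic cost buckets defined by
  // TargetTransformInfo: TCC_Free (expected to fold away), TCC_Basic (roughly
  // one cheap instruction), and TCC_Expensive (roughly the cost of a
  // division).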
  unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) {
    switch (Opcode) {
    default:
      // By default, just classify everything as 'basic'.
      return TTI::TCC_Basic;

    case Instruction::GetElementPtr:
      llvm_unreachable("Use getGEPCost for GEP operations!");

    case Instruction::BitCast:
      assert(OpTy && "Cast instructions must provide the operand type");
      if (Ty == OpTy || (Ty->isPointerTy() && OpTy->isPointerTy()))
        // Identity and pointer-to-pointer casts are free.
        return TTI::TCC_Free;

      // Otherwise, the default basic cost is used.
      return TTI::TCC_Basic;

    case Instruction::FDiv:
    case Instruction::FRem:
    case Instruction::SDiv:
    case Instruction::SRem:
    case Instruction::UDiv:
    case Instruction::URem:
      return TTI::TCC_Expensive;

    case Instruction::IntToPtr: {
      // An inttoptr cast is free so long as the input is a legal integer type
      // which doesn't contain values outside the range of a pointer.
      unsigned OpSize = OpTy->getScalarSizeInBits();
      if (DL.isLegalInteger(OpSize) &&
          OpSize <= DL.getPointerTypeSizeInBits(Ty))
        return TTI::TCC_Free;

      // Otherwise it's not a no-op.
      return TTI::TCC_Basic;
    }
    case Instruction::PtrToInt: {
      // A ptrtoint cast is free so long as the result is large enough to store
      // the pointer, and a legal integer type.
      unsigned DestSize = Ty->getScalarSizeInBits();
      if (DL.isLegalInteger(DestSize) &&
          DestSize >= DL.getPointerTypeSizeInBits(OpTy))
        return TTI::TCC_Free;

      // Otherwise it's not a no-op.
      return TTI::TCC_Basic;
    }
    case Instruction::Trunc:
      // trunc to a native type is free (assuming the target has compare and
      // shift-right of the same width).
      if (DL.isLegalInteger(DL.getTypeSizeInBits(Ty)))
        return TTI::TCC_Free;

      return TTI::TCC_Basic;
    }
  }

  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands) {
    // In the basic model, we just assume that all-constant GEPs will be folded
    // into their uses via addressing modes.
    for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx)
      if (!isa<Constant>(Operands[Idx]))
        return TTI::TCC_Basic;

    return TTI::TCC_Free;
  }

  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JTSize) {
    JTSize = 0;
    return SI.getNumCases();
  }

  int getExtCost(const Instruction *I, const Value *Src) {
    return TTI::TCC_Basic;
  }

  unsigned getCallCost(FunctionType *FTy, int NumArgs, const User *U) {
    assert(FTy && "FunctionType must be provided to this routine.");

    // The target-independent implementation just measures the size of the
    // function by approximating that each argument will take on average one
    // instruction to prepare.

    if (NumArgs < 0)
      // Set the argument number to the number of explicit arguments in the
      // function.
      NumArgs = FTy->getNumParams();

    return TTI::TCC_Basic * (NumArgs + 1);
  }

  unsigned getInliningThresholdMultiplier() { return 1; }

  unsigned getMemcpyCost(const Instruction *I) {
    return TTI::TCC_Expensive;
  }

  bool hasBranchDivergence() { return false; }

  bool isSourceOfDivergence(const Value *V) { return false; }

  bool isAlwaysUniform(const Value *V) { return false; }

  unsigned getFlatAddressSpace() {
    return -1;
  }

  bool isLoweredToCall(const Function *F) {
    assert(F && "A concrete function must be provided to this routine.");

    // FIXME: These should almost certainly not be handled here, and instead
    // handled with the help of TLI or the target itself. This was largely
    // ported from existing analysis heuristics here so that such refactorings
    // can take place in the future.

    if (F->isIntrinsic())
      return false;

    if (F->hasLocalLinkage() || !F->hasName())
      return true;

    StringRef Name = F->getName();

    // These will all likely lower to a single selection DAG node.
    if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" ||
        Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" ||
        Name == "fmin" || Name == "fminf" || Name == "fminl" ||
        Name == "fmax" || Name == "fmaxf" || Name == "fmaxl" ||
        Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" ||
        Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl")
      return false;

    // These are all likely to be optimized into something smaller.
    if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" ||
        Name == "exp2l" || Name == "exp2f" || Name == "floor" ||
        Name == "floorf" || Name == "ceil" || Name == "round" ||
        Name == "ffs" || Name == "ffsl" || Name == "abs" || Name == "labs" ||
        Name == "llabs")
      return false;

    return true;
  }

  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC,
                                TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) {
    return false;
  }

  void getUnrollingPreferences(Loop *, ScalarEvolution &,
                               TTI::UnrollingPreferences &) {}

  bool isLegalAddImmediate(int64_t Imm) { return false; }

  bool isLegalICmpImmediate(int64_t Imm) { return false; }

  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace, Instruction *I = nullptr) {
    // Guess that only reg and reg+reg addressing is allowed. This heuristic is
    // taken from the implementation of LSR.
    return !BaseGV && BaseOffset == 0 && (Scale == 0 || Scale == 1);
  }

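  // The comparison below is lexicographic: NumRegs is the most significant
  // LSR cost component and SetupCost the least significant.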
  bool isLSRCostLess(TTI::LSRCost &C1, TTI::LSRCost &C2) {
    return std::tie(C1.NumRegs, C1.AddRecCost, C1.NumIVMuls, C1.NumBaseAdds,
                    C1.ScaleCost, C1.ImmCost, C1.SetupCost) <
           std::tie(C2.NumRegs, C2.AddRecCost, C2.NumIVMuls, C2.NumBaseAdds,
                    C2.ScaleCost, C2.ImmCost, C2.SetupCost);
  }

  bool canMacroFuseCmp() { return false; }

  bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,
                  DominatorTree *DT, AssumptionCache *AC,
                  TargetLibraryInfo *LibInfo) {
    return false;
  }

  bool shouldFavorPostInc() const { return false; }

  bool shouldFavorBackedgeIndex(const Loop *L) const { return false; }

  bool isLegalMaskedStore(Type *DataType) { return false; }

  bool isLegalMaskedLoad(Type *DataType) { return false; }

  bool isLegalNTStore(Type *DataType, unsigned Alignment) {
    // By default, assume nontemporal memory stores are available for stores
    // that are aligned and have a size that is a power of 2.
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }

  bool isLegalNTLoad(Type *DataType, unsigned Alignment) {
    // By default, assume nontemporal memory loads are available for loads that
    // are aligned and have a size that is a power of 2.
    unsigned DataSize = DL.getTypeStoreSize(DataType);
    return Alignment >= DataSize && isPowerOf2_32(DataSize);
  }

  bool isLegalMaskedScatter(Type *DataType) { return false; }

  bool isLegalMaskedGather(Type *DataType) { return false; }

  bool isLegalMaskedCompressStore(Type *DataType) { return false; }

  bool isLegalMaskedExpandLoad(Type *DataType) { return false; }

  bool hasDivRemOp(Type *DataType, bool IsSigned) { return false; }

  bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) { return false; }

  bool prefersVectorizedAddressing() { return true; }

  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
    // Guess that all legal addressing modes are free.
    if (isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,
                              Scale, AddrSpace))
      return 0;
    return -1;
  }

  bool LSRWithInstrQueries() { return false; }

  bool isTruncateFree(Type *Ty1, Type *Ty2) { return false; }

  bool isProfitableToHoist(Instruction *I) { return true; }

  bool useAA() { return false; }

  bool isTypeLegal(Type *Ty) { return false; }

  unsigned getJumpBufAlignment() { return 0; }

  unsigned getJumpBufSize() { return 0; }

  bool shouldBuildLookupTables() { return true; }
  bool shouldBuildLookupTablesForConstant(Constant *C) { return true; }

  bool useColdCCForColdCall(Function &F) { return false; }

  unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) {
    return 0;
  }

  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                            unsigned VF) { return 0; }

  bool supportsEfficientVectorElementLoadStore() { return false; }

  bool enableAggressiveInterleaving(bool LoopHasReductions) { return false; }

  TTI::MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,
                                                    bool IsZeroCmp) const {
    return {};
  }

  bool enableInterleavedAccessVectorization() { return false; }

  bool enableMaskedInterleavedAccessVectorization() { return false; }

  bool isFPVectorizationPotentiallyUnsafe() { return false; }

  bool allowsMisalignedMemoryAccesses(LLVMContext &Context,
                                      unsigned BitWidth,
                                      unsigned AddressSpace,
                                      unsigned Alignment,
                                      bool *Fast) { return false; }

  TTI::PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) {
    return TTI::PSK_Software;
  }

  bool haveFastSqrt(Type *Ty) { return false; }

  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) { return true; }

  unsigned getFPOpCost(Type *Ty) { return TargetTransformInfo::TCC_Basic; }

  int getIntImmCodeSizeCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                            Type *Ty) {
    return 0;
  }

  unsigned getIntImmCost(const APInt &Imm, Type *Ty) { return TTI::TCC_Basic; }

  unsigned getIntImmCost(unsigned Opcode, unsigned Idx, const APInt &Imm,
                         Type *Ty) {
    return TTI::TCC_Free;
  }

  unsigned getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,
                         Type *Ty) {
    return TTI::TCC_Free;
  }

  unsigned getNumberOfRegisters(bool Vector) { return 8; }

  unsigned getRegisterBitWidth(bool Vector) const { return 32; }

  unsigned getMinVectorRegisterBitWidth() { return 128; }

  bool shouldMaximizeVectorBandwidth(bool OptSize) const { return false; }

  unsigned getMinimumVF(unsigned ElemWidth) const { return 0; }

  bool
  shouldConsiderAddressTypePromotion(const Instruction &I,
                                     bool &AllowPromotionWithoutCommonHeader) {
    AllowPromotionWithoutCommonHeader = false;
    return false;
  }

  unsigned getCacheLineSize() { return 0; }

  llvm::Optional<unsigned> getCacheSize(TargetTransformInfo::CacheLevel Level) {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      LLVM_FALLTHROUGH;
    case TargetTransformInfo::CacheLevel::L2D:
      return llvm::Optional<unsigned>();
    }

    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  llvm::Optional<unsigned>
  getCacheAssociativity(TargetTransformInfo::CacheLevel Level) {
    switch (Level) {
    case TargetTransformInfo::CacheLevel::L1D:
      LLVM_FALLTHROUGH;
    case TargetTransformInfo::CacheLevel::L2D:
      return llvm::Optional<unsigned>();
    }

    llvm_unreachable("Unknown TargetTransformInfo::CacheLevel");
  }

  unsigned getPrefetchDistance() { return 0; }

  unsigned getMinPrefetchStride() { return 1; }

  unsigned getMaxPrefetchIterationsAhead() { return UINT_MAX; }

  unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }

  unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,
                                  TTI::OperandValueKind Opd1Info,
                                  TTI::OperandValueKind Opd2Info,
                                  TTI::OperandValueProperties Opd1PropInfo,
                                  TTI::OperandValueProperties Opd2PropInfo,
                                  ArrayRef<const Value *> Args) {
    return 1;
  }

  unsigned getShuffleCost(TTI::ShuffleKind Kind, Type *Ty, int Index,
                          Type *SubTp) {
    return 1;
  }

  unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                            const Instruction *I) { return 1; }

  unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                    VectorType *VecTy, unsigned Index) {
    return 1;
  }

  unsigned getCFInstrCost(unsigned Opcode) { return 1; }

  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                              const Instruction *I) {
    return 1;
  }

  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
    return 1;
  }

  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                           unsigned AddressSpace, const Instruction *I) {
    return 1;
  }

  unsigned getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,
                                 unsigned AddressSpace) {
    return 1;
  }

  unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,
                                  bool VariableMask,
                                  unsigned Alignment) {
    return 1;
  }

  unsigned getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,
                                      unsigned Factor,
                                      ArrayRef<unsigned> Indices,
                                      unsigned Alignment, unsigned AddressSpace,
                                      bool UseMaskForCond = false,
                                      bool UseMaskForGaps = false) {
    return 1;
  }

  unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                 ArrayRef<Type *> Tys, FastMathFlags FMF,
                                 unsigned ScalarizationCostPassed) {
    return 1;
  }
  unsigned getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,
                                 ArrayRef<Value *> Args, FastMathFlags FMF,
                                 unsigned VF) {
    return 1;
  }

  unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) {
    return 1;
  }

  unsigned getNumberOfParts(Type *Tp) { return 0; }

  unsigned getAddressComputationCost(Type *Tp, ScalarEvolution *,
                                     const SCEV *) {
    return 0;
  }

  unsigned getArithmeticReductionCost(unsigned, Type *, bool) { return 1; }

  unsigned getMinMaxReductionCost(Type *, Type *, bool, bool) { return 1; }

  unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) { return 0; }

  bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) {
    return false;
  }

  unsigned getAtomicMemIntrinsicMaxElementSize() const {
    // Note for overrides: You must ensure for all element unordered-atomic
    // memory intrinsics that all power-of-2 element sizes up to, and
    // including, the return value of this method have a corresponding
    // runtime lib call. These runtime lib call definitions can be found
    // in RuntimeLibcalls.h.
    return 0;
  }

  Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,
                                           Type *ExpectedType) {
    return nullptr;
  }

  Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,
                                  unsigned SrcAlign, unsigned DestAlign) const {
    return Type::getInt8Ty(Context);
  }

  void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,
                                         LLVMContext &Context,
                                         unsigned RemainingBytes,
                                         unsigned SrcAlign,
                                         unsigned DestAlign) const {
    for (unsigned i = 0; i != RemainingBytes; ++i)
      OpsOut.push_back(Type::getInt8Ty(Context));
  }

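  // By default, a callee is considered inline-compatible with (and its
  // arguments ABI-compatible with) a caller only when both functions carry
  // identical "target-cpu" and "target-features" attributes.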
  bool areInlineCompatible(const Function *Caller,
                           const Function *Callee) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  bool areFunctionArgsABICompatible(const Function *Caller,
                                    const Function *Callee,
                                    SmallPtrSetImpl<Argument *> &Args) const {
    return (Caller->getFnAttribute("target-cpu") ==
            Callee->getFnAttribute("target-cpu")) &&
           (Caller->getFnAttribute("target-features") ==
            Callee->getFnAttribute("target-features"));
  }

  bool isIndexedLoadLegal(TTI::MemIndexedMode Mode, Type *Ty,
                          const DataLayout &DL) const {
    return false;
  }

  bool isIndexedStoreLegal(TTI::MemIndexedMode Mode, Type *Ty,
                           const DataLayout &DL) const {
    return false;
  }

  unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const { return 128; }

  bool isLegalToVectorizeLoad(LoadInst *LI) const { return true; }

  bool isLegalToVectorizeStore(StoreInst *SI) const { return true; }

  bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,
                                   unsigned Alignment,
                                   unsigned AddrSpace) const {
    return true;
  }

  bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,
                                    unsigned Alignment,
                                    unsigned AddrSpace) const {
    return true;
  }

  unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,
                               unsigned ChainSizeInBytes,
                               VectorType *VecTy) const {
    return VF;
  }

  unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,
                                unsigned ChainSizeInBytes,
                                VectorType *VecTy) const {
    return VF;
  }

  bool useReductionIntrinsic(unsigned Opcode, Type *Ty,
                             TTI::ReductionFlags Flags) const {
    return false;
  }

  bool shouldExpandReduction(const IntrinsicInst *II) const {
    return true;
  }

  unsigned getGISelRematGlobalCost() const {
    return 1;
  }

protected:
  // Obtain the minimum required size to hold the value (without the sign).
  // In case of a vector, it returns the min required size for one element.
  unsigned minRequiredElementSize(const Value *Val, bool &isSigned) {
    if (isa<ConstantDataVector>(Val) || isa<ConstantVector>(Val)) {
      const auto *VectorValue = cast<Constant>(Val);

      // In case of a vector, we need to pick the max of the min required
      // sizes of the elements.
      auto *VT = cast<VectorType>(Val->getType());

      // Assume unsigned elements.
      isSigned = false;

      // The max required size is the total vector width divided by the
      // number of elements in the vector.
      unsigned MaxRequiredSize = VT->getBitWidth() / VT->getNumElements();

      unsigned MinRequiredSize = 0;
      for (unsigned i = 0, e = VT->getNumElements(); i < e; ++i) {
        if (auto *IntElement =
                dyn_cast<ConstantInt>(VectorValue->getAggregateElement(i))) {
          bool signedElement = IntElement->getValue().isNegative();
          // Get the element min required size.
          unsigned ElementMinRequiredSize =
              IntElement->getValue().getMinSignedBits() - 1;
          // If one element is signed then the whole vector is signed.
          isSigned |= signedElement;
          // Save the max required bit size between all the elements.
          MinRequiredSize = std::max(MinRequiredSize, ElementMinRequiredSize);
        } else {
          // Not an integer constant element; be conservative.
          return MaxRequiredSize;
        }
      }
      return MinRequiredSize;
    }

    if (const auto *CI = dyn_cast<ConstantInt>(Val)) {
      isSigned = CI->getValue().isNegative();
      return CI->getValue().getMinSignedBits() - 1;
    }

    if (const auto *Cast = dyn_cast<SExtInst>(Val)) {
      isSigned = true;
      return Cast->getSrcTy()->getScalarSizeInBits() - 1;
    }

    if (const auto *Cast = dyn_cast<ZExtInst>(Val)) {
      isSigned = false;
      return Cast->getSrcTy()->getScalarSizeInBits();
    }

    isSigned = false;
    return Val->getType()->getScalarSizeInBits();
  }

  bool isStridedAccess(const SCEV *Ptr) {
    return Ptr && isa<SCEVAddRecExpr>(Ptr);
  }

  const SCEVConstant *getConstantStrideStep(ScalarEvolution *SE,
                                            const SCEV *Ptr) {
    if (!isStridedAccess(Ptr))
      return nullptr;
    const SCEVAddRecExpr *AddRec = cast<SCEVAddRecExpr>(Ptr);
    return dyn_cast<SCEVConstant>(AddRec->getStepRecurrence(*SE));
  }

  bool isConstantStridedAccessLessThan(ScalarEvolution *SE, const SCEV *Ptr,
                                       int64_t MergeDistance) {
    const SCEVConstant *Step = getConstantStrideStep(SE, Ptr);
    if (!Step)
      return false;
    APInt StrideVal = Step->getAPInt();
    if (StrideVal.getBitWidth() > 64)
      return false;
    // FIXME: Need to take absolute value for negative stride case.
    return StrideVal.getSExtValue() < MergeDistance;
  }
};

/// CRTP base class for use as a mix-in that aids implementing
/// a TargetTransformInfo-compatible class.
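///
/// In-tree targets usually do not derive from this class directly; they go
/// through BasicTTIImplBase (llvm/CodeGen/BasicTTIImpl.h), which builds on
/// it. As a minimal illustrative sketch only (the target name and the chosen
/// immediate width below are hypothetical, not part of LLVM), a direct
/// subclass would look roughly like:
/// \code
///   class MyTargetTTIImpl final
///       : public TargetTransformInfoImplCRTPBase<MyTargetTTIImpl> {
///     typedef TargetTransformInfoImplCRTPBase<MyTargetTTIImpl> BaseT;
///
///   public:
///     explicit MyTargetTTIImpl(const DataLayout &DL) : BaseT(DL) {}
///
///     // Override only the hooks whose conservative defaults are wrong for
///     // this target; everything else falls through to the base class.
///     bool isLegalAddImmediate(int64_t Imm) { return isInt<12>(Imm); }
///   };
/// \endcode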
template <typename T>
class TargetTransformInfoImplCRTPBase : public TargetTransformInfoImplBase {
private:
  typedef TargetTransformInfoImplBase BaseT;

protected:
  explicit TargetTransformInfoImplCRTPBase(const DataLayout &DL) : BaseT(DL) {}

public:
  using BaseT::getCallCost;

  unsigned getCallCost(const Function *F, int NumArgs, const User *U) {
    assert(F && "A concrete function must be provided to this routine.");

    if (NumArgs < 0)
      // Set the argument number to the number of explicit arguments in the
      // function.
      NumArgs = F->arg_size();

    if (Intrinsic::ID IID = F->getIntrinsicID()) {
      FunctionType *FTy = F->getFunctionType();
      SmallVector<Type *, 8> ParamTys(FTy->param_begin(), FTy->param_end());
      return static_cast<T *>(this)
          ->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys, U);
    }

    if (!static_cast<T *>(this)->isLoweredToCall(F))
      return TTI::TCC_Basic; // Give a basic cost if it will be lowered
                             // directly.

    return static_cast<T *>(this)->getCallCost(F->getFunctionType(), NumArgs,
                                               U);
  }

  unsigned getCallCost(const Function *F, ArrayRef<const Value *> Arguments,
                       const User *U) {
    // Simply delegate to generic handling of the call.
    // FIXME: We should use instsimplify or something else to catch calls which
    // will constant fold with these arguments.
    return static_cast<T *>(this)->getCallCost(F, Arguments.size(), U);
  }

  using BaseT::getGEPCost;

  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands) {
    assert(PointeeType && Ptr && "can't get GEPCost of nullptr");
    // TODO: will remove this when pointers have an opaque type.
    assert(Ptr->getType()->getScalarType()->getPointerElementType() ==
               PointeeType &&
           "explicit pointee type doesn't match operand's pointee type");
    auto *BaseGV = dyn_cast<GlobalValue>(Ptr->stripPointerCasts());
    bool HasBaseReg = (BaseGV == nullptr);

    auto PtrSizeBits = DL.getPointerTypeSizeInBits(Ptr->getType());
    APInt BaseOffset(PtrSizeBits, 0);
    int64_t Scale = 0;

    auto GTI = gep_type_begin(PointeeType, Operands);
    Type *TargetType = nullptr;

    // Handle the case where the GEP instruction has a single operand,
    // the base pointer, in which case TargetType remains a nullptr.
    if (Operands.empty())
      return !BaseGV ? TTI::TCC_Free : TTI::TCC_Basic;

    for (auto I = Operands.begin(); I != Operands.end(); ++I, ++GTI) {
      TargetType = GTI.getIndexedType();
      // We assume that the cost of Scalar GEP with constant index and the
      // cost of Vector GEP with splat constant index are the same.
      const ConstantInt *ConstIdx = dyn_cast<ConstantInt>(*I);
      if (!ConstIdx)
        if (auto Splat = getSplatValue(*I))
          ConstIdx = dyn_cast<ConstantInt>(Splat);
      if (StructType *STy = GTI.getStructTypeOrNull()) {
        // For structures, the index is always a splat or scalar constant.
        assert(ConstIdx && "Unexpected GEP index");
        uint64_t Field = ConstIdx->getZExtValue();
        BaseOffset += DL.getStructLayout(STy)->getElementOffset(Field);
      } else {
        int64_t ElementSize = DL.getTypeAllocSize(GTI.getIndexedType());
        if (ConstIdx) {
          BaseOffset +=
              ConstIdx->getValue().sextOrTrunc(PtrSizeBits) * ElementSize;
        } else {
          // Needs scale register.
          if (Scale != 0)
            // No addressing mode takes two scale registers.
            return TTI::TCC_Basic;
          Scale = ElementSize;
        }
      }
    }

    if (static_cast<T *>(this)->isLegalAddressingMode(
            TargetType, const_cast<GlobalValue *>(BaseGV),
            BaseOffset.sextOrTrunc(64).getSExtValue(), HasBaseReg, Scale,
            Ptr->getType()->getPointerAddressSpace()))
      return TTI::TCC_Free;
    return TTI::TCC_Basic;
  }

  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                            ArrayRef<Type *> ParamTys, const User *U) {
    switch (IID) {
    default:
      // Intrinsics rarely (if ever) have normal argument setup constraints.
      // Model them as having a basic instruction cost.
      return TTI::TCC_Basic;

    // TODO: other libc intrinsics.
    case Intrinsic::memcpy:
      return static_cast<T *>(this)->getMemcpyCost(dyn_cast<Instruction>(U));

    case Intrinsic::annotation:
    case Intrinsic::assume:
    case Intrinsic::sideeffect:
    case Intrinsic::dbg_declare:
    case Intrinsic::dbg_value:
    case Intrinsic::dbg_label:
    case Intrinsic::invariant_start:
    case Intrinsic::invariant_end:
    case Intrinsic::launder_invariant_group:
    case Intrinsic::strip_invariant_group:
    case Intrinsic::is_constant:
    case Intrinsic::lifetime_start:
    case Intrinsic::lifetime_end:
    case Intrinsic::objectsize:
    case Intrinsic::ptr_annotation:
    case Intrinsic::var_annotation:
    case Intrinsic::experimental_gc_result:
    case Intrinsic::experimental_gc_relocate:
    case Intrinsic::coro_alloc:
    case Intrinsic::coro_begin:
    case Intrinsic::coro_free:
    case Intrinsic::coro_end:
    case Intrinsic::coro_frame:
    case Intrinsic::coro_size:
    case Intrinsic::coro_suspend:
    case Intrinsic::coro_param:
    case Intrinsic::coro_subfn_addr:
      // These intrinsics don't actually represent code after lowering.
      return TTI::TCC_Free;
    }
  }

  unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,
                            ArrayRef<const Value *> Arguments, const User *U) {
    // Delegate to the generic intrinsic handling code. This mostly provides an
    // opportunity for targets to (for example) special case the cost of
    // certain intrinsics based on constants used as arguments.
    SmallVector<Type *, 8> ParamTys;
    ParamTys.reserve(Arguments.size());
    for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx)
      ParamTys.push_back(Arguments[Idx]->getType());
    return static_cast<T *>(this)->getIntrinsicCost(IID, RetTy, ParamTys, U);
  }

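  // Dispatch a generic user-cost query to the more specific hooks above:
  // PHIs and static allocas are modelled as free, GEPs go through getGEPCost,
  // calls through getCallCost, extensions through getExtCost, and everything
  // else falls back to getOperationCost.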
  unsigned getUserCost(const User *U, ArrayRef<const Value *> Operands) {
    if (isa<PHINode>(U))
      return TTI::TCC_Free; // Model all PHI nodes as free.

    // Static alloca doesn't generate target instructions.
    if (auto *A = dyn_cast<AllocaInst>(U))
      if (A->isStaticAlloca())
        return TTI::TCC_Free;

    if (const GEPOperator *GEP = dyn_cast<GEPOperator>(U)) {
      return static_cast<T *>(this)->getGEPCost(GEP->getSourceElementType(),
                                                GEP->getPointerOperand(),
                                                Operands.drop_front());
    }

    if (auto CS = ImmutableCallSite(U)) {
      const Function *F = CS.getCalledFunction();
      if (!F) {
        // Just use the called value type.
        Type *FTy = CS.getCalledValue()->getType()->getPointerElementType();
        return static_cast<T *>(this)
            ->getCallCost(cast<FunctionType>(FTy), CS.arg_size(), U);
      }

      SmallVector<const Value *, 8> Arguments(CS.arg_begin(), CS.arg_end());
      return static_cast<T *>(this)->getCallCost(F, Arguments, U);
    }

    if (isa<SExtInst>(U) || isa<ZExtInst>(U) || isa<FPExtInst>(U))
      // The old behaviour of generally treating extensions of icmp as free
      // has been removed. A target that needs it should override getUserCost().
      return static_cast<T *>(this)->getExtCost(cast<Instruction>(U),
                                                Operands.back());

    return static_cast<T *>(this)->getOperationCost(
        Operator::getOpcode(U), U->getType(),
        U->getNumOperands() == 1 ? U->getOperand(0)->getType() : nullptr);
  }

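  // Very coarse default latency buckets: anything free costs 0, loads 4,
  // real calls 40, floating-point producers 3, and everything else 1.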
  int getInstructionLatency(const Instruction *I) {
    SmallVector<const Value *, 4> Operands(I->value_op_begin(),
                                           I->value_op_end());
    if (getUserCost(I, Operands) == TTI::TCC_Free)
      return 0;

    if (isa<LoadInst>(I))
      return 4;

    Type *DstTy = I->getType();

    // Usually an intrinsic is a simple instruction.
    // A real function call is much slower.
    if (auto *CI = dyn_cast<CallInst>(I)) {
      const Function *F = CI->getCalledFunction();
      if (!F || static_cast<T *>(this)->isLoweredToCall(F))
        return 40;
      // Some intrinsics return a value and a flag; we use the value type
      // to decide the latency.
      if (StructType *StructTy = dyn_cast<StructType>(DstTy))
        DstTy = StructTy->getElementType(0);
      // Fall through to simple instructions.
    }

    if (VectorType *VectorTy = dyn_cast<VectorType>(DstTy))
      DstTy = VectorTy->getElementType();
    if (DstTy->isFloatingPointTy())
      return 3;

    return 1;
  }
};
} // namespace llvm

#endif