//===- BasicTTIImpl.h -------------------------------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
/// \file
/// This file provides a helper that implements much of the TTI interface in
/// terms of the target-independent code generator and TargetLowering
/// interfaces.
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_CODEGEN_BASICTTIIMPL_H
#define LLVM_CODEGEN_BASICTTIIMPL_H

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/TargetTransformInfoImpl.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/TargetLowering.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/BasicBlock.h"
#include "llvm/IR/Constant.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Intrinsics.h"
#include "llvm/IR/Operator.h"
#include "llvm/IR/Type.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Support/MathExtras.h"
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <limits>
#include <utility>

namespace llvm {

class Function;
class GlobalValue;
class LLVMContext;
class ScalarEvolution;
class SCEV;
class TargetMachine;

extern cl::opt<unsigned> PartialUnrollingThreshold;

/// Base class which can be used to help build a TTI implementation.
///
/// This class provides as much implementation of the TTI interface as is
/// possible using the target independent parts of the code generator.
///
/// In order to subclass it, your class must implement a getST() method to
/// return the subtarget, and a getTLI() method to return the target lowering.
/// We need these methods implemented in the derived class so that this class
/// doesn't have to duplicate storage for them.
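///
/// A minimal sketch of a conforming subclass (MyTTIImpl, MySubtarget and
/// MyTargetLowering below are hypothetical names used for illustration only):
/// \code
///   class MyTTIImpl : public BasicTTIImplBase<MyTTIImpl> {
///     const MySubtarget *ST;
///     const MyTargetLowering *TLI;
///
///   public:
///     // Satisfy the requirements documented above.
///     const MySubtarget *getST() const { return ST; }
///     const MyTargetLowering *getTLI() const { return TLI; }
///   };
/// \endcode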
template <typename T>
class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase<T> {
private:
  using BaseT = TargetTransformInfoImplCRTPBase<T>;
  using TTI = TargetTransformInfo;

  /// Helper function to access this as a T.
  T *thisT() { return static_cast<T *>(this); }

  /// Estimate a cost of Broadcast as an extract and sequence of insert
  /// operations.
  unsigned getBroadcastShuffleOverhead(FixedVectorType *VTy) {
    unsigned Cost = 0;
    // Broadcast cost is equal to the cost of extracting the zeroth element
    // plus the cost of inserting it into every element of the result vector.
    Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, 0);

    for (int i = 0, e = VTy->getNumElements(); i < e; ++i) {
      Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i);
    }
    return Cost;
  }

  /// Estimate a cost of shuffle as a sequence of extract and insert
  /// operations.
  unsigned getPermuteShuffleOverhead(FixedVectorType *VTy) {
    unsigned Cost = 0;
    // Shuffle cost is equal to the cost of extracting each element from its
    // source vector plus the cost of inserting them into the result vector.

    // e.g. a <4 x float> shuffle with a mask of <0,5,2,7> needs to extract
    // index 0 of the first vector, index 1 of the second vector, index 2 of
    // the first vector and finally index 3 of the second vector, and insert
    // them at indices <0,1,2,3> of the result vector.
    for (int i = 0, e = VTy->getNumElements(); i < e; ++i) {
      Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy, i);
      Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy, i);
    }
    return Cost;
  }

  /// Estimate a cost of subvector extraction as a sequence of extract and
  /// insert operations.
  unsigned getExtractSubvectorOverhead(VectorType *VTy, int Index,
                                       FixedVectorType *SubVTy) {
    assert(VTy && SubVTy &&
           "Can only extract subvectors from vectors");
    int NumSubElts = SubVTy->getNumElements();
    assert((!isa<FixedVectorType>(VTy) ||
            (Index + NumSubElts) <=
                (int)cast<FixedVectorType>(VTy)->getNumElements()) &&
           "SK_ExtractSubvector index out of range");

    unsigned Cost = 0;
    // Subvector extraction cost is equal to the cost of extracting each
    // element from the source type plus the cost of inserting them into the
    // result vector type.
    for (int i = 0; i != NumSubElts; ++i) {
      Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VTy,
                                          i + Index);
      Cost +=
          thisT()->getVectorInstrCost(Instruction::InsertElement, SubVTy, i);
    }
    return Cost;
  }

  /// Estimate a cost of subvector insertion as a sequence of extract and
  /// insert operations.
  unsigned getInsertSubvectorOverhead(VectorType *VTy, int Index,
                                      FixedVectorType *SubVTy) {
    assert(VTy && SubVTy &&
           "Can only insert subvectors into vectors");
    int NumSubElts = SubVTy->getNumElements();
    assert((!isa<FixedVectorType>(VTy) ||
            (Index + NumSubElts) <=
                (int)cast<FixedVectorType>(VTy)->getNumElements()) &&
           "SK_InsertSubvector index out of range");

    unsigned Cost = 0;
    // Subvector insertion cost is equal to the cost of extracting each
    // element from the subvector type plus the cost of inserting them into
    // the result vector type.
    for (int i = 0; i != NumSubElts; ++i) {
      Cost +=
          thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVTy, i);
      Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, VTy,
                                          i + Index);
    }
    return Cost;
  }

  /// Local query method delegates up to T which *must* implement this!
  const TargetSubtargetInfo *getST() const {
    return static_cast<const T *>(this)->getST();
  }

  /// Local query method delegates up to T which *must* implement this!
  const TargetLoweringBase *getTLI() const {
    return static_cast<const T *>(this)->getTLI();
  }

  static ISD::MemIndexedMode getISDIndexedMode(TTI::MemIndexedMode M) {
    switch (M) {
    case TTI::MIM_Unindexed:
      return ISD::UNINDEXED;
    case TTI::MIM_PreInc:
      return ISD::PRE_INC;
    case TTI::MIM_PreDec:
      return ISD::PRE_DEC;
    case TTI::MIM_PostInc:
      return ISD::POST_INC;
    case TTI::MIM_PostDec:
      return ISD::POST_DEC;
    }
    llvm_unreachable("Unexpected MemIndexedMode");
  }

protected:
  explicit BasicTTIImplBase(const TargetMachine *TM, const DataLayout &DL)
      : BaseT(DL) {}
  virtual ~BasicTTIImplBase() = default;

  using TargetTransformInfoImplBase::DL;

public:
  /// \name Scalar TTI Implementations
  /// @{
  bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,
                                      unsigned AddressSpace, unsigned Alignment,
                                      bool *Fast) const {
    EVT E = EVT::getIntegerVT(Context, BitWidth);
    return getTLI()->allowsMisalignedMemoryAccesses(
        E, AddressSpace, Alignment, MachineMemOperand::MONone, Fast);
  }

  bool hasBranchDivergence() { return false; }

  bool useGPUDivergenceAnalysis() { return false; }

  bool isSourceOfDivergence(const Value *V) { return false; }

  bool isAlwaysUniform(const Value *V) { return false; }

  unsigned getFlatAddressSpace() {
    // Return an invalid address space.
    return -1;
  }

  bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,
                                  Intrinsic::ID IID) const {
    return false;
  }

  bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const {
    return getTLI()->getTargetMachine().isNoopAddrSpaceCast(FromAS, ToAS);
  }

  unsigned getAssumedAddrSpace(const Value *V) const {
    return getTLI()->getTargetMachine().getAssumedAddrSpace(V);
  }

  Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,
                                          Value *NewV) const {
    return nullptr;
  }

  bool isLegalAddImmediate(int64_t imm) {
    return getTLI()->isLegalAddImmediate(imm);
  }

  bool isLegalICmpImmediate(int64_t imm) {
    return getTLI()->isLegalICmpImmediate(imm);
  }

  bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                             bool HasBaseReg, int64_t Scale,
                             unsigned AddrSpace, Instruction *I = nullptr) {
    TargetLoweringBase::AddrMode AM;
    AM.BaseGV = BaseGV;
    AM.BaseOffs = BaseOffset;
    AM.HasBaseReg = HasBaseReg;
    AM.Scale = Scale;
    return getTLI()->isLegalAddressingMode(DL, AM, Ty, AddrSpace, I);
  }

  bool isIndexedLoadLegal(TTI::MemIndexedMode M, Type *Ty,
                          const DataLayout &DL) const {
    EVT VT = getTLI()->getValueType(DL, Ty);
    return getTLI()->isIndexedLoadLegal(getISDIndexedMode(M), VT);
  }

  bool isIndexedStoreLegal(TTI::MemIndexedMode M, Type *Ty,
                           const DataLayout &DL) const {
    EVT VT = getTLI()->getValueType(DL, Ty);
    return getTLI()->isIndexedStoreLegal(getISDIndexedMode(M), VT);
  }

  bool isLSRCostLess(TTI::LSRCost C1, TTI::LSRCost C2) {
    return TargetTransformInfoImplBase::isLSRCostLess(C1, C2);
  }

  bool isNumRegsMajorCostOfLSR() {
    return TargetTransformInfoImplBase::isNumRegsMajorCostOfLSR();
  }

  bool isProfitableLSRChainElement(Instruction *I) {
    return TargetTransformInfoImplBase::isProfitableLSRChainElement(I);
  }

  int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,
                           bool HasBaseReg, int64_t Scale, unsigned AddrSpace) {
    TargetLoweringBase::AddrMode AM;
    AM.BaseGV = BaseGV;
    AM.BaseOffs = BaseOffset;
    AM.HasBaseReg = HasBaseReg;
    AM.Scale = Scale;
    return getTLI()->getScalingFactorCost(DL, AM, Ty, AddrSpace);
  }

  bool isTruncateFree(Type *Ty1, Type *Ty2) {
    return getTLI()->isTruncateFree(Ty1, Ty2);
  }

  bool isProfitableToHoist(Instruction *I) {
    return getTLI()->isProfitableToHoist(I);
  }

  bool useAA() const { return getST()->useAA(); }

  bool isTypeLegal(Type *Ty) {
    EVT VT = getTLI()->getValueType(DL, Ty);
    return getTLI()->isTypeLegal(VT);
  }

  unsigned getRegUsageForType(Type *Ty) {
    return getTLI()->getTypeLegalizationCost(DL, Ty).first;
  }

  int getGEPCost(Type *PointeeType, const Value *Ptr,
                 ArrayRef<const Value *> Operands) {
    return BaseT::getGEPCost(PointeeType, Ptr, Operands);
  }

  unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,
                                            unsigned &JumpTableSize,
                                            ProfileSummaryInfo *PSI,
                                            BlockFrequencyInfo *BFI) {
    /// Try to find the estimated number of clusters. Note that the number of
    /// clusters identified in this function could be different from the actual
    /// numbers found in lowering. This function ignores switches that are
    /// lowered with a mix of jump table / bit test / BTree. This function was
    /// initially intended to be used when estimating the cost of switch in
    /// inline cost heuristic, but it's a generic cost model to be used in other
    /// places (e.g., in loop unrolling).
    unsigned N = SI.getNumCases();
    const TargetLoweringBase *TLI = getTLI();
    const DataLayout &DL = this->getDataLayout();

    JumpTableSize = 0;
    bool IsJTAllowed = TLI->areJTsAllowed(SI.getParent()->getParent());

    // Early exit if both a jump table and bit test are not allowed.
    if (N < 1 || (!IsJTAllowed && DL.getIndexSizeInBits(0u) < N))
      return N;

    APInt MaxCaseVal = SI.case_begin()->getCaseValue()->getValue();
    APInt MinCaseVal = MaxCaseVal;
    for (auto CI : SI.cases()) {
      const APInt &CaseVal = CI.getCaseValue()->getValue();
      if (CaseVal.sgt(MaxCaseVal))
        MaxCaseVal = CaseVal;
      if (CaseVal.slt(MinCaseVal))
        MinCaseVal = CaseVal;
    }

    // Check if suitable for a bit test.
    if (N <= DL.getIndexSizeInBits(0u)) {
      SmallPtrSet<const BasicBlock *, 4> Dests;
      for (auto I : SI.cases())
        Dests.insert(I.getCaseSuccessor());

      if (TLI->isSuitableForBitTests(Dests.size(), N, MinCaseVal, MaxCaseVal,
                                     DL))
        return 1;
    }

    // Check if suitable for a jump table.
    if (IsJTAllowed) {
      if (N < 2 || N < TLI->getMinimumJumpTableEntries())
        return N;
      uint64_t Range =
          (MaxCaseVal - MinCaseVal)
              .getLimitedValue(std::numeric_limits<uint64_t>::max() - 1) + 1;
      // Check whether a range of clusters is dense enough for a jump table.
      if (TLI->isSuitableForJumpTable(&SI, N, Range, PSI, BFI)) {
        JumpTableSize = Range;
        return 1;
      }
    }
    return N;
  }

  bool shouldBuildLookupTables() {
    const TargetLoweringBase *TLI = getTLI();
    return TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
           TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
  }

  bool haveFastSqrt(Type *Ty) {
    const TargetLoweringBase *TLI = getTLI();
    EVT VT = TLI->getValueType(DL, Ty);
    return TLI->isTypeLegal(VT) &&
           TLI->isOperationLegalOrCustom(ISD::FSQRT, VT);
  }

  bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) {
    return true;
  }

  unsigned getFPOpCost(Type *Ty) {
    // Check whether FADD is available, as a proxy for floating-point in
    // general.
    const TargetLoweringBase *TLI = getTLI();
    EVT VT = TLI->getValueType(DL, Ty);
    if (TLI->isOperationLegalOrCustomOrPromote(ISD::FADD, VT))
      return TargetTransformInfo::TCC_Basic;
    return TargetTransformInfo::TCC_Expensive;
  }

  unsigned getInliningThresholdMultiplier() { return 1; }

  int getInlinerVectorBonusPercent() { return 150; }

  void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,
                               TTI::UnrollingPreferences &UP) {
    // This unrolling functionality is target independent, but to provide some
    // motivation for its intended use, for x86:

    // According to the Intel 64 and IA-32 Architectures Optimization Reference
    // Manual, Intel Core models and later have a loop stream detector (and
    // associated uop queue) that can benefit from partial unrolling.
    // The relevant requirements are:
    //  - The loop must have no more than 4 (8 for Nehalem and later) branches
    //    taken, and none of them may be calls.
    //  - The loop can have no more than 18 (28 for Nehalem and later) uops.

    // According to the Software Optimization Guide for AMD Family 15h
    // Processors, models 30h-4fh (Steamroller and later) have a loop predictor
    // and loop buffer which can benefit from partial unrolling.
    // The relevant requirements are:
    //  - The loop must have fewer than 16 branches
    //  - The loop must have less than 40 uops in all executed loop branches

    // The number of taken branches in a loop is hard to estimate here, and
    // benchmarking has revealed that it is better not to be conservative when
    // estimating the branch count. As a result, we'll ignore the branch limits
    // until someone finds a case where it matters in practice.

    unsigned MaxOps;
    const TargetSubtargetInfo *ST = getST();
    if (PartialUnrollingThreshold.getNumOccurrences() > 0)
      MaxOps = PartialUnrollingThreshold;
    else if (ST->getSchedModel().LoopMicroOpBufferSize > 0)
      MaxOps = ST->getSchedModel().LoopMicroOpBufferSize;
    else
      return;

    // Scan the loop: don't unroll loops with calls.
    for (BasicBlock *BB : L->blocks()) {
      for (Instruction &I : *BB) {
        if (isa<CallInst>(I) || isa<InvokeInst>(I)) {
          if (const Function *F = cast<CallBase>(I).getCalledFunction()) {
            if (!thisT()->isLoweredToCall(F))
              continue;
          }

          return;
        }
      }
    }

    // Enable runtime and partial unrolling up to the specified size.
    // Enable using trip count upper bound to unroll loops.
    UP.Partial = UP.Runtime = UP.UpperBound = true;
    UP.PartialThreshold = MaxOps;

    // Avoid unrolling when optimizing for size.
    UP.OptSizeThreshold = 0;
    UP.PartialOptSizeThreshold = 0;

    // Set number of instructions optimized when "back edge"
    // becomes "fall through" to default value of 2.
    UP.BEInsns = 2;
  }

  void getPeelingPreferences(Loop *L, ScalarEvolution &SE,
                             TTI::PeelingPreferences &PP) {
    PP.PeelCount = 0;
    PP.AllowPeeling = true;
    PP.AllowLoopNestsPeeling = false;
    PP.PeelProfiledIterations = true;
  }

  bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,
                                AssumptionCache &AC,
                                TargetLibraryInfo *LibInfo,
                                HardwareLoopInfo &HWLoopInfo) {
    return BaseT::isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);
  }

  bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,
                                   AssumptionCache &AC, TargetLibraryInfo *TLI,
                                   DominatorTree *DT,
                                   const LoopAccessInfo *LAI) {
    return BaseT::preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);
  }

  bool emitGetActiveLaneMask() {
    return BaseT::emitGetActiveLaneMask();
  }

  Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,
                                               IntrinsicInst &II) {
    return BaseT::instCombineIntrinsic(IC, II);
  }

  Optional<Value *> simplifyDemandedUseBitsIntrinsic(InstCombiner &IC,
                                                     IntrinsicInst &II,
                                                     APInt DemandedMask,
                                                     KnownBits &Known,
                                                     bool &KnownBitsComputed) {
    return BaseT::simplifyDemandedUseBitsIntrinsic(IC, II, DemandedMask, Known,
                                                   KnownBitsComputed);
  }

  Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
      InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,
      APInt &UndefElts2, APInt &UndefElts3,
      std::function<void(Instruction *, unsigned, APInt, APInt &)>
          SimplifyAndSetOp) {
    return BaseT::simplifyDemandedVectorEltsIntrinsic(
        IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3,
        SimplifyAndSetOp);
  }

  int getInstructionLatency(const Instruction *I) {
    if (isa<LoadInst>(I))
      return getST()->getSchedModel().DefaultLoadLatency;

    return BaseT::getInstructionLatency(I);
  }

  virtual Optional<unsigned>
  getCacheSize(TargetTransformInfo::CacheLevel Level) const {
    return Optional<unsigned>(
        getST()->getCacheSize(static_cast<unsigned>(Level)));
  }

  virtual Optional<unsigned>
  getCacheAssociativity(TargetTransformInfo::CacheLevel Level) const {
    Optional<unsigned> TargetResult =
        getST()->getCacheAssociativity(static_cast<unsigned>(Level));

    if (TargetResult)
      return TargetResult;

    return BaseT::getCacheAssociativity(Level);
  }

  virtual unsigned getCacheLineSize() const {
    return getST()->getCacheLineSize();
  }

  virtual unsigned getPrefetchDistance() const {
    return getST()->getPrefetchDistance();
  }

  virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,
                                        unsigned NumStridedMemAccesses,
                                        unsigned NumPrefetches,
                                        bool HasCall) const {
    return getST()->getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,
                                         NumPrefetches, HasCall);
  }

  virtual unsigned getMaxPrefetchIterationsAhead() const {
    return getST()->getMaxPrefetchIterationsAhead();
  }

  virtual bool enableWritePrefetching() const {
    return getST()->enableWritePrefetching();
  }

  /// @}

  /// \name Vector TTI Implementations
  /// @{

  unsigned getRegisterBitWidth(bool Vector) const { return 32; }

  Optional<unsigned> getMaxVScale() const { return None; }

  /// Estimate the overhead of scalarizing an instruction. Insert and Extract
  /// are set if the demanded result elements need to be inserted and/or
  /// extracted from vectors.
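  ///
  /// For example (an illustrative sketch only, not part of the interface):
  /// for a <4 x i32> type with DemandedElts = 0b0101 and Insert set, the
  /// estimate is the cost of two insertelement operations (lanes 0 and 2);
  /// if Extract is also set, two extractelement costs are added on top.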
  unsigned getScalarizationOverhead(VectorType *InTy, const APInt &DemandedElts,
                                    bool Insert, bool Extract) {
    /// FIXME: a bitfield is not a reasonable abstraction for talking about
    /// which elements are needed from a scalable vector
    auto *Ty = cast<FixedVectorType>(InTy);

    assert(DemandedElts.getBitWidth() == Ty->getNumElements() &&
           "Vector size mismatch");

    unsigned Cost = 0;

    for (int i = 0, e = Ty->getNumElements(); i < e; ++i) {
      if (!DemandedElts[i])
        continue;
      if (Insert)
        Cost += thisT()->getVectorInstrCost(Instruction::InsertElement, Ty, i);
      if (Extract)
        Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, i);
    }

    return Cost;
  }

  /// Helper wrapper for the DemandedElts variant of getScalarizationOverhead.
  unsigned getScalarizationOverhead(VectorType *InTy, bool Insert,
                                    bool Extract) {
    auto *Ty = cast<FixedVectorType>(InTy);

    APInt DemandedElts = APInt::getAllOnesValue(Ty->getNumElements());
    return thisT()->getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
  }

  /// Estimate the overhead of scalarizing an instruction's unique
  /// non-constant operands. The types of the arguments are ordinarily
  /// scalar, in which case the costs are multiplied with VF.
  unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,
                                            unsigned VF) {
    unsigned Cost = 0;
    SmallPtrSet<const Value *, 4> UniqueOperands;
    for (const Value *A : Args) {
      // Disregard things like metadata arguments.
      Type *Ty = A->getType();
      if (!Ty->isIntOrIntVectorTy() && !Ty->isFPOrFPVectorTy() &&
          !Ty->isPtrOrPtrVectorTy())
        continue;

      if (!isa<Constant>(A) && UniqueOperands.insert(A).second) {
        auto *VecTy = dyn_cast<VectorType>(Ty);
        if (VecTy) {
          // If A is a vector operand, VF should be 1 or correspond to A.
          assert((VF == 1 ||
                  VF == cast<FixedVectorType>(VecTy)->getNumElements()) &&
                 "Vector argument does not match VF");
        }
        else
          VecTy = FixedVectorType::get(Ty, VF);

        Cost += getScalarizationOverhead(VecTy, false, true);
      }
    }

    return Cost;
  }

  unsigned getScalarizationOverhead(VectorType *InTy,
                                    ArrayRef<const Value *> Args) {
    auto *Ty = cast<FixedVectorType>(InTy);

    unsigned Cost = 0;

    Cost += getScalarizationOverhead(Ty, true, false);
    if (!Args.empty())
      Cost += getOperandsScalarizationOverhead(Args, Ty->getNumElements());
    else
      // When no information on arguments is provided, we add the cost
      // associated with one argument as a heuristic.
      Cost += getScalarizationOverhead(Ty, false, true);

    return Cost;
  }

  unsigned getMaxInterleaveFactor(unsigned VF) { return 1; }

  unsigned getArithmeticInstrCost(
      unsigned Opcode, Type *Ty,
      TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,
      TTI::OperandValueKind Opd1Info = TTI::OK_AnyValue,
      TTI::OperandValueKind Opd2Info = TTI::OK_AnyValue,
      TTI::OperandValueProperties Opd1PropInfo = TTI::OP_None,
      TTI::OperandValueProperties Opd2PropInfo = TTI::OP_None,
      ArrayRef<const Value *> Args = ArrayRef<const Value *>(),
      const Instruction *CxtI = nullptr) {
    // Check if any of the operands are vector operands.
    const TargetLoweringBase *TLI = getTLI();
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");

    // TODO: Handle more cost kinds.
    if (CostKind != TTI::TCK_RecipThroughput)
      return BaseT::getArithmeticInstrCost(Opcode, Ty, CostKind,
                                           Opd1Info, Opd2Info,
                                           Opd1PropInfo, Opd2PropInfo,
                                           Args, CxtI);

    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, Ty);

    bool IsFloat = Ty->isFPOrFPVectorTy();
    // Assume that floating point arithmetic operations cost twice as much as
    // integer operations.
    unsigned OpCost = (IsFloat ? 2 : 1);

    if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
      // The operation is legal. Assume it costs 1.
      // TODO: Once we have extract/insert subvector cost we need to use them.
      return LT.first * OpCost;
    }

    if (!TLI->isOperationExpand(ISD, LT.second)) {
      // If the operation is custom lowered, then assume that the code is twice
      // as expensive.
      return LT.first * 2 * OpCost;
    }

    // Else, assume that we need to scalarize this op.
    // TODO: If one of the types gets legalized by splitting, handle this
    // similarly to what getCastInstrCost() does.
    if (auto *VTy = dyn_cast<VectorType>(Ty)) {
      unsigned Num = cast<FixedVectorType>(VTy)->getNumElements();
      unsigned Cost = thisT()->getArithmeticInstrCost(
          Opcode, VTy->getScalarType(), CostKind, Opd1Info, Opd2Info,
          Opd1PropInfo, Opd2PropInfo, Args, CxtI);
      // Return the cost of multiple scalar invocations plus the cost of
      // inserting and extracting the values.
      return getScalarizationOverhead(VTy, Args) + Num * Cost;
    }

    // We don't know anything about this scalar instruction.
    return OpCost;
  }

  unsigned getShuffleCost(TTI::ShuffleKind Kind, VectorType *Tp, int Index,
                          VectorType *SubTp) {

    switch (Kind) {
    case TTI::SK_Broadcast:
      return getBroadcastShuffleOverhead(cast<FixedVectorType>(Tp));
    case TTI::SK_Select:
    case TTI::SK_Reverse:
    case TTI::SK_Transpose:
    case TTI::SK_PermuteSingleSrc:
    case TTI::SK_PermuteTwoSrc:
      return getPermuteShuffleOverhead(cast<FixedVectorType>(Tp));
    case TTI::SK_ExtractSubvector:
      return getExtractSubvectorOverhead(Tp, Index,
                                         cast<FixedVectorType>(SubTp));
    case TTI::SK_InsertSubvector:
      return getInsertSubvectorOverhead(Tp, Index,
                                        cast<FixedVectorType>(SubTp));
    }
    llvm_unreachable("Unknown TTI::ShuffleKind");
  }

  unsigned getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,
                            TTI::CastContextHint CCH,
                            TTI::TargetCostKind CostKind,
                            const Instruction *I = nullptr) {
    if (BaseT::getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I) == 0)
      return 0;

    const TargetLoweringBase *TLI = getTLI();
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");
    std::pair<unsigned, MVT> SrcLT = TLI->getTypeLegalizationCost(DL, Src);
    std::pair<unsigned, MVT> DstLT = TLI->getTypeLegalizationCost(DL, Dst);

    TypeSize SrcSize = SrcLT.second.getSizeInBits();
    TypeSize DstSize = DstLT.second.getSizeInBits();
    bool IntOrPtrSrc = Src->isIntegerTy() || Src->isPointerTy();
    bool IntOrPtrDst = Dst->isIntegerTy() || Dst->isPointerTy();

    switch (Opcode) {
    default:
      break;
    case Instruction::Trunc:
      // Check for NOOP conversions.
      if (TLI->isTruncateFree(SrcLT.second, DstLT.second))
        return 0;
      LLVM_FALLTHROUGH;
    case Instruction::BitCast:
      // Bitcasts between types that are legalized to the same type are free,
      // and assume int to/from ptr of the same size is also free.
      if (SrcLT.first == DstLT.first && IntOrPtrSrc == IntOrPtrDst &&
          SrcSize == DstSize)
        return 0;
      break;
    case Instruction::FPExt:
      if (I && getTLI()->isExtFree(I))
        return 0;
      break;
    case Instruction::ZExt:
      if (TLI->isZExtFree(SrcLT.second, DstLT.second))
        return 0;
      LLVM_FALLTHROUGH;
    case Instruction::SExt:
      if (I && getTLI()->isExtFree(I))
        return 0;

      // If this is a zext/sext of a load, return 0 if the corresponding
      // extending load exists on target.
      if (CCH == TTI::CastContextHint::Normal) {
        EVT ExtVT = EVT::getEVT(Dst);
        EVT LoadVT = EVT::getEVT(Src);
        unsigned LType =
            ((Opcode == Instruction::ZExt) ? ISD::ZEXTLOAD : ISD::SEXTLOAD);
        if (TLI->isLoadExtLegal(LType, ExtVT, LoadVT))
          return 0;
      }
      break;
    case Instruction::AddrSpaceCast:
      if (TLI->isFreeAddrSpaceCast(Src->getPointerAddressSpace(),
                                   Dst->getPointerAddressSpace()))
        return 0;
      break;
    }

    auto *SrcVTy = dyn_cast<VectorType>(Src);
    auto *DstVTy = dyn_cast<VectorType>(Dst);

    // If the cast is marked as legal (or promote) then assume low cost.
    if (SrcLT.first == DstLT.first &&
        TLI->isOperationLegalOrPromote(ISD, DstLT.second))
      return SrcLT.first;

    // Handle scalar conversions.
    if (!SrcVTy && !DstVTy) {
      // Just check the op cost. If the operation is legal then assume it
      // costs 1.
      if (!TLI->isOperationExpand(ISD, DstLT.second))
        return 1;

      // Assume that illegal scalar instructions are expensive.
      return 4;
    }

    // Check vector-to-vector casts.
    if (DstVTy && SrcVTy) {
      // If the cast is between same-sized registers, then the check is simple.
      if (SrcLT.first == DstLT.first && SrcSize == DstSize) {

        // Assume that Zext is done using AND.
        if (Opcode == Instruction::ZExt)
          return SrcLT.first;

        // Assume that sext is done using SHL and SRA.
        if (Opcode == Instruction::SExt)
          return SrcLT.first * 2;

        // Just check the op cost. If the operation is legal then assume it
        // costs 1 and multiply by the type-legalization overhead.
        if (!TLI->isOperationExpand(ISD, DstLT.second))
          return SrcLT.first * 1;
      }

      // If we are legalizing by splitting, query the concrete TTI for the cost
      // of casting the original vector twice. We also need to factor in the
      // cost of the split itself. Count that as 1, to be consistent with
      // TLI->getTypeLegalizationCost().
      bool SplitSrc =
          TLI->getTypeAction(Src->getContext(), TLI->getValueType(DL, Src)) ==
          TargetLowering::TypeSplitVector;
      bool SplitDst =
          TLI->getTypeAction(Dst->getContext(), TLI->getValueType(DL, Dst)) ==
          TargetLowering::TypeSplitVector;
      if ((SplitSrc || SplitDst) &&
          cast<FixedVectorType>(SrcVTy)->getNumElements() > 1 &&
          cast<FixedVectorType>(DstVTy)->getNumElements() > 1) {
        Type *SplitDstTy = VectorType::getHalfElementsVectorType(DstVTy);
        Type *SplitSrcTy = VectorType::getHalfElementsVectorType(SrcVTy);
        T *TTI = static_cast<T *>(this);
        // If both types need to be split then the split is free.
        unsigned SplitCost =
            (!SplitSrc || !SplitDst) ? TTI->getVectorSplitCost() : 0;
        return SplitCost +
               (2 * TTI->getCastInstrCost(Opcode, SplitDstTy, SplitSrcTy, CCH,
                                          CostKind, I));
      }

      // In other cases where the source or destination are illegal, assume
      // the operation will get scalarized.
      unsigned Num = cast<FixedVectorType>(DstVTy)->getNumElements();
      unsigned Cost = thisT()->getCastInstrCost(
          Opcode, Dst->getScalarType(), Src->getScalarType(), CCH, CostKind, I);

      // Return the cost of multiple scalar invocations plus the cost of
      // inserting and extracting the values.
      return getScalarizationOverhead(DstVTy, true, true) + Num * Cost;
    }

    // We already handled vector-to-vector and scalar-to-scalar conversions.
    // This is where we handle bitcast between vectors and scalars. We need to
    // assume that the conversion is scalarized in one way or another.
    if (Opcode == Instruction::BitCast) {
      // Illegal bitcasts are done by storing and loading from a stack slot.
      return (SrcVTy ? getScalarizationOverhead(SrcVTy, false, true) : 0) +
             (DstVTy ? getScalarizationOverhead(DstVTy, true, false) : 0);
    }

    llvm_unreachable("Unhandled cast");
  }

  unsigned getExtractWithExtendCost(unsigned Opcode, Type *Dst,
                                    VectorType *VecTy, unsigned Index) {
    return thisT()->getVectorInstrCost(Instruction::ExtractElement, VecTy,
                                       Index) +
           thisT()->getCastInstrCost(Opcode, Dst, VecTy->getElementType(),
                                     TTI::CastContextHint::None,
                                     TTI::TCK_RecipThroughput);
  }

  unsigned getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) {
    return BaseT::getCFInstrCost(Opcode, CostKind);
  }

  unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,
                              CmpInst::Predicate VecPred,
                              TTI::TargetCostKind CostKind,
                              const Instruction *I = nullptr) {
    const TargetLoweringBase *TLI = getTLI();
    int ISD = TLI->InstructionOpcodeToISD(Opcode);
    assert(ISD && "Invalid opcode");

    // TODO: Handle other cost kinds.
    if (CostKind != TTI::TCK_RecipThroughput)
      return BaseT::getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind,
                                       I);

    // Selects on vectors are actually vector selects.
    if (ISD == ISD::SELECT) {
      assert(CondTy && "CondTy must exist");
      if (CondTy->isVectorTy())
        ISD = ISD::VSELECT;
    }
    std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, ValTy);

    if (!(ValTy->isVectorTy() && !LT.second.isVector()) &&
        !TLI->isOperationExpand(ISD, LT.second)) {
      // The operation is legal. Assume it costs 1. Multiply
      // by the type-legalization overhead.
      return LT.first * 1;
    }

    // Otherwise, assume that the cast is scalarized.
    // TODO: If one of the types get legalized by splitting, handle this
    // similarly to what getCastInstrCost() does.
    if (auto *ValVTy = dyn_cast<VectorType>(ValTy)) {
      unsigned Num = cast<FixedVectorType>(ValVTy)->getNumElements();
      if (CondTy)
        CondTy = CondTy->getScalarType();
      unsigned Cost = thisT()->getCmpSelInstrCost(
          Opcode, ValVTy->getScalarType(), CondTy, VecPred, CostKind, I);

      // Return the cost of multiple scalar invocation plus the cost of
      // inserting and extracting the values.
      return getScalarizationOverhead(ValVTy, true, false) + Num * Cost;
    }

    // Unknown scalar opcode.
    return 1;
  }

  unsigned getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) {
    std::pair<unsigned, MVT> LT =
        getTLI()->getTypeLegalizationCost(DL, Val->getScalarType());

    return LT.first;
  }

  unsigned getMemoryOpCost(unsigned Opcode, Type *Src, MaybeAlign Alignment,
                           unsigned AddressSpace,
                           TTI::TargetCostKind CostKind,
                           const Instruction *I = nullptr) {
    assert(!Src->isVoidTy() && "Invalid type");
    // Assume types, such as structs, are expensive.
    if (getTLI()->getValueType(DL, Src, true) == MVT::Other)
      return 4;
    std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Src);

    // Assuming that all loads of legal types cost 1.
    unsigned Cost = LT.first;
    if (CostKind != TTI::TCK_RecipThroughput)
      return Cost;

    if (Src->isVectorTy() &&
        // In practice it's not currently possible to have a change in lane
        // length for extending loads or truncating stores so both types should
        // have the same scalable property.
        TypeSize::isKnownLT(Src->getPrimitiveSizeInBits(),
                            LT.second.getSizeInBits())) {
      // This is a vector load that legalizes to a larger type than the vector
      // itself. Unless the corresponding extending load or truncating store is
      // legal, then this will scalarize.
      TargetLowering::LegalizeAction LA = TargetLowering::Expand;
      EVT MemVT = getTLI()->getValueType(DL, Src);
      if (Opcode == Instruction::Store)
        LA = getTLI()->getTruncStoreAction(LT.second, MemVT);
      else
        LA = getTLI()->getLoadExtAction(ISD::EXTLOAD, LT.second, MemVT);

      if (LA != TargetLowering::Legal && LA != TargetLowering::Custom) {
        // This is a vector load/store for some illegal type that is scalarized.
        // We must account for the cost of building or decomposing the vector.
        Cost += getScalarizationOverhead(cast<VectorType>(Src),
                                         Opcode != Instruction::Store,
                                         Opcode == Instruction::Store);
      }
    }

    return Cost;
  }

  unsigned getGatherScatterOpCost(unsigned Opcode, Type *DataTy,
                                  const Value *Ptr, bool VariableMask,
                                  Align Alignment, TTI::TargetCostKind CostKind,
                                  const Instruction *I = nullptr) {
    auto *VT = cast<FixedVectorType>(DataTy);
    // Assume the target does not have support for gather/scatter operations
    // and provide a rough estimate.
    //
    // First, compute the cost of extracting the individual addresses and the
    // individual memory operations.
    int LoadCost =
        VT->getNumElements() *
        (getVectorInstrCost(
             Instruction::ExtractElement,
             FixedVectorType::get(PointerType::get(VT->getElementType(), 0),
                                  VT->getNumElements()),
             -1) +
         getMemoryOpCost(Opcode, VT->getElementType(), Alignment, 0, CostKind));

    // Next, compute the cost of packing the result in a vector.
    int PackingCost = getScalarizationOverhead(VT, Opcode != Instruction::Store,
                                               Opcode == Instruction::Store);

    int ConditionalCost = 0;
    if (VariableMask) {
      // Compute the cost of conditionally executing the memory operations with
      // variable masks. This includes extracting the individual conditions,
      // branches, and PHIs to combine the results.
      // NOTE: Estimating the cost of conditionally executing the memory
      // operations accurately is quite difficult and the current solution
      // provides a very rough estimate only.
      ConditionalCost =
          VT->getNumElements() *
          (getVectorInstrCost(
               Instruction::ExtractElement,
               FixedVectorType::get(Type::getInt1Ty(DataTy->getContext()),
                                    VT->getNumElements()),
               -1) +
           getCFInstrCost(Instruction::Br, CostKind) +
           getCFInstrCost(Instruction::PHI, CostKind));
    }

    return LoadCost + PackingCost + ConditionalCost;
  }

  unsigned getInterleavedMemoryOpCost(
      unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,
      Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,
      bool UseMaskForCond = false, bool UseMaskForGaps = false) {
    auto *VT = cast<FixedVectorType>(VecTy);

    unsigned NumElts = VT->getNumElements();
    assert(Factor > 1 && NumElts % Factor == 0 && "Invalid interleave factor");

    unsigned NumSubElts = NumElts / Factor;
    auto *SubVT = FixedVectorType::get(VT->getElementType(), NumSubElts);

    // Firstly, the cost of the load/store operation.
    unsigned Cost;
    if (UseMaskForCond || UseMaskForGaps)
      Cost = thisT()->getMaskedMemoryOpCost(Opcode, VecTy, Alignment,
                                            AddressSpace, CostKind);
    else
      Cost = thisT()->getMemoryOpCost(Opcode, VecTy, Alignment, AddressSpace,
                                      CostKind);

    // Legalize the vector type, and get the legalized and unlegalized type
    // sizes.
    MVT VecTyLT = getTLI()->getTypeLegalizationCost(DL, VecTy).second;
    unsigned VecTySize = thisT()->getDataLayout().getTypeStoreSize(VecTy);
    unsigned VecTyLTSize = VecTyLT.getStoreSize();

    // Return the ceiling of dividing A by B.
    auto ceil = [](unsigned A, unsigned B) { return (A + B - 1) / B; };

    // Scale the cost of the memory operation by the fraction of legalized
    // instructions that will actually be used. We shouldn't account for the
    // cost of dead instructions since they will be removed.
    //
    // E.g., An interleaved load of factor 8:
    //       %vec = load <16 x i64>, <16 x i64>* %ptr
    //       %v0 = shufflevector %vec, undef, <0, 8>
    //
    // If <16 x i64> is legalized to 8 v2i64 loads, only 2 of the loads will be
    // used (those corresponding to elements [0:1] and [8:9] of the unlegalized
    // type). The other loads are unused.
    //
    // We only scale the cost of loads since interleaved store groups aren't
    // allowed to have gaps.
    if (Opcode == Instruction::Load && VecTySize > VecTyLTSize) {
      // The number of loads of a legal type it will take to represent a load
      // of the unlegalized vector type.
      unsigned NumLegalInsts = ceil(VecTySize, VecTyLTSize);

      // The number of elements of the unlegalized type that correspond to a
      // single legal instruction.
      unsigned NumEltsPerLegalInst = ceil(NumElts, NumLegalInsts);

      // Determine which legal instructions will be used.
      BitVector UsedInsts(NumLegalInsts, false);
      for (unsigned Index : Indices)
        for (unsigned Elt = 0; Elt < NumSubElts; ++Elt)
          UsedInsts.set((Index + Elt * Factor) / NumEltsPerLegalInst);

      // Scale the cost of the load by the fraction of legal instructions that
      // will be used.
      Cost *= UsedInsts.count() / NumLegalInsts;
    }

    // Then add the cost of the interleave operation.
    if (Opcode == Instruction::Load) {
      // The interleave cost is similar to extracting the sub vectors' elements
      // from the wide vector, and inserting them into the sub vectors.
      //
      // E.g. An interleaved load of factor 2 (with one member of index 0):
      //      %vec = load <8 x i32>, <8 x i32>* %ptr
      //      %v0 = shuffle %vec, undef, <0, 2, 4, 6>   ; Index 0
      // The cost is estimated as extracting elements at 0, 2, 4, 6 from the
      // <8 x i32> vector and inserting them into a <4 x i32> vector.

      assert(Indices.size() <= Factor &&
             "Interleaved memory op has too many members");

      for (unsigned Index : Indices) {
        assert(Index < Factor && "Invalid index for interleaved memory op");

        // Extract elements from loaded vector for each sub vector.
        for (unsigned i = 0; i < NumSubElts; i++)
          Cost += thisT()->getVectorInstrCost(Instruction::ExtractElement, VT,
                                              Index + i * Factor);
      }

      unsigned InsSubCost = 0;
      for (unsigned i = 0; i < NumSubElts; i++)
        InsSubCost +=
            thisT()->getVectorInstrCost(Instruction::InsertElement, SubVT, i);

      Cost += Indices.size() * InsSubCost;
    } else {
      // The interleave cost is extracting all elements from the sub vectors,
      // and inserting them into the wide vector.
      //
      // E.g. An interleaved store of factor 2:
      //      %v0_v1 = shuffle %v0, %v1, <0, 4, 1, 5, 2, 6, 3, 7>
      //      store <8 x i32> %interleaved.vec, <8 x i32>* %ptr
      // The cost is estimated as extracting all elements from both <4 x i32>
      // vectors and inserting into the <8 x i32> vector.

      unsigned ExtSubCost = 0;
      for (unsigned i = 0; i < NumSubElts; i++)
        ExtSubCost +=
            thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVT, i);
      Cost += ExtSubCost * Factor;

      for (unsigned i = 0; i < NumElts; i++)
        Cost += static_cast<T *>(this)
                    ->getVectorInstrCost(Instruction::InsertElement, VT, i);
    }

    if (!UseMaskForCond)
      return Cost;

    Type *I8Type = Type::getInt8Ty(VT->getContext());
    auto *MaskVT = FixedVectorType::get(I8Type, NumElts);
    SubVT = FixedVectorType::get(I8Type, NumSubElts);

    // The Mask shuffling cost is extracting all the elements of the Mask
    // and inserting each of them Factor times into the wide vector:
    //
    // E.g. an interleaved group with factor 3:
    //     %mask = icmp ult <8 x i32> %vec1, %vec2
    //     %interleaved.mask = shufflevector <8 x i1> %mask, <8 x i1> undef,
    //        <24 x i32> <0,0,0,1,1,1,2,2,2,3,3,3,4,4,4,5,5,5,6,6,6,7,7,7>
    // The cost is estimated as extracting all mask elements from the <8xi1>
    // mask vector and inserting them factor times into the <24xi1> shuffled
    // mask vector.
    for (unsigned i = 0; i < NumSubElts; i++)
      Cost +=
          thisT()->getVectorInstrCost(Instruction::ExtractElement, SubVT, i);

    for (unsigned i = 0; i < NumElts; i++)
      Cost +=
          thisT()->getVectorInstrCost(Instruction::InsertElement, MaskVT, i);

    // The Gaps mask is invariant and created outside the loop, therefore the
    // cost of creating it is not accounted for here. However, if we have both
    // a MaskForGaps and some other mask that guards the execution of the
    // memory access, we need to account for the cost of And-ing the two masks
    // inside the loop.
    if (UseMaskForGaps)
      Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, MaskVT,
                                              CostKind);

    return Cost;
  }

  /// Get intrinsic cost based on arguments.
  unsigned getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
                                 TTI::TargetCostKind CostKind) {
    // Check for generically free intrinsics.
    if (BaseT::getIntrinsicInstrCost(ICA, CostKind) == 0)
      return 0;

    // Assume that target intrinsics are cheap.
    Intrinsic::ID IID = ICA.getID();
    if (Function::isTargetIntrinsic(IID))
      return TargetTransformInfo::TCC_Basic;

    if (ICA.isTypeBasedOnly())
      return getTypeBasedIntrinsicInstrCost(ICA, CostKind);

    Type *RetTy = ICA.getReturnType();

    ElementCount VF = ICA.getVectorFactor();
    ElementCount RetVF =
        (RetTy->isVectorTy() ? cast<VectorType>(RetTy)->getElementCount()
                             : ElementCount::getFixed(1));
    assert((RetVF.isScalar() || VF.isScalar()) &&
           "VF > 1 and RetVF is a vector type");
    const IntrinsicInst *I = ICA.getInst();
    const SmallVectorImpl<const Value *> &Args = ICA.getArgs();
    FastMathFlags FMF = ICA.getFlags();
    switch (IID) {
    default:
      break;

    case Intrinsic::cttz:
      // FIXME: If necessary, this should go in target-specific overrides.
      if (VF.isScalar() && RetVF.isScalar() &&
          getTLI()->isCheapToSpeculateCttz())
        return TargetTransformInfo::TCC_Basic;
      break;

    case Intrinsic::ctlz:
      // FIXME: If necessary, this should go in target-specific overrides.
      if (VF.isScalar() && RetVF.isScalar() &&
          getTLI()->isCheapToSpeculateCtlz())
        return TargetTransformInfo::TCC_Basic;
      break;

    case Intrinsic::memcpy:
      return thisT()->getMemcpyCost(ICA.getInst());

    case Intrinsic::masked_scatter: {
      assert(VF.isScalar() && "Can't vectorize types here.");
      const Value *Mask = Args[3];
      bool VarMask = !isa<Constant>(Mask);
      Align Alignment = cast<ConstantInt>(Args[2])->getAlignValue();
      return thisT()->getGatherScatterOpCost(Instruction::Store,
                                             Args[0]->getType(), Args[1],
                                             VarMask, Alignment, CostKind, I);
    }
    case Intrinsic::masked_gather: {
      assert(VF.isScalar() && "Can't vectorize types here.");
      const Value *Mask = Args[2];
      bool VarMask = !isa<Constant>(Mask);
      Align Alignment = cast<ConstantInt>(Args[1])->getAlignValue();
      return thisT()->getGatherScatterOpCost(Instruction::Load, RetTy, Args[0],
                                             VarMask, Alignment, CostKind, I);
    }
    case Intrinsic::experimental_vector_extract: {
      // FIXME: Handle case where a scalable vector is extracted from a scalable
      // vector
      if (isa<ScalableVectorType>(RetTy))
        return BaseT::getIntrinsicInstrCost(ICA, CostKind);
      unsigned Index = cast<ConstantInt>(Args[1])->getZExtValue();
      return thisT()->getShuffleCost(TTI::SK_ExtractSubvector,
                                     cast<VectorType>(Args[0]->getType()),
                                     Index, cast<VectorType>(RetTy));
    }
    case Intrinsic::experimental_vector_insert: {
      // FIXME: Handle case where a scalable vector is inserted into a scalable
      // vector
      if (isa<ScalableVectorType>(Args[1]->getType()))
        return BaseT::getIntrinsicInstrCost(ICA, CostKind);
      unsigned Index = cast<ConstantInt>(Args[2])->getZExtValue();
      return thisT()->getShuffleCost(
          TTI::SK_InsertSubvector, cast<VectorType>(Args[0]->getType()), Index,
          cast<VectorType>(Args[1]->getType()));
    }
    case Intrinsic::vector_reduce_add:
    case Intrinsic::vector_reduce_mul:
    case Intrinsic::vector_reduce_and:
    case Intrinsic::vector_reduce_or:
    case Intrinsic::vector_reduce_xor:
    case Intrinsic::vector_reduce_smax:
    case Intrinsic::vector_reduce_smin:
    case Intrinsic::vector_reduce_fmax:
    case Intrinsic::vector_reduce_fmin:
    case Intrinsic::vector_reduce_umax:
    case Intrinsic::vector_reduce_umin: {
      if (isa<ScalableVectorType>(RetTy))
        return BaseT::getIntrinsicInstrCost(ICA, CostKind);
      IntrinsicCostAttributes Attrs(IID, RetTy, Args[0]->getType(), FMF, 1, I);
      return getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
    }
    case Intrinsic::vector_reduce_fadd:
    case Intrinsic::vector_reduce_fmul: {
      if (isa<ScalableVectorType>(RetTy))
        return BaseT::getIntrinsicInstrCost(ICA, CostKind);
      IntrinsicCostAttributes Attrs(
          IID, RetTy, {Args[0]->getType(), Args[1]->getType()}, FMF, 1, I);
      return getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
    }
    case Intrinsic::fshl:
    case Intrinsic::fshr: {
      if (isa<ScalableVectorType>(RetTy))
        return BaseT::getIntrinsicInstrCost(ICA, CostKind);
      const Value *X = Args[0];
      const Value *Y = Args[1];
      const Value *Z = Args[2];
      TTI::OperandValueProperties OpPropsX, OpPropsY, OpPropsZ, OpPropsBW;
      TTI::OperandValueKind OpKindX = TTI::getOperandInfo(X, OpPropsX);
      TTI::OperandValueKind OpKindY = TTI::getOperandInfo(Y, OpPropsY);
      TTI::OperandValueKind OpKindZ = TTI::getOperandInfo(Z, OpPropsZ);
      TTI::OperandValueKind OpKindBW = TTI::OK_UniformConstantValue;
      OpPropsBW = isPowerOf2_32(RetTy->getScalarSizeInBits()) ? TTI::OP_PowerOf2
                                                              : TTI::OP_None;
      // fshl: (X << (Z % BW)) | (Y >> (BW - (Z % BW)))
      // fshr: (X << (BW - (Z % BW))) | (Y >> (Z % BW))
      unsigned Cost = 0;
      Cost +=
          thisT()->getArithmeticInstrCost(BinaryOperator::Or, RetTy, CostKind);
      Cost +=
          thisT()->getArithmeticInstrCost(BinaryOperator::Sub, RetTy, CostKind);
      Cost += thisT()->getArithmeticInstrCost(
          BinaryOperator::Shl, RetTy, CostKind, OpKindX, OpKindZ, OpPropsX);
      Cost += thisT()->getArithmeticInstrCost(
1328 BinaryOperator::LShr, RetTy, CostKind, OpKindY, OpKindZ, OpPropsY);
Andrew Walbran16937d02019-10-22 13:54:20 +01001329 // Non-constant shift amounts require a modulo.
1330 if (OpKindZ != TTI::OK_UniformConstantValue &&
1331 OpKindZ != TTI::OK_NonUniformConstantValue)
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001332 Cost += thisT()->getArithmeticInstrCost(BinaryOperator::URem, RetTy,
1333 CostKind, OpKindZ, OpKindBW,
1334 OpPropsZ, OpPropsBW);
Andrew Walbran16937d02019-10-22 13:54:20 +01001335 // For non-rotates (X != Y) we must add shift-by-zero handling costs.
1336 if (X != Y) {
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001337 Type *CondTy = RetTy->getWithNewBitWidth(1);
1338 Cost +=
1339 thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
1340 CmpInst::BAD_ICMP_PREDICATE, CostKind);
1341 Cost +=
1342 thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
1343 CmpInst::BAD_ICMP_PREDICATE, CostKind);
Andrew Walbran16937d02019-10-22 13:54:20 +01001344 }
1345 return Cost;
1346 }
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001347 }
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001348 // TODO: Handle the remaining intrinsics with scalable vector types
1349 if (isa<ScalableVectorType>(RetTy))
1350 return BaseT::getIntrinsicInstrCost(ICA, CostKind);
1351
1352 // Assume that we need to scalarize this intrinsic.
1353 SmallVector<Type *, 4> Types;
1354 for (const Value *Op : Args) {
1355 Type *OpTy = Op->getType();
1356 assert(VF.isScalar() || !OpTy->isVectorTy());
1357 Types.push_back(VF.isScalar()
1358 ? OpTy
1359 : FixedVectorType::get(OpTy, VF.getKnownMinValue()));
1360 }
1361
1362 if (VF.isVector() && !RetTy->isVoidTy())
1363 RetTy = FixedVectorType::get(RetTy, VF.getKnownMinValue());
1364
1365 // Compute the scalarization overhead based on Args for a vector
1366 // intrinsic. A vectorizer will pass a scalar RetTy and VF > 1, while
1367 // CostModel will pass a vector RetTy and a VF of 1.
1368 unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();
1369 if (RetVF.isVector() || VF.isVector()) {
1370 ScalarizationCost = 0;
1371 if (!RetTy->isVoidTy())
1372 ScalarizationCost +=
1373 getScalarizationOverhead(cast<VectorType>(RetTy), true, false);
1374 ScalarizationCost +=
1375 getOperandsScalarizationOverhead(Args, VF.getKnownMinValue());
1376 }
1377
1378 IntrinsicCostAttributes Attrs(IID, RetTy, Types, FMF, ScalarizationCost, I);
1379 return thisT()->getTypeBasedIntrinsicInstrCost(Attrs, CostKind);
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001380 }
1381
1382 /// Get intrinsic cost based on argument types.
1383 /// If ScalarizationCostPassed is std::numeric_limits<unsigned>::max(), the
1384 /// cost of scalarizing the arguments and the return value will be computed
1385 /// based on types.
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001386 unsigned getTypeBasedIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,
1387 TTI::TargetCostKind CostKind) {
1388 Intrinsic::ID IID = ICA.getID();
1389 Type *RetTy = ICA.getReturnType();
1390 const SmallVectorImpl<Type *> &Tys = ICA.getArgTypes();
1391 FastMathFlags FMF = ICA.getFlags();
1392 unsigned ScalarizationCostPassed = ICA.getScalarizationCost();
1393 bool SkipScalarizationCost = ICA.skipScalarizationCost();
Andrew Walbran16937d02019-10-22 13:54:20 +01001394
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001395 VectorType *VecOpTy = nullptr;
1396 if (!Tys.empty()) {
1397 // The vector reduction operand is operand 0 except for fadd/fmul.
1398 // Their operand 0 is a scalar start value, so the vector op is operand 1.
1399 unsigned VecTyIndex = 0;
1400 if (IID == Intrinsic::vector_reduce_fadd ||
1401 IID == Intrinsic::vector_reduce_fmul)
1402 VecTyIndex = 1;
1403 assert(Tys.size() > VecTyIndex && "Unexpected IntrinsicCostAttributes");
1404 VecOpTy = dyn_cast<VectorType>(Tys[VecTyIndex]);
1405 }
1406
1407 // Library call cost - other than size, make it expensive.
1408 unsigned SingleCallCost = CostKind == TTI::TCK_CodeSize ? 1 : 10;
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001409 SmallVector<unsigned, 2> ISDs;
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001410 switch (IID) {
1411 default: {
1412 // Assume that we need to scalarize this intrinsic.
1413 unsigned ScalarizationCost = ScalarizationCostPassed;
1414 unsigned ScalarCalls = 1;
1415 Type *ScalarRetTy = RetTy;
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001416 if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
1417 if (!SkipScalarizationCost)
1418 ScalarizationCost = getScalarizationOverhead(RetVTy, true, false);
1419 ScalarCalls = std::max(ScalarCalls,
1420 cast<FixedVectorType>(RetVTy)->getNumElements());
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001421 ScalarRetTy = RetTy->getScalarType();
1422 }
1423 SmallVector<Type *, 4> ScalarTys;
1424 for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1425 Type *Ty = Tys[i];
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001426 if (auto *VTy = dyn_cast<VectorType>(Ty)) {
1427 if (!SkipScalarizationCost)
1428 ScalarizationCost += getScalarizationOverhead(VTy, false, true);
1429 ScalarCalls = std::max(ScalarCalls,
1430 cast<FixedVectorType>(VTy)->getNumElements());
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001431 Ty = Ty->getScalarType();
1432 }
1433 ScalarTys.push_back(Ty);
1434 }
1435 if (ScalarCalls == 1)
1436 return 1; // Return cost of a scalar intrinsic. Assume it to be cheap.
1437
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001438 IntrinsicCostAttributes ScalarAttrs(IID, ScalarRetTy, ScalarTys, FMF);
Andrew Walbran16937d02019-10-22 13:54:20 +01001439 unsigned ScalarCost =
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001440 thisT()->getIntrinsicInstrCost(ScalarAttrs, CostKind);
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001441
1442 return ScalarCalls * ScalarCost + ScalarizationCost;
1443 }
1444 // Look for intrinsics that can be lowered directly or turned into a scalar
1445 // intrinsic call.
1446 case Intrinsic::sqrt:
1447 ISDs.push_back(ISD::FSQRT);
1448 break;
1449 case Intrinsic::sin:
1450 ISDs.push_back(ISD::FSIN);
1451 break;
1452 case Intrinsic::cos:
1453 ISDs.push_back(ISD::FCOS);
1454 break;
1455 case Intrinsic::exp:
1456 ISDs.push_back(ISD::FEXP);
1457 break;
1458 case Intrinsic::exp2:
1459 ISDs.push_back(ISD::FEXP2);
1460 break;
1461 case Intrinsic::log:
1462 ISDs.push_back(ISD::FLOG);
1463 break;
1464 case Intrinsic::log10:
1465 ISDs.push_back(ISD::FLOG10);
1466 break;
1467 case Intrinsic::log2:
1468 ISDs.push_back(ISD::FLOG2);
1469 break;
1470 case Intrinsic::fabs:
1471 ISDs.push_back(ISD::FABS);
1472 break;
Andrew Scull0372a572018-11-16 15:47:06 +00001473 case Intrinsic::canonicalize:
1474 ISDs.push_back(ISD::FCANONICALIZE);
1475 break;
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001476 case Intrinsic::minnum:
1477 ISDs.push_back(ISD::FMINNUM);
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001478 break;
1479 case Intrinsic::maxnum:
1480 ISDs.push_back(ISD::FMAXNUM);
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001481 break;
1482 case Intrinsic::minimum:
1483 ISDs.push_back(ISD::FMINIMUM);
1484 break;
1485 case Intrinsic::maximum:
1486 ISDs.push_back(ISD::FMAXIMUM);
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001487 break;
1488 case Intrinsic::copysign:
1489 ISDs.push_back(ISD::FCOPYSIGN);
1490 break;
1491 case Intrinsic::floor:
1492 ISDs.push_back(ISD::FFLOOR);
1493 break;
1494 case Intrinsic::ceil:
1495 ISDs.push_back(ISD::FCEIL);
1496 break;
1497 case Intrinsic::trunc:
1498 ISDs.push_back(ISD::FTRUNC);
1499 break;
1500 case Intrinsic::nearbyint:
1501 ISDs.push_back(ISD::FNEARBYINT);
1502 break;
1503 case Intrinsic::rint:
1504 ISDs.push_back(ISD::FRINT);
1505 break;
1506 case Intrinsic::round:
1507 ISDs.push_back(ISD::FROUND);
1508 break;
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001509 case Intrinsic::roundeven:
1510 ISDs.push_back(ISD::FROUNDEVEN);
1511 break;
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001512 case Intrinsic::pow:
1513 ISDs.push_back(ISD::FPOW);
1514 break;
1515 case Intrinsic::fma:
1516 ISDs.push_back(ISD::FMA);
1517 break;
1518 case Intrinsic::fmuladd:
1519 ISDs.push_back(ISD::FMA);
1520 break;
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001521 case Intrinsic::experimental_constrained_fmuladd:
1522 ISDs.push_back(ISD::STRICT_FMA);
1523 break;
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001524 // FIXME: We should return 0 whenever getIntrinsicCost == TCC_Free.
1525 case Intrinsic::lifetime_start:
1526 case Intrinsic::lifetime_end:
1527 case Intrinsic::sideeffect:
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001528 case Intrinsic::pseudoprobe:
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001529 return 0;
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001530 case Intrinsic::masked_store: {
1531 Type *Ty = Tys[0];
1532 Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
1533 return thisT()->getMaskedMemoryOpCost(Instruction::Store, Ty, TyAlign, 0,
1534 CostKind);
1535 }
1536 case Intrinsic::masked_load: {
1537 Type *Ty = RetTy;
1538 Align TyAlign = thisT()->DL.getABITypeAlign(Ty);
1539 return thisT()->getMaskedMemoryOpCost(Instruction::Load, Ty, TyAlign, 0,
1540 CostKind);
1541 }
1542 case Intrinsic::vector_reduce_add:
1543 return thisT()->getArithmeticReductionCost(Instruction::Add, VecOpTy,
1544 /*IsPairwiseForm=*/false,
1545 CostKind);
1546 case Intrinsic::vector_reduce_mul:
1547 return thisT()->getArithmeticReductionCost(Instruction::Mul, VecOpTy,
1548 /*IsPairwiseForm=*/false,
1549 CostKind);
1550 case Intrinsic::vector_reduce_and:
1551 return thisT()->getArithmeticReductionCost(Instruction::And, VecOpTy,
1552 /*IsPairwiseForm=*/false,
1553 CostKind);
1554 case Intrinsic::vector_reduce_or:
1555 return thisT()->getArithmeticReductionCost(Instruction::Or, VecOpTy,
1556 /*IsPairwiseForm=*/false,
1557 CostKind);
1558 case Intrinsic::vector_reduce_xor:
1559 return thisT()->getArithmeticReductionCost(Instruction::Xor, VecOpTy,
1560 /*IsPairwiseForm=*/false,
1561 CostKind);
1562 case Intrinsic::vector_reduce_fadd:
1563 // FIXME: Add new flag for cost of strict reductions.
1564 return thisT()->getArithmeticReductionCost(Instruction::FAdd, VecOpTy,
1565 /*IsPairwiseForm=*/false,
1566 CostKind);
1567 case Intrinsic::vector_reduce_fmul:
1568 // FIXME: Add new flag for cost of strict reductions.
1569 return thisT()->getArithmeticReductionCost(Instruction::FMul, VecOpTy,
1570 /*IsPairwiseForm=*/false,
1571 CostKind);
1572 case Intrinsic::vector_reduce_smax:
1573 case Intrinsic::vector_reduce_smin:
1574 case Intrinsic::vector_reduce_fmax:
1575 case Intrinsic::vector_reduce_fmin:
1576 return thisT()->getMinMaxReductionCost(
1577 VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)),
1578 /*IsPairwiseForm=*/false,
1579 /*IsUnsigned=*/false, CostKind);
1580 case Intrinsic::vector_reduce_umax:
1581 case Intrinsic::vector_reduce_umin:
1582 return thisT()->getMinMaxReductionCost(
1583 VecOpTy, cast<VectorType>(CmpInst::makeCmpResultType(VecOpTy)),
1584 /*IsPairwiseForm=*/false,
1585 /*IsUnsigned=*/true, CostKind);
1586 case Intrinsic::abs:
1587 case Intrinsic::smax:
1588 case Intrinsic::smin:
1589 case Intrinsic::umax:
1590 case Intrinsic::umin: {
1591 // abs(X) = select(icmp(X,0),X,sub(0,X))
1592 // minmax(X,Y) = select(icmp(X,Y),X,Y)
1593 Type *CondTy = RetTy->getWithNewBitWidth(1);
1594 unsigned Cost = 0;
1595 // TODO: Ideally getCmpSelInstrCost would accept an icmp condition code.
1596 Cost +=
1597 thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
1598 CmpInst::BAD_ICMP_PREDICATE, CostKind);
1599 Cost +=
1600 thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
1601 CmpInst::BAD_ICMP_PREDICATE, CostKind);
1602 // TODO: Should we add an OperandValueProperties::OP_Zero property?
1603 if (IID == Intrinsic::abs)
1604 Cost += thisT()->getArithmeticInstrCost(
1605 BinaryOperator::Sub, RetTy, CostKind, TTI::OK_UniformConstantValue);
1606 return Cost;
1607 }
Andrew Walbran16937d02019-10-22 13:54:20 +01001608 case Intrinsic::sadd_sat:
1609 case Intrinsic::ssub_sat: {
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001610 Type *CondTy = RetTy->getWithNewBitWidth(1);
Andrew Walbran16937d02019-10-22 13:54:20 +01001611
1612 Type *OpTy = StructType::create({RetTy, CondTy});
1613 Intrinsic::ID OverflowOp = IID == Intrinsic::sadd_sat
1614 ? Intrinsic::sadd_with_overflow
1615 : Intrinsic::ssub_with_overflow;
1616
1617 // SatMax -> Overflow && SumDiff < 0
1618 // SatMin -> Overflow && SumDiff >= 0
1619 unsigned Cost = 0;
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001620 IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,
1621 ScalarizationCostPassed);
1622 Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
1623 Cost +=
1624 thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, RetTy, CondTy,
1625 CmpInst::BAD_ICMP_PREDICATE, CostKind);
1626 Cost += 2 * thisT()->getCmpSelInstrCost(
1627 BinaryOperator::Select, RetTy, CondTy,
1628 CmpInst::BAD_ICMP_PREDICATE, CostKind);
Andrew Walbran16937d02019-10-22 13:54:20 +01001629 return Cost;
1630 }
1631 case Intrinsic::uadd_sat:
1632 case Intrinsic::usub_sat: {
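      // Unsigned saturating add/sub is costed as the corresponding
      // *.with.overflow intrinsic plus a single select that clamps the result
      // to the saturation value when the overflow flag is set.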
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001633 Type *CondTy = RetTy->getWithNewBitWidth(1);
Andrew Walbran16937d02019-10-22 13:54:20 +01001634
1635 Type *OpTy = StructType::create({RetTy, CondTy});
1636 Intrinsic::ID OverflowOp = IID == Intrinsic::uadd_sat
1637 ? Intrinsic::uadd_with_overflow
1638 : Intrinsic::usub_with_overflow;
1639
1640 unsigned Cost = 0;
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001641 IntrinsicCostAttributes Attrs(OverflowOp, OpTy, {RetTy, RetTy}, FMF,
1642 ScalarizationCostPassed);
1643 Cost += thisT()->getIntrinsicInstrCost(Attrs, CostKind);
1644 Cost +=
1645 thisT()->getCmpSelInstrCost(BinaryOperator::Select, RetTy, CondTy,
1646 CmpInst::BAD_ICMP_PREDICATE, CostKind);
Andrew Walbran16937d02019-10-22 13:54:20 +01001647 return Cost;
1648 }
Andrew Walbran3d2c1972020-04-07 12:24:26 +01001649 case Intrinsic::smul_fix:
1650 case Intrinsic::umul_fix: {
1651 unsigned ExtSize = RetTy->getScalarSizeInBits() * 2;
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001652 Type *ExtTy = RetTy->getWithNewBitWidth(ExtSize);
Andrew Walbran3d2c1972020-04-07 12:24:26 +01001653
1654 unsigned ExtOp =
1655 IID == Intrinsic::smul_fix ? Instruction::SExt : Instruction::ZExt;
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001656 TTI::CastContextHint CCH = TTI::CastContextHint::None;
Andrew Walbran3d2c1972020-04-07 12:24:26 +01001657
1658 unsigned Cost = 0;
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001659 Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, RetTy, CCH, CostKind);
Andrew Walbran3d2c1972020-04-07 12:24:26 +01001660 Cost +=
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001661 thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
1662 Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, RetTy, ExtTy,
1663 CCH, CostKind);
1664 Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, RetTy,
1665 CostKind, TTI::OK_AnyValue,
1666 TTI::OK_UniformConstantValue);
1667 Cost += thisT()->getArithmeticInstrCost(Instruction::Shl, RetTy, CostKind,
1668 TTI::OK_AnyValue,
1669 TTI::OK_UniformConstantValue);
1670 Cost += thisT()->getArithmeticInstrCost(Instruction::Or, RetTy, CostKind);
Andrew Walbran3d2c1972020-04-07 12:24:26 +01001671 return Cost;
1672 }
Andrew Walbran16937d02019-10-22 13:54:20 +01001673 case Intrinsic::sadd_with_overflow:
1674 case Intrinsic::ssub_with_overflow: {
1675 Type *SumTy = RetTy->getContainedType(0);
1676 Type *OverflowTy = RetTy->getContainedType(1);
1677 unsigned Opcode = IID == Intrinsic::sadd_with_overflow
1678 ? BinaryOperator::Add
1679 : BinaryOperator::Sub;
1680
1681 // LHSSign -> LHS >= 0
1682 // RHSSign -> RHS >= 0
1683 // SumSign -> Sum >= 0
1684 //
1685 // Add:
1686 // Overflow -> (LHSSign == RHSSign) && (LHSSign != SumSign)
1687 // Sub:
1688 // Overflow -> (LHSSign != RHSSign) && (LHSSign != SumSign)
1689 unsigned Cost = 0;
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001690 Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
1691 Cost += 3 * thisT()->getCmpSelInstrCost(
1692 Instruction::ICmp, SumTy, OverflowTy,
1693 CmpInst::BAD_ICMP_PREDICATE, CostKind);
1694 Cost += 2 * thisT()->getCmpSelInstrCost(
1695 Instruction::Select, OverflowTy, OverflowTy,
1696 CmpInst::BAD_ICMP_PREDICATE, CostKind);
1697 Cost += thisT()->getArithmeticInstrCost(BinaryOperator::And, OverflowTy,
1698 CostKind);
Andrew Walbran16937d02019-10-22 13:54:20 +01001699 return Cost;
1700 }
1701 case Intrinsic::uadd_with_overflow:
1702 case Intrinsic::usub_with_overflow: {
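      // Unsigned add/sub overflow needs no sign analysis: the overflow flag
      // comes from a single compare of the result against an operand (e.g.
      // uadd overflows iff Sum < LHS), so cost one arithmetic op plus one
      // compare.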
1703 Type *SumTy = RetTy->getContainedType(0);
1704 Type *OverflowTy = RetTy->getContainedType(1);
1705 unsigned Opcode = IID == Intrinsic::uadd_with_overflow
1706 ? BinaryOperator::Add
1707 : BinaryOperator::Sub;
1708
1709 unsigned Cost = 0;
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001710 Cost += thisT()->getArithmeticInstrCost(Opcode, SumTy, CostKind);
1711 Cost +=
1712 thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, SumTy, OverflowTy,
1713 CmpInst::BAD_ICMP_PREDICATE, CostKind);
Andrew Walbran16937d02019-10-22 13:54:20 +01001714 return Cost;
1715 }
Andrew Walbran3d2c1972020-04-07 12:24:26 +01001716 case Intrinsic::smul_with_overflow:
1717 case Intrinsic::umul_with_overflow: {
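      // Multiply-with-overflow is costed via the widened expansion: roughly,
      // extend both operands to twice the width, multiply, truncate the
      // product back, shift out the high half, and compare it (after a
      // sign-correcting arithmetic shift in the signed case) against the low
      // half to form the overflow bit.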
1718 Type *MulTy = RetTy->getContainedType(0);
1719 Type *OverflowTy = RetTy->getContainedType(1);
1720 unsigned ExtSize = MulTy->getScalarSizeInBits() * 2;
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001721 Type *ExtTy = MulTy->getWithNewBitWidth(ExtSize);
Andrew Walbran3d2c1972020-04-07 12:24:26 +01001722
1723 unsigned ExtOp =
1724 IID == Intrinsic::smul_with_overflow ? Instruction::SExt : Instruction::ZExt;
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001725 TTI::CastContextHint CCH = TTI::CastContextHint::None;
Andrew Walbran3d2c1972020-04-07 12:24:26 +01001726
1727 unsigned Cost = 0;
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001728 Cost += 2 * thisT()->getCastInstrCost(ExtOp, ExtTy, MulTy, CCH, CostKind);
Andrew Walbran3d2c1972020-04-07 12:24:26 +01001729 Cost +=
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001730 thisT()->getArithmeticInstrCost(Instruction::Mul, ExtTy, CostKind);
1731 Cost += 2 * thisT()->getCastInstrCost(Instruction::Trunc, MulTy, ExtTy,
1732 CCH, CostKind);
1733 Cost += thisT()->getArithmeticInstrCost(Instruction::LShr, MulTy,
1734 CostKind, TTI::OK_AnyValue,
1735 TTI::OK_UniformConstantValue);
Andrew Walbran3d2c1972020-04-07 12:24:26 +01001736
1737 if (IID == Intrinsic::smul_with_overflow)
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001738 Cost += thisT()->getArithmeticInstrCost(Instruction::AShr, MulTy,
1739 CostKind, TTI::OK_AnyValue,
1740 TTI::OK_UniformConstantValue);
Andrew Walbran3d2c1972020-04-07 12:24:26 +01001741
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001742 Cost +=
1743 thisT()->getCmpSelInstrCost(BinaryOperator::ICmp, MulTy, OverflowTy,
1744 CmpInst::BAD_ICMP_PREDICATE, CostKind);
Andrew Walbran3d2c1972020-04-07 12:24:26 +01001745 return Cost;
1746 }
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001747 case Intrinsic::ctpop:
1748 ISDs.push_back(ISD::CTPOP);
1749 // In case of legalization use TCC_Expensive. This is cheaper than a
1750 // library call but still not a cheap instruction.
1751 SingleCallCost = TargetTransformInfo::TCC_Expensive;
1752 break;
1753 // FIXME: ctlz, cttz, ...
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001754 case Intrinsic::bswap:
1755 ISDs.push_back(ISD::BSWAP);
1756 break;
1757 case Intrinsic::bitreverse:
1758 ISDs.push_back(ISD::BITREVERSE);
1759 break;
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001760 }
1761
1762 const TargetLoweringBase *TLI = getTLI();
1763 std::pair<unsigned, MVT> LT = TLI->getTypeLegalizationCost(DL, RetTy);
1764
1765 SmallVector<unsigned, 2> LegalCost;
1766 SmallVector<unsigned, 2> CustomCost;
1767 for (unsigned ISD : ISDs) {
1768 if (TLI->isOperationLegalOrPromote(ISD, LT.second)) {
Andrew Scull0372a572018-11-16 15:47:06 +00001769 if (IID == Intrinsic::fabs && LT.second.isFloatingPoint() &&
1770 TLI->isFAbsFree(LT.second)) {
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001771 return 0;
1772 }
1773
1774 // The operation is legal. Assume it costs 1.
1775 // If the type is split to multiple registers, assume that there is some
1776 // overhead to this.
1777 // TODO: Once we have extract/insert subvector cost we need to use them.
1778 if (LT.first > 1)
1779 LegalCost.push_back(LT.first * 2);
1780 else
1781 LegalCost.push_back(LT.first * 1);
1782 } else if (!TLI->isOperationExpand(ISD, LT.second)) {
1783 // If the operation is custom lowered then assume
1784 // that the code is twice as expensive.
1785 CustomCost.push_back(LT.first * 2);
1786 }
1787 }
1788
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001789 auto *MinLegalCostI = std::min_element(LegalCost.begin(), LegalCost.end());
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001790 if (MinLegalCostI != LegalCost.end())
1791 return *MinLegalCostI;
1792
Andrew Walbran16937d02019-10-22 13:54:20 +01001793 auto MinCustomCostI =
1794 std::min_element(CustomCost.begin(), CustomCost.end());
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001795 if (MinCustomCostI != CustomCost.end())
1796 return *MinCustomCostI;
1797
1798 // If we can't lower fmuladd into an FMA estimate the cost as a floating
1799 // point mul followed by an add.
1800 if (IID == Intrinsic::fmuladd)
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001801 return thisT()->getArithmeticInstrCost(BinaryOperator::FMul, RetTy,
1802 CostKind) +
1803 thisT()->getArithmeticInstrCost(BinaryOperator::FAdd, RetTy,
1804 CostKind);
1805 if (IID == Intrinsic::experimental_constrained_fmuladd) {
1806 IntrinsicCostAttributes FMulAttrs(
1807 Intrinsic::experimental_constrained_fmul, RetTy, Tys);
1808 IntrinsicCostAttributes FAddAttrs(
1809 Intrinsic::experimental_constrained_fadd, RetTy, Tys);
1810 return thisT()->getIntrinsicInstrCost(FMulAttrs, CostKind) +
1811 thisT()->getIntrinsicInstrCost(FAddAttrs, CostKind);
1812 }
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001813
1814 // Else, assume that we need to scalarize this intrinsic. For math builtins
1815 // this will emit a costly libcall, adding call overhead and spills. Make it
1816 // very expensive.
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001817 if (auto *RetVTy = dyn_cast<VectorType>(RetTy)) {
1818 unsigned ScalarizationCost = SkipScalarizationCost ?
1819 ScalarizationCostPassed : getScalarizationOverhead(RetVTy, true, false);
1820
1821 unsigned ScalarCalls = cast<FixedVectorType>(RetVTy)->getNumElements();
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001822 SmallVector<Type *, 4> ScalarTys;
1823 for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
1824 Type *Ty = Tys[i];
1825 if (Ty->isVectorTy())
1826 Ty = Ty->getScalarType();
1827 ScalarTys.push_back(Ty);
1828 }
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001829 IntrinsicCostAttributes Attrs(IID, RetTy->getScalarType(), ScalarTys, FMF);
1830 unsigned ScalarCost = thisT()->getIntrinsicInstrCost(Attrs, CostKind);
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001831 for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) {
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001832 if (auto *VTy = dyn_cast<VectorType>(Tys[i])) {
1833 if (!ICA.skipScalarizationCost())
1834 ScalarizationCost += getScalarizationOverhead(VTy, false, true);
1835 ScalarCalls = std::max(ScalarCalls,
1836 cast<FixedVectorType>(VTy)->getNumElements());
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001837 }
1838 }
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001839 return ScalarCalls * ScalarCost + ScalarizationCost;
1840 }
1841
1842 // This is going to be turned into a library call, make it expensive.
1843 return SingleCallCost;
1844 }
1845
Andrew Scullcdfcccc2018-10-05 20:58:37 +01001846 /// Compute a cost of the given call instruction.
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001847 ///
1848 /// Compute the cost of calling function F with return type RetTy and
1849 /// argument types Tys. F might be nullptr, in this case the cost of an
1850 /// arbitrary call with the specified signature will be returned.
1851 /// This is used, for instance, when we estimate a call of a vector
1852 /// counterpart of the given function.
1853 /// \param F Called function, might be nullptr.
1854 /// \param RetTy Return value type.
1855 /// \param Tys Argument types.
1856 /// \returns The cost of Call instruction.
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001857 unsigned getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys,
1858 TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) {
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001859 return 10;
1860 }
1861
1862 unsigned getNumberOfParts(Type *Tp) {
1863 std::pair<unsigned, MVT> LT = getTLI()->getTypeLegalizationCost(DL, Tp);
1864 return LT.first;
1865 }
1866
1867 unsigned getAddressComputationCost(Type *Ty, ScalarEvolution *,
1868 const SCEV *) {
1869 return 0;
1870 }
1871
1872 /// Try to calculate arithmetic and shuffle op costs for reduction operations.
1873 /// We assume that reduction operations are performed in the following way:
1874 /// 1. Non-pairwise reduction
1875 /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1876 /// <n x i32> <i32 n/2, i32 n/2 + 1, ..., i32 n, i32 undef, ..., i32 undef>
1877 /// \----------------v-------------/ \----------v------------/
1878 /// n/2 elements n/2 elements
1879 /// %red1 = op <n x t> %val, <n x t> %val1
1880 /// After this operation we have a vector %red1 where only the first n/2
1881 /// elements are meaningful, the second n/2 elements are undefined and can be
1882 /// dropped. All other operations are actually working with the vector of
1883 /// length n/2, not n, though the real vector length is still n.
1884 /// %val2 = shufflevector<n x t> %red1, <n x t> %undef,
1885 /// <n x i32> <i32 n/4, i32 n/4 + 1, ..., i32 n/2, i32 undef, ..., i32 undef>
1886 /// \----------------v-------------/ \----------v------------/
1887 /// n/4 elements 3*n/4 elements
1888 /// %red2 = op <n x t> %red1, <n x t> %val2 - working with the vector of
1889 /// length n/2, the resulting vector has length n/4 etc.
1890 /// 2. Pairwise reduction:
1891 /// Everything is the same except for an additional shuffle operation which
1892 /// is used to produce operands for pairwise kind of reductions.
1893 /// %val1 = shufflevector<n x t> %val, <n x t> %undef,
1894 /// <n x i32> <i32 0, i32 2, ..., i32 n-2, i32 undef, ..., i32 undef>
1895 /// \-------------v----------/ \----------v------------/
1896 /// n/2 elements n/2 elements
1897 /// %val2 = shufflevector<n x t> %val, <n x t> %undef,
1898 /// <n x i32> <i32 1, i32 3, ..., i32 n-1, i32 undef, ..., i32 undef>
1899 /// \-------------v----------/ \----------v------------/
1900 /// n/2 elements n/2 elements
1901 /// %red1 = op <n x t> %val1, <n x t> %val2
1902 /// Again, the operation is performed on <n x t> vector, but the resulting
1903 /// vector %red1 is <n/2 x t> vector.
1904 ///
1905 /// The cost model should take into account that the actual length of the
1906 /// vector is reduced on each iteration.
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001907 unsigned getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,
1908 bool IsPairwise,
1909 TTI::TargetCostKind CostKind) {
1910 Type *ScalarTy = Ty->getElementType();
1911 unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001912 unsigned NumReduxLevels = Log2_32(NumVecElts);
1913 unsigned ArithCost = 0;
1914 unsigned ShuffleCost = 0;
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001915 std::pair<unsigned, MVT> LT =
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001916 thisT()->getTLI()->getTypeLegalizationCost(DL, Ty);
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001917 unsigned LongVectorCount = 0;
1918 unsigned MVTLen =
1919 LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
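    // While the vector is wider than the widest legal vector register, model
    // each halving step as an extract-subvector shuffle (two shuffles for the
    // pairwise form) plus one arithmetic op on the half-width type.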
1920 while (NumVecElts > MVTLen) {
1921 NumVecElts /= 2;
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001922 VectorType *SubTy = FixedVectorType::get(ScalarTy, NumVecElts);
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001923 // Assume the pairwise shuffles add a cost.
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001924 ShuffleCost +=
1925 (IsPairwise + 1) * thisT()->getShuffleCost(TTI::SK_ExtractSubvector,
1926 Ty, NumVecElts, SubTy);
1927 ArithCost += thisT()->getArithmeticInstrCost(Opcode, SubTy, CostKind);
Andrew Walbran16937d02019-10-22 13:54:20 +01001928 Ty = SubTy;
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001929 ++LongVectorCount;
1930 }
Andrew Walbran16937d02019-10-22 13:54:20 +01001931
1932 NumReduxLevels -= LongVectorCount;
1933
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001934 // The minimal length of the vector is limited by the real length of vector
1935 // operations performed on the current platform. That's why several final
1936 // reduction operations are performed on the vectors with the same
1937 // architecture-dependent length.
Andrew Walbran16937d02019-10-22 13:54:20 +01001938
1939 // Non-pairwise reductions need one shuffle per reduction level. Pairwise
1940 // reductions need two shuffles on every level but the last; on that level
1941 // one of the shuffles is <0, u, u, ...>, which is the identity shuffle.
1942 unsigned NumShuffles = NumReduxLevels;
1943 if (IsPairwise && NumReduxLevels >= 1)
1944 NumShuffles += NumReduxLevels - 1;
1945 ShuffleCost += NumShuffles *
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001946 thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, 0, Ty);
1947 ArithCost += NumReduxLevels * thisT()->getArithmeticInstrCost(Opcode, Ty);
Andrew Walbran16937d02019-10-22 13:54:20 +01001948 return ShuffleCost + ArithCost +
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001949 thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001950 }
1951
1952 /// Try to calculate op costs for min/max reduction operations.
1953 /// \param CondTy Conditional type for the Select instruction.
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001954 unsigned getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,
1955 bool IsPairwise, bool IsUnsigned,
1956 TTI::TargetCostKind CostKind) {
1957 Type *ScalarTy = Ty->getElementType();
1958 Type *ScalarCondTy = CondTy->getElementType();
1959 unsigned NumVecElts = cast<FixedVectorType>(Ty)->getNumElements();
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001960 unsigned NumReduxLevels = Log2_32(NumVecElts);
1961 unsigned CmpOpcode;
1962 if (Ty->isFPOrFPVectorTy()) {
1963 CmpOpcode = Instruction::FCmp;
1964 } else {
1965 assert(Ty->isIntOrIntVectorTy() &&
1966 "expecting floating point or integer type for min/max reduction");
1967 CmpOpcode = Instruction::ICmp;
1968 }
1969 unsigned MinMaxCost = 0;
1970 unsigned ShuffleCost = 0;
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001971 std::pair<unsigned, MVT> LT =
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001972 thisT()->getTLI()->getTypeLegalizationCost(DL, Ty);
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001973 unsigned LongVectorCount = 0;
1974 unsigned MVTLen =
1975 LT.second.isVector() ? LT.second.getVectorNumElements() : 1;
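    // Same halving scheme as getArithmeticReductionCost, except that each
    // step is costed as a compare plus a select on the half-width type
    // instead of a single arithmetic instruction.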
1976 while (NumVecElts > MVTLen) {
1977 NumVecElts /= 2;
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001978 auto *SubTy = FixedVectorType::get(ScalarTy, NumVecElts);
1979 CondTy = FixedVectorType::get(ScalarCondTy, NumVecElts);
Andrew Walbran16937d02019-10-22 13:54:20 +01001980
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001981 // Assume the pairwise shuffles add a cost.
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001982 ShuffleCost +=
1983 (IsPairwise + 1) * thisT()->getShuffleCost(TTI::SK_ExtractSubvector,
1984 Ty, NumVecElts, SubTy);
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001985 MinMaxCost +=
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02001986 thisT()->getCmpSelInstrCost(CmpOpcode, SubTy, CondTy,
1987 CmpInst::BAD_ICMP_PREDICATE, CostKind) +
1988 thisT()->getCmpSelInstrCost(Instruction::Select, SubTy, CondTy,
1989 CmpInst::BAD_ICMP_PREDICATE, CostKind);
Andrew Walbran16937d02019-10-22 13:54:20 +01001990 Ty = SubTy;
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001991 ++LongVectorCount;
1992 }
Andrew Walbran16937d02019-10-22 13:54:20 +01001993
1994 NumReduxLevels -= LongVectorCount;
1995
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01001996 // The minimal length of the vector is limited by the real length of vector
1997 // operations performed on the current platform. That's why several final
1998 // reduction operations are performed on the vectors with the same
1999 // architecture-dependent length.
Andrew Walbran16937d02019-10-22 13:54:20 +01002000
2001 // Non-pairwise reductions need one shuffle per reduction level. Pairwise
2002 // reductions need two shuffles on every level but the last; on that level
2003 // one of the shuffles is <0, u, u, ...>, which is the identity shuffle.
2004 unsigned NumShuffles = NumReduxLevels;
2005 if (IsPairwise && NumReduxLevels >= 1)
2006 NumShuffles += NumReduxLevels - 1;
2007 ShuffleCost += NumShuffles *
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02002008 thisT()->getShuffleCost(TTI::SK_PermuteSingleSrc, Ty, 0, Ty);
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01002009 MinMaxCost +=
Andrew Walbran16937d02019-10-22 13:54:20 +01002010 NumReduxLevels *
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02002011 (thisT()->getCmpSelInstrCost(CmpOpcode, Ty, CondTy,
2012 CmpInst::BAD_ICMP_PREDICATE, CostKind) +
2013 thisT()->getCmpSelInstrCost(Instruction::Select, Ty, CondTy,
2014 CmpInst::BAD_ICMP_PREDICATE, CostKind));
Andrew Walbran16937d02019-10-22 13:54:20 +01002015 // The last min/max should be in vector registers and we counted it above.
2016 // So just need a single extractelement.
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01002017 return ShuffleCost + MinMaxCost +
Olivier Deprezf4ef2d02021-04-20 13:36:24 +02002018 thisT()->getVectorInstrCost(Instruction::ExtractElement, Ty, 0);
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01002019 }
2020
2021 unsigned getVectorSplitCost() { return 1; }
2022
2023 /// @}
2024};
2025
Andrew Scullcdfcccc2018-10-05 20:58:37 +01002026/// Concrete BasicTTIImpl that can be used if no further customization
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01002027/// is needed.
2028class BasicTTIImpl : public BasicTTIImplBase<BasicTTIImpl> {
2029 using BaseT = BasicTTIImplBase<BasicTTIImpl>;
2030
2031 friend class BasicTTIImplBase<BasicTTIImpl>;
2032
2033 const TargetSubtargetInfo *ST;
2034 const TargetLoweringBase *TLI;
2035
2036 const TargetSubtargetInfo *getST() const { return ST; }
2037 const TargetLoweringBase *getTLI() const { return TLI; }
2038
2039public:
Andrew Scullcdfcccc2018-10-05 20:58:37 +01002040 explicit BasicTTIImpl(const TargetMachine *TM, const Function &F);
Andrew Scull5e1ddfa2018-08-14 10:06:54 +01002041};
2042
2043} // end namespace llvm
2044
2045#endif // LLVM_CODEGEN_BASICTTIIMPL_H