Blame - linux-x64/clang/include/llvm/Analysis/TargetTransformInfo.h - hafnium/prebuilts

2018-08-14 10:06:54 +0100

[diff] [blame]

1

//===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//

2

//

Andrew Walbran

2019-10-22 13:54:20 +0100

[diff] [blame]

3

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

4

// See https://llvm.org/LICENSE.txt for license information.

5

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

6

//

7

//===----------------------------------------------------------------------===//

8

/// \file

9

/// This pass exposes codegen information to IR-level passes. Every

10

/// transformation that uses codegen information is broken into three parts:

11

/// 1. The IR-level analysis pass.

12

/// 2. The IR-level transformation interface which provides the needed

13

/// information.

14

/// 3. Codegen-level implementation which uses target-specific hooks.

15

///

16

/// This file defines #2, which is the interface that IR-level transformations

17

/// use for querying the codegen.

18

///

19

//===----------------------------------------------------------------------===//

20

21

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H

22

#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H

23

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

24

#include "llvm/IR/InstrTypes.h"

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

25

#include "llvm/IR/Operator.h"

26

#include "llvm/IR/PassManager.h"

27

#include "llvm/Pass.h"

28

#include "llvm/Support/AtomicOrdering.h"

29

#include "llvm/Support/DataTypes.h"

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

30

#include "llvm/Support/InstructionCost.h"

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

31

#include <functional>

namespace llvm {

namespace Intrinsic {
/// Numeric identifier type used to name an intrinsic in this header.
using ID = unsigned;
} // namespace Intrinsic

38

Andrew Walbran

2020-04-07 12:24:26 +0100

[diff] [blame]

39

class AssumptionCache;

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

40

class BlockFrequencyInfo;

41

class DominatorTree;

Andrew Walbran

2020-04-07 12:24:26 +0100

[diff] [blame]

42

class BranchInst;

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

43

class CallBase;

44

class ExtractElementInst;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

45

class Function;

46

class GlobalValue;

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

47

class InstCombiner;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

48

class IntrinsicInst;

49

class LoadInst;

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

50

class LoopAccessInfo;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

51

class Loop;

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

52

class LoopInfo;

53

class ProfileSummaryInfo;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

54

class SCEV;

55

class ScalarEvolution;

56

class StoreInst;

57

class SwitchInst;

Andrew Walbran

2020-04-07 12:24:26 +0100

[diff] [blame]

58

class TargetLibraryInfo;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

59

class Type;

60

class User;

61

class Value;

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

62

struct KnownBits;

63

template <typename T> class Optional;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

64

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

65

/// Information about a load/store intrinsic defined by the target.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

66

struct MemIntrinsicInfo {

67

/// This is the pointer that the intrinsic is loading from or storing to.

68

/// If this is non-null, then analysis/optimization passes can assume that

69

/// this intrinsic is functionally equivalent to a load/store from this

70

/// pointer.

71

Value *PtrVal = nullptr;

72

73

// Ordering for atomic operations.

74

AtomicOrdering Ordering = AtomicOrdering::NotAtomic;

75

76

// Same Id is set by the target for corresponding load/store intrinsics.

77

unsigned short MatchingId = 0;

78

79

bool ReadMem = false;

80

bool WriteMem = false;

81

bool IsVolatile = false;

82

83

bool isUnordered() const {

84

return (Ordering == AtomicOrdering::NotAtomic ||

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

85

Ordering == AtomicOrdering::Unordered) &&

86

!IsVolatile;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

}

};

Andrew Walbran

2020-04-07 12:24:26 +0100

[diff] [blame]

90

/// Attributes of a target dependent hardware loop.

91

struct HardwareLoopInfo {

92

HardwareLoopInfo() = delete;

93

HardwareLoopInfo(Loop *L) : L(L) {}

94

Loop *L = nullptr;

95

BasicBlock *ExitBlock = nullptr;

96

BranchInst *ExitBranch = nullptr;

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

97

const SCEV *TripCount = nullptr;

Andrew Walbran

2020-04-07 12:24:26 +0100

[diff] [blame]

98

IntegerType *CountType = nullptr;

99

Value *LoopDecrement = nullptr; // Decrement the loop counter by this

100

// value in every iteration.

101

bool IsNestingLegal = false; // Can a hardware loop be a parent to

102

// another hardware loop?

103

bool CounterInReg = false; // Should loop counter be updated in

104

// the loop via a phi?

105

bool PerformEntryTest = false; // Generate the intrinsic which also performs

106

// icmp ne zero on the loop counter value and

107

// produces an i1 to guard the loop entry.

108

bool isHardwareLoopCandidate(ScalarEvolution &SE, LoopInfo &LI,

109

DominatorTree &DT, bool ForceNestedLoop = false,

110

bool ForceHardwareLoopPHI = false);

111

bool canAnalyze(LoopInfo &LI);

112

};

113

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

114

class IntrinsicCostAttributes {

115

const IntrinsicInst *II = nullptr;

116

Type *RetTy = nullptr;

117

Intrinsic::ID IID;

118

SmallVector<Type *, 4> ParamTys;

119

SmallVector<const Value *, 4> Arguments;

120

FastMathFlags FMF;

121

ElementCount VF = ElementCount::getFixed(1);

122

// If ScalarizationCost is UINT_MAX, the cost of scalarizing the

123

// arguments and the return value will be computed based on types.

124

unsigned ScalarizationCost = std::numeric_limits<unsigned>::max();

125

126

public:

127

IntrinsicCostAttributes(const IntrinsicInst &I);

128

129

IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI);

130

131

IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI,

132

ElementCount Factor);

133

134

IntrinsicCostAttributes(Intrinsic::ID Id, const CallBase &CI,

135

ElementCount Factor, unsigned ScalarCost);

136

137

IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,

138

ArrayRef<Type *> Tys, FastMathFlags Flags);

139

140

IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,

141

ArrayRef<Type *> Tys, FastMathFlags Flags,

142

unsigned ScalarCost);

143

144

IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,

145

ArrayRef<Type *> Tys, FastMathFlags Flags,

146

unsigned ScalarCost,

147

const IntrinsicInst *I);

148

149

IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,

150

ArrayRef<Type *> Tys);

151

152

IntrinsicCostAttributes(Intrinsic::ID Id, Type *RTy,

153

ArrayRef<const Value *> Args);

154

155

Intrinsic::ID getID() const { return IID; }

156

const IntrinsicInst *getInst() const { return II; }

157

Type *getReturnType() const { return RetTy; }

158

ElementCount getVectorFactor() const { return VF; }

159

FastMathFlags getFlags() const { return FMF; }

160

unsigned getScalarizationCost() const { return ScalarizationCost; }

161

const SmallVectorImpl<const Value *> &getArgs() const { return Arguments; }

162

const SmallVectorImpl<Type *> &getArgTypes() const { return ParamTys; }

163

164

bool isTypeBasedOnly() const {

165

return Arguments.empty();

166

}

167

168

bool skipScalarizationCost() const {

169

return ScalarizationCost != std::numeric_limits<unsigned>::max();

}

};

class TargetTransformInfo;

174

typedef TargetTransformInfo TTI;

175

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

176

/// This pass provides access to the codegen interfaces that are needed

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

177

/// for IR-level transformations.

178

class TargetTransformInfo {

179

public:

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

180

/// Construct a TTI object using a type implementing the \c Concept

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

181

/// API below.

182

///

183

/// This is used by targets to construct a TTI wrapping their target-specific

Andrew Walbran

2020-04-07 12:24:26 +0100

[diff] [blame]

184

/// implementation that encodes appropriate costs for their target.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

185

template <typename T> TargetTransformInfo(T Impl);

186

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

187

/// Construct a baseline TTI object using a minimal implementation of

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

188

/// the \c Concept API below.

189

///

190

/// The TTI implementation will reflect the information in the DataLayout

191

/// provided.

192

explicit TargetTransformInfo(const DataLayout &DL);

193

194

// Provide move semantics.

195

TargetTransformInfo(TargetTransformInfo &&Arg);

196

TargetTransformInfo &operator=(TargetTransformInfo &&RHS);

197

198

// We need to define the destructor out-of-line to define our sub-classes

199

// out-of-line.

200

~TargetTransformInfo();

201

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

202

/// Handle the invalidation of this information.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

203

///

204

/// When used as a result of \c TargetIRAnalysis this method will be called

205

/// when the function this was computed for changes. When it returns false,

206

/// the information is preserved across those changes.

207

bool invalidate(Function &, const PreservedAnalyses &,

208

FunctionAnalysisManager::Invalidator &) {

209

// FIXME: We should probably in some way ensure that the subtarget

210

// information for a function hasn't changed.

return false;

}

/// \name Generic Target Information

215

/// @{

216

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

217

/// The kind of cost model.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

218

///

219

/// There are several different cost models that can be customized by the

220

/// target. The normalization of each cost model may be target specific.

221

enum TargetCostKind {
  TCK_RecipThroughput, ///< Reciprocal throughput.
  TCK_Latency,         ///< The latency of instruction.
  TCK_CodeSize,        ///< Instruction code size.
  TCK_SizeAndLatency   ///< The weighted sum of size and latency.
};

227

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

228

/// Query the cost of a specified instruction.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

229

///

230

/// Clients should use this interface to query the cost of an existing

231

/// instruction. The instruction must have a valid parent (basic block).

232

///

233

/// Note, this method does not cache the cost calculation and it

234

/// can be expensive in some cases.

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

235

InstructionCost getInstructionCost(const Instruction *I,
                                   enum TargetCostKind kind) const {
  InstructionCost Cost;
  // Dispatch to the query matching the requested cost kind. Every
  // enumerator of TargetCostKind is handled, so there is no default case.
  switch (kind) {
  case TCK_RecipThroughput:
    Cost = getInstructionThroughput(I);
    break;
  case TCK_Latency:
    Cost = getInstructionLatency(I);
    break;
  case TCK_CodeSize:
  case TCK_SizeAndLatency:
    // Both size-based kinds are answered by getUserCost with the kind
    // forwarded unchanged.
    Cost = getUserCost(I, kind);
    break;
  }
  // A raw result of -1 is mapped onto the invalid InstructionCost state.
  if (Cost == -1)
    Cost.setInvalid();
  return Cost;
}

254

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

255

/// Underlying constants for 'cost' values in this interface.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

256

///

257

/// Many APIs in this interface return a cost. This enum defines the

258

/// fundamental values that should be used to interpret (and produce) those

259

/// costs. The costs are returned as an int rather than a member of this

260

/// enumeration because it is expected that the cost of one IR instruction

261

/// may have a multiplicative factor to it or otherwise won't fit directly

262

/// into the enum. Moreover, it is common to sum or average costs which works

263

/// better as simple integral values. Thus this enum only provides constants.

264

/// Also note that the returned costs are signed integers to make it natural

265

/// to add, subtract, and test with zero (a common boundary condition). It is

266

/// not expected that 2^32 is a realistic cost to be modeling at any point.

267

///

268

/// Note that these costs should usually reflect the intersection of code-size

269

/// cost and execution cost. A free instruction is typically one that folds

270

/// into another instruction. For example, reg-to-reg moves can often be

271

/// skipped by renaming the registers in the CPU, but they still are encoded

272

/// and thus wouldn't be considered 'free' here.

273

enum TargetCostConstants {
  TCC_Free = 0,     ///< Expected to fold away in lowering.
  TCC_Basic = 1,    ///< The cost of a typical 'add' instruction.
  TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
};

278

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

279

/// Estimate the cost of a GEP operation when lowered.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

280

int getGEPCost(Type *PointeeType, const Value *Ptr,

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

281

ArrayRef<const Value *> Operands,

282

TargetCostKind CostKind = TCK_SizeAndLatency) const;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

283

284

/// \returns A value by which our inlining threshold should be multiplied.

285

/// This is primarily used to bump up the inlining threshold wholesale on

286

/// targets where calls are unusually expensive.

287

///

288

/// TODO: This is a rather blunt instrument. Perhaps altering the costs of

289

/// individual classes of instructions would be better.

290

unsigned getInliningThresholdMultiplier() const;

291

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

292

/// \returns Vector bonus in percent.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

293

///

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

294

/// Vector bonuses: We want to more aggressively inline vector-dense kernels

295

/// and apply this bonus based on the percentage of vector instructions. A

296

/// bonus is applied if the vector instructions exceed 50% and half that

297

/// amount is applied if it exceeds 10%. Note that these bonuses are somewhat

298

/// arbitrary and evolved over time by accident as much as because they are

299

/// principled bonuses.

300

/// FIXME: It would be nice to base the bonus values on something more

301

/// scientific. A target may have no bonus on vector instructions.

302

int getInlinerVectorBonusPercent() const;

Andrew Walbran

2020-04-07 12:24:26 +0100

[diff] [blame]

303

304

/// \return the expected cost of a memcpy, which could e.g. depend on the

305

/// source/destination type and alignment and the number of bytes copied.

306

int getMemcpyCost(const Instruction *I) const;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

307

308

/// \return The estimated number of case clusters when lowering \p 'SI'.

309

/// \p JTSize Set a jump table size only when \p SI is suitable for a jump

310

/// table.

311

unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

312

unsigned &JTSize,

313

ProfileSummaryInfo *PSI,

314

BlockFrequencyInfo *BFI) const;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

315

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

316

/// Estimate the cost of a given IR user when lowered.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

317

///

318

/// This can estimate the cost of either a ConstantExpr or Instruction when

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

319

/// lowered.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

320

///

321

/// \p Operands is a list of operands which can be a result of transformations

322

/// of the current operands. The number of the operands on the list must equal

323

/// to the number of the current operands the IR user has. Their order on the

324

/// list must be the same as the order of the current operands the IR user

325

/// has.

326

///

327

/// The returned cost is defined in terms of \c TargetCostConstants, see its

328

/// comments for a detailed explanation of the cost values.

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

329

int getUserCost(const User *U, ArrayRef<const Value *> Operands,

330

TargetCostKind CostKind) const;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

331

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

332

/// This is a helper function which calls the three-argument getUserCost

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

333

/// with \p Operands which are the current operands U has.

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

334

int getUserCost(const User *U, TargetCostKind CostKind) const {

335

SmallVector<const Value *, 4> Operands(U->operand_values());

336

return getUserCost(U, Operands, CostKind);

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

337

}

338

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

339

/// Return true if branch divergence exists.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

340

///

341

/// Branch divergence has a significantly negative impact on GPU performance

342

/// when threads in the same wavefront take different paths due to conditional

343

/// branches.

344

bool hasBranchDivergence() const;

345

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

346

/// Return true if the target prefers to use GPU divergence analysis to

347

/// replace the legacy version.

348

bool useGPUDivergenceAnalysis() const;

349

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

350

/// Returns whether V is a source of divergence.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

351

///

352

/// This function provides the target-dependent information for

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

353

/// the target-independent LegacyDivergenceAnalysis. LegacyDivergenceAnalysis

354

/// first builds the dependency graph, and then runs the reachability

355

/// algorithm starting with the sources of divergence.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

356

bool isSourceOfDivergence(const Value *V) const;

357

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

358

// Returns true for the target specific

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

359

// set of operations which produce uniform result

Andrew Walbran

2020-04-07 12:24:26 +0100

[diff] [blame]

360

// even taking non-uniform arguments

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

361

bool isAlwaysUniform(const Value *V) const;

362

363

/// Returns the address space ID for a target's 'flat' address space. Note

364

/// this is not necessarily the same as addrspace(0), which LLVM sometimes

365

/// refers to as the generic address space. The flat address space is a

366

/// generic address space that can be used to access multiple segments of memory

367

/// with different address spaces. Access of a memory location through a

368

/// pointer with this address space is expected to be legal but slower

369

/// compared to the same memory location accessed through a pointer with a

370

/// different address space.

371

///

372

/// This is for targets with different pointer representations which can

373

/// be converted with the addrspacecast instruction. If a pointer is converted

374

/// to this address space, optimizations should attempt to replace the access

375

/// with the source address space.

376

///

377

/// \returns ~0u if the target does not have such a flat address space to

378

/// optimize away.

379

unsigned getFlatAddressSpace() const;

380

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

381

/// Return any intrinsic address operand indexes which may be rewritten if

382

/// they use a flat address space pointer.

383

///

384

/// \returns true if the intrinsic was handled.

385

bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,

386

Intrinsic::ID IID) const;

387

388

bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const;

389

390

unsigned getAssumedAddrSpace(const Value *V) const;

391

392

/// Rewrite intrinsic call \p II such that \p OldV will be replaced with \p

393

/// NewV, which has a different address space. This should happen for every

394

/// operand index that collectFlatAddressOperands returned for the intrinsic.

395

/// \returns nullptr if the intrinsic was not handled. Otherwise, returns the

396

/// new value (which may be the original \p II with modified operands).

397

Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II, Value *OldV,

398

Value *NewV) const;

399

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

400

/// Test whether calls to a function lower to actual program function

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

401

/// calls.

402

///

403

/// The idea is to test whether the program is likely to require a 'call'

404

/// instruction or equivalent in order to call the given function.

405

///

406

/// FIXME: It's not clear that this is a good or useful query API. Clients

407

/// should probably move to simpler cost metrics using the above.

408

/// Alternatively, we could split the cost interface into distinct code-size

409

/// and execution-speed costs. This would allow modelling the core of this

410

/// query more accurately as a call is a single small instruction, but

411

/// incurs significant execution cost.

412

bool isLoweredToCall(const Function *F) const;

413

414

struct LSRCost {
  /// TODO: Some of these could be merged. Also, a lexical ordering
  /// isn't always optimal.
  unsigned Insns;
  unsigned NumRegs;
  unsigned AddRecCost;
  unsigned NumIVMuls;
  unsigned NumBaseAdds;
  unsigned ImmCost;
  unsigned SetupCost;
  unsigned ScaleCost;
};

/// Parameters that control the generic loop unrolling transformation.

428

struct UnrollingPreferences {
  /// The cost threshold for the unrolled loop. Should be relative to the
  /// getUserCost values returned by this API, and the expectation is that
  /// the unrolled loop's instructions when run through that interface should
  /// not exceed this cost. However, this is only an estimate. Also, specific
  /// loops may be unrolled even with a cost above this threshold if deemed
  /// profitable. Set this to UINT_MAX to disable the loop body cost
  /// restriction.
  unsigned Threshold;
  /// If complete unrolling will reduce the cost of the loop, we will boost
  /// the Threshold by a certain percent to allow more aggressive complete
  /// unrolling. This value provides the maximum boost percentage that we
  /// can apply to Threshold (The value should be no less than 100).
  /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
  ///                                    MaxPercentThresholdBoost / 100)
  /// E.g. if complete unrolling reduces the loop execution time by 50%
  /// then we boost the threshold by the factor of 2x. If unrolling is not
  /// expected to reduce the running time, then we do not increase the
  /// threshold.
  unsigned MaxPercentThresholdBoost;
  /// The cost threshold for the unrolled loop when optimizing for size (set
  /// to UINT_MAX to disable).
  unsigned OptSizeThreshold;
  /// The cost threshold for the unrolled loop, like Threshold, but used
  /// for partial/runtime unrolling (set to UINT_MAX to disable).
  unsigned PartialThreshold;
  /// The cost threshold for the unrolled loop when optimizing for size, like
  /// OptSizeThreshold, but used for partial/runtime unrolling (set to
  /// UINT_MAX to disable).
  unsigned PartialOptSizeThreshold;
  /// A forced unrolling factor (the number of concatenated bodies of the
  /// original loop in the unrolled loop body). When set to 0, the unrolling
  /// transformation will select an unrolling factor based on the current cost
  /// threshold and other factors.
  unsigned Count;
  /// Default unroll count for loops with run-time trip count.
  unsigned DefaultUnrollRuntimeCount;
  /// Set the maximum unrolling factor. The unrolling factor may be selected
  /// using the appropriate cost threshold, but may not exceed this number
  /// (set to UINT_MAX to disable). This does not apply in cases where the
  /// loop is being fully unrolled.
  unsigned MaxCount;
  /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
  /// applies even if full unrolling is selected. This allows a target to fall
  /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
  unsigned FullUnrollMaxCount;
  /// Represents number of instructions optimized when "back edge"
  /// becomes "fall through" in unrolled loop.
  /// For now we count a conditional branch on a backedge and a comparison
  /// feeding it.
  unsigned BEInsns;
  /// Allow partial unrolling (unrolling of loops to expand the size of the
  /// loop body, not only to eliminate small constant-trip-count loops).
  bool Partial;
  /// Allow runtime unrolling (unrolling of loops to expand the size of the
  /// loop body even when the number of loop iterations is not known at
  /// compile time).
  bool Runtime;
  /// Allow generation of a loop remainder (extra iterations after unroll).
  bool AllowRemainder;
  /// Allow emitting expensive instructions (such as divisions) when computing
  /// the trip count of a loop for runtime unrolling.
  bool AllowExpensiveTripCount;
  /// Apply loop unroll on any kind of loop
  /// (mainly to loops that fail runtime unrolling).
  bool Force;
  /// Allow using trip count upper bound to unroll loops.
  bool UpperBound;
  /// Allow unrolling of all the iterations of the runtime loop remainder.
  bool UnrollRemainder;
  /// Allow unroll and jam. Used to enable unroll and jam for the target.
  bool UnrollAndJam;
  /// Threshold for unroll and jam, for inner loop size. The 'Threshold'
  /// value above is used during unroll and jam for the outer loop size.
  /// This value is used in the same manner to limit the size of the inner
  /// loop.
  unsigned UnrollAndJamInnerLoopThreshold;
  /// Don't allow loop unrolling to simulate more than this number of
  /// iterations when checking full unroll profitability
  unsigned MaxIterationsCountToAnalyze;
};

509

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

510

/// Get target-customized preferences for the generic loop unrolling

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

511

/// transformation. The caller will initialize UP with the current

512

/// target-independent defaults.

513

void getUnrollingPreferences(Loop *L, ScalarEvolution &,

514

UnrollingPreferences &UP) const;

515

Andrew Walbran

2020-04-07 12:24:26 +0100

[diff] [blame]

516

/// Query the target whether it would be profitable to convert the given loop

517

/// into a hardware loop.

518

bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

519

AssumptionCache &AC, TargetLibraryInfo *LibInfo,

Andrew Walbran

2020-04-07 12:24:26 +0100

[diff] [blame]

520

HardwareLoopInfo &HWLoopInfo) const;

521

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

522

/// Query the target whether it would be preferred to create a predicated

523

/// vector loop, which can avoid the need to emit a scalar epilogue loop.

524

bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,

525

AssumptionCache &AC, TargetLibraryInfo *TLI,

526

DominatorTree *DT,

527

const LoopAccessInfo *LAI) const;

528

529

/// Query the target whether lowering of the llvm.get.active.lane.mask

530

/// intrinsic is supported.

531

bool emitGetActiveLaneMask() const;

532

533

// Parameters that control the loop peeling transformation

534

struct PeelingPreferences {
  /// A forced peeling factor (the number of bodies of the original loop
  /// that should be peeled off before the loop body). When set to 0, the
  /// peeling transformation will select a peeling factor based on profile
  /// information and other factors.
  unsigned PeelCount;
  /// Allow peeling off loop iterations.
  bool AllowPeeling;
  /// Allow peeling off loop iterations for loop nests.
  bool AllowLoopNestsPeeling;
  /// Allow peeling based on profile. Used to enable peeling off all
  /// iterations based on the provided profile.
  /// If the value is true the peeling cost model can decide to peel only
  /// some iterations and in this case it will set this to false.
  bool PeelProfiledIterations;
};

549

550

/// Get target-customized preferences for the generic loop peeling

551

/// transformation. The caller will initialize \p PP with the current

552

/// target-independent defaults with information from \p L and \p SE.

553

void getPeelingPreferences(Loop *L, ScalarEvolution &SE,

554

PeelingPreferences &PP) const;

555

556

/// Targets can implement their own combinations for target-specific

557

/// intrinsics. This function will be called from the InstCombine pass every

558

/// time a target-specific intrinsic is encountered.

559

///

560

/// \returns None to not do anything target specific or a value that will be

561

/// returned from the InstCombiner. It is possible to return null and stop

562

/// further processing of the intrinsic by returning nullptr.

563

Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,

564

IntrinsicInst &II) const;

565

/// Can be used to implement target-specific instruction combining.

566

/// \see instCombineIntrinsic

567

Optional<Value *>

568

simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,

569

APInt DemandedMask, KnownBits &Known,

570

bool &KnownBitsComputed) const;

571

/// Can be used to implement target-specific instruction combining.

572

/// \see instCombineIntrinsic

573

Optional<Value *> simplifyDemandedVectorEltsIntrinsic(

574

InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,

575

APInt &UndefElts2, APInt &UndefElts3,

576

std::function<void(Instruction *, unsigned, APInt, APInt &)>

577

SimplifyAndSetOp) const;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

578

/// @}

579

580

/// \name Scalar Target Information

581

/// @{

582

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

583

/// Flags indicating the kind of support for population count.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

584

///

585

/// Compared to the SW implementation, HW support is supposed to

586

/// significantly boost the performance when the population is dense, and it

587

/// may or may not degrade performance if the population is sparse. A HW

588

/// support is considered as "Fast" if it can outperform, or is on a par

589

/// with, SW implementation when the population is sparse; otherwise, it is

590

/// considered as "Slow".

591

enum PopcntSupportKind {
  PSK_Software,     // Software implementation.
  PSK_SlowHardware, // Hardware support classified as "Slow".
  PSK_FastHardware  // Hardware support classified as "Fast".
};

592

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

593

/// Return true if the specified immediate is a legal add immediate, that

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

594

/// is the target has add instructions which can add a register with the

595

/// immediate without having to materialize the immediate into a register.

596

bool isLegalAddImmediate(int64_t Imm) const;

597

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

598

/// Return true if the specified immediate is a legal icmp immediate,

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

599

/// that is the target has icmp instructions which can compare a register

600

/// against the immediate without having to materialize the immediate into a

601

/// register.

602

bool isLegalICmpImmediate(int64_t Imm) const;

603

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

604

/// Return true if the addressing mode represented by AM is legal for

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

605

/// this target, for a load/store of the specified type.

606

/// The type may be VoidTy, in which case only return true if the addressing

607

/// mode is legal for a load/store of any legal type.

608

/// If target returns true in LSRWithInstrQueries(), I may be valid.

609

/// TODO: Handle pre/postinc as well.

610

bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,

611

bool HasBaseReg, int64_t Scale,

612

unsigned AddrSpace = 0,

613

Instruction *I = nullptr) const;

614

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

615

/// Return true if LSR cost of C1 is lower than C2.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

616

bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,

617

TargetTransformInfo::LSRCost &C2) const;

618

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

619

/// Return true if LSR major cost is number of registers. Targets which

620

/// implement their own isLSRCostLess and unset number of registers as major

621

/// cost should return false, otherwise return true.

622

bool isNumRegsMajorCostOfLSR() const;

623

624

/// \returns true if LSR should not optimize a chain that includes \p I.

625

bool isProfitableLSRChainElement(Instruction *I) const;

626

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

627

/// Return true if the target can fuse a compare and branch.

628

/// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost

629

/// calculation for the instructions in a loop.

630

bool canMacroFuseCmp() const;

631

Andrew Walbran

2020-04-07 12:24:26 +0100

[diff] [blame]

632

/// Return true if the target can save a compare for loop count, for example

633

/// hardware loop saves a compare.

634

bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,

635

DominatorTree *DT, AssumptionCache *AC,

636

TargetLibraryInfo *LibInfo) const;

637

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

638

/// \return True if LSR should make efforts to create/preserve post-inc

639

/// addressing mode expressions.

640

bool shouldFavorPostInc() const;

641

Andrew Walbran

2019-10-22 13:54:20 +0100

[diff] [blame]

642

/// Return true if LSR should make efforts to generate indexed addressing

643

/// modes that operate across loop iterations.

644

bool shouldFavorBackedgeIndex(const Loop *L) const;

645

Andrew Walbran

2020-04-07 12:24:26 +0100

[diff] [blame]

646

/// Return true if the target supports masked store.

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

647

bool isLegalMaskedStore(Type *DataType, Align Alignment) const;

648

/// Return true if the target supports masked load.

649

bool isLegalMaskedLoad(Type *DataType, Align Alignment) const;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

650

Andrew Walbran

2020-04-07 12:24:26 +0100

[diff] [blame]

651

/// Return true if the target supports nontemporal store.

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

652

bool isLegalNTStore(Type *DataType, Align Alignment) const;

Andrew Walbran

2020-04-07 12:24:26 +0100

[diff] [blame]

653

/// Return true if the target supports nontemporal load.

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

654

bool isLegalNTLoad(Type *DataType, Align Alignment) const;

Andrew Walbran

2020-04-07 12:24:26 +0100

[diff] [blame]

655

656

/// Return true if the target supports masked scatter.

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

657

bool isLegalMaskedScatter(Type *DataType, Align Alignment) const;

Andrew Walbran

2020-04-07 12:24:26 +0100

[diff] [blame]

658

/// Return true if the target supports masked gather.

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

659

bool isLegalMaskedGather(Type *DataType, Align Alignment) const;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

660

Andrew Walbran

2020-04-07 12:24:26 +0100

[diff] [blame]

661

/// Return true if the target supports masked compress store.

662

bool isLegalMaskedCompressStore(Type *DataType) const;

663

/// Return true if the target supports masked expand load.

664

bool isLegalMaskedExpandLoad(Type *DataType) const;

665

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

666

/// Return true if the target has a unified operation to calculate division

667

/// and remainder. If so, the additional implicit multiplication and

668

/// subtraction required to calculate a remainder from division are free. This

669

/// can enable more aggressive transformations for division and remainder than

670

/// would typically be allowed using throughput or size cost models.

671

bool hasDivRemOp(Type *DataType, bool IsSigned) const;

672

673

/// Return true if the given instruction (assumed to be a memory access

674

/// instruction) has a volatile variant. If that's the case then we can avoid

675

/// addrspacecast to generic AS for volatile loads/stores. Default

676

/// implementation returns false, which prevents address space inference for

677

/// volatile loads/stores.

678

bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;

679

680

/// Return true if target doesn't mind addresses in vectors.

681

bool prefersVectorizedAddressing() const;

682

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

683

/// Return the cost of the scaling factor used in the addressing

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

684

/// mode represented by AM for this target, for a load/store

685

/// of the specified type.

686

/// If the AM is supported, the return value must be >= 0.

687

/// If the AM is not supported, it returns a negative value.

688

/// TODO: Handle pre/postinc as well.

689

int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,

690

bool HasBaseReg, int64_t Scale,

691

unsigned AddrSpace = 0) const;

692

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

693

/// Return true if the loop strength reduce pass should make

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

694

/// Instruction* based TTI queries to isLegalAddressingMode(). This is

695

/// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned

696

/// immediate offset and no index register.

697

bool LSRWithInstrQueries() const;

698

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

699

/// Return true if it's free to truncate a value of type Ty1 to type

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

700

/// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16

701

/// by referencing its sub-register AX.

702

bool isTruncateFree(Type *Ty1, Type *Ty2) const;

703

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

704

/// Return true if it is profitable to hoist instruction in the

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

705

/// then/else to before if.

706

bool isProfitableToHoist(Instruction *I) const;

bool useAA() const;

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

710

/// Return true if this type is legal.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

711

bool isTypeLegal(Type *Ty) const;

712

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

713

/// Returns the estimated number of registers required to represent \p Ty.

714

unsigned getRegUsageForType(Type *Ty) const;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

715

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

716

/// Return true if switches should be turned into lookup tables for the

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

717

/// target.

718

bool shouldBuildLookupTables() const;

719

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

720

/// Return true if switches should be turned into lookup tables

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

721

/// containing this constant value for the target.

722

bool shouldBuildLookupTablesForConstant(Constant *C) const;

723

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

724

/// Return true if the input function which is cold at all call sites,

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

725

/// should use coldcc calling convention.

726

bool useColdCCForColdCall(Function &F) const;

727

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

728

/// Estimate the overhead of scalarizing an instruction. Insert and Extract

729

/// are set if the demanded result elements need to be inserted and/or

730

/// extracted from vectors.

731

unsigned getScalarizationOverhead(VectorType *Ty, const APInt &DemandedElts,

732

bool Insert, bool Extract) const;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

733

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

734

/// Estimate the overhead of scalarizing an instruction's unique

735

/// non-constant operands. The types of the arguments are ordinarily

736

/// scalar, in which case the costs are multiplied with VF.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

737

unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,

738

unsigned VF) const;

739

740

/// If target has efficient vector element load/store instructions, it can

741

/// return true here so that insertion/extraction costs are not added to

742

/// the scalarization cost of a load/store.

743

bool supportsEfficientVectorElementLoadStore() const;

744

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

745

/// Don't restrict interleaved unrolling to small loops.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

746

bool enableAggressiveInterleaving(bool LoopHasReductions) const;

747

Andrew Walbran

2020-04-07 12:24:26 +0100

[diff] [blame]

748

/// Returns options for expansion of memcmp. IsZeroCmp is

749

/// true if this is the expansion of memcmp(p1, p2, s) == 0.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

750

struct MemCmpExpansionOptions {

Andrew Walbran

2020-04-07 12:24:26 +0100

[diff] [blame]

751

// Return true if memcmp expansion is enabled.

752

operator bool() const { return MaxNumLoads > 0; }

753

754

// Maximum number of load operations.

755

unsigned MaxNumLoads = 0;

756

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

757

// The list of available load sizes (in bytes), sorted in decreasing order.

758

SmallVector<unsigned, 8> LoadSizes;

Andrew Walbran

2020-04-07 12:24:26 +0100

[diff] [blame]

759

760

// For memcmp expansion when the memcmp result is only compared equal or

761

// not-equal to 0, allow up to this number of load pairs per block. As an

762

// example, this may allow 'memcmp(a, b, 3) == 0' in a single block:

763

// a0 = load2bytes &a[0]

764

// b0 = load2bytes &b[0]

765

// a2 = load1byte &a[2]

766

// b2 = load1byte &b[2]

767

// r = cmp eq (a0 ^ b0 | a2 ^ b2), 0

768

unsigned NumLoadsPerBlock = 1;

769

Andrew Walbran

2019-10-22 13:54:20 +0100

[diff] [blame]

770

// Set to true to allow overlapping loads. For example, 7-byte compares can

771

// be done with two 4-byte compares instead of 4+2+1-byte compares. This

772

// requires all loads in LoadSizes to be doable in an unaligned way.

773

bool AllowOverlappingLoads = false;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

774

};

Andrew Walbran

2020-04-07 12:24:26 +0100

[diff] [blame]

775

MemCmpExpansionOptions enableMemCmpExpansion(bool OptSize,

776

bool IsZeroCmp) const;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

777

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

778

/// Enable matching of interleaved access groups.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

779

bool enableInterleavedAccessVectorization() const;

780

Andrew Walbran

2019-10-22 13:54:20 +0100

[diff] [blame]

781

/// Enable matching of interleaved access groups that contain predicated

782

/// accesses or gaps and therefore vectorized using masked

783

/// vector loads/stores.

784

bool enableMaskedInterleavedAccessVectorization() const;

785

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

786

/// Indicate that it is potentially unsafe to automatically vectorize

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

787

/// floating-point operations because the vector and scalar

788

/// floating-point semantics may differ. For example, ARM NEON v7 SIMD math

789

/// does not support IEEE-754 denormal numbers, while depending on the

790

/// platform, scalar floating-point math does.

791

/// This applies to floating-point math operations and calls, not memory

792

/// operations, shuffles, or casts.

793

bool isFPVectorizationPotentiallyUnsafe() const;

794

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

795

/// Determine if the target supports unaligned memory accesses.

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

796

bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,

797

unsigned AddressSpace = 0,

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

798

unsigned Alignment = 1,

799

bool *Fast = nullptr) const;

800

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

801

/// Return hardware support for population count.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

802

PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;

803

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

804

/// Return true if the hardware has a fast square-root instruction.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

805

bool haveFastSqrt(Type *Ty) const;

806

807

/// Return true if it is faster to check if a floating-point value is NaN

808

/// (or not-NaN) versus a comparison against a constant FP zero value.

809

/// Targets should override this if materializing a 0.0 for comparison is

810

/// generally as cheap as checking for ordered/unordered.

811

bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;

812

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

813

/// Return the expected cost of supporting the floating point operation

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

814

/// of the specified type.

815

int getFPOpCost(Type *Ty) const;

816

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

817

/// Return the expected cost of materialization for the given integer

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

818

/// immediate of the specified type.

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

819

int getIntImmCost(const APInt &Imm, Type *Ty, TargetCostKind CostKind) const;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

820

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

821

/// Return the expected cost of materialization for the given integer

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

822

/// immediate of the specified type for a given instruction. The cost can be

823

/// zero if the immediate can be folded into the specified instruction.

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

824

int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty,

825

TargetCostKind CostKind,

826

Instruction *Inst = nullptr) const;

827

int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,

828

Type *Ty, TargetCostKind CostKind) const;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

829

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

830

/// Return the expected cost for the given integer when optimising

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

831

/// for size. This is different than the other integer immediate cost

832

/// functions in that it is subtarget agnostic. This is useful when you e.g.

833

/// target one ISA such as Aarch32 but smaller encodings could be possible

834

/// with another such as Thumb. This return value is used as a penalty when

835

/// the total costs for a constant is calculated (the bigger the cost, the

836

/// more beneficial constant hoisting is).

837

int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,

Type *Ty) const;

/// @}

/// \name Vector Target Information

842

/// @{

843

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

844

/// The various kinds of shuffle patterns for vector queries.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

845

enum ShuffleKind {
  SK_Broadcast,        ///< Broadcast element 0 to all other elements.
  SK_Reverse,          ///< Reverse the order of the vector.
  SK_Select,           ///< Selects elements from the corresponding lane of
                       ///< either source operand. This is equivalent to a
                       ///< vector select with a constant condition operand.
  SK_Transpose,        ///< Transpose two vectors.
  SK_InsertSubvector,  ///< InsertSubvector. Index indicates start offset.
  SK_ExtractSubvector, ///< ExtractSubvector. Index indicates start offset.
  SK_PermuteTwoSrc,    ///< Merge elements from two source vectors into one
                       ///< with any shuffle mask.
  SK_PermuteSingleSrc  ///< Shuffle elements of single source vector with any
                       ///< shuffle mask.
};

859

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

860

/// Kind of the reduction data.

861

enum ReductionKind {
  RK_None,           ///< Not a reduction.
  RK_Arithmetic,     ///< Binary reduction data.
  RK_MinMax,         ///< Min/max reduction data.
  RK_UnsignedMinMax, ///< Unsigned min/max reduction data.
};

867

868

/// Contains opcode + LHS/RHS parts of the reduction operations.

869

struct ReductionData {

870

ReductionData() = delete;

871

ReductionData(ReductionKind Kind, unsigned Opcode, Value *LHS, Value *RHS)

872

: Opcode(Opcode), LHS(LHS), RHS(RHS), Kind(Kind) {

873

assert(Kind != RK_None && "expected binary or min/max reduction only.");

874

}

875

unsigned Opcode = 0;

876

Value *LHS = nullptr;

877

Value *RHS = nullptr;

878

ReductionKind Kind = RK_None;

879

bool hasSameData(ReductionData &RD) const {

880

return Kind == RD.Kind && Opcode == RD.Opcode;

}

};

static ReductionKind matchPairwiseReduction(

885

const ExtractElementInst *ReduxRoot, unsigned &Opcode, VectorType *&Ty);

886

887

static ReductionKind matchVectorSplittingReduction(

888

const ExtractElementInst *ReduxRoot, unsigned &Opcode, VectorType *&Ty);

889

890

static ReductionKind matchVectorReduction(const ExtractElementInst *ReduxRoot,

891

unsigned &Opcode, VectorType *&Ty,

892

bool &IsPairwise);

893

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

894

/// Additional information about an operand's possible values.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

895

enum OperandValueKind {
  OK_AnyValue,               ///< Operand can have any value.
  OK_UniformValue,           ///< Operand is uniform (splat of a value).
  OK_UniformConstantValue,   ///< Operand is uniform constant.
  OK_NonUniformConstantValue ///< Operand is a non uniform constant value.
};

901

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

902

/// Additional properties of an operand's values.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

903

enum OperandValueProperties {
  OP_None = 0,    // No additional property.
  OP_PowerOf2 = 1 // Presumably: the operand value is a power of two.
};

904

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

905

/// \return the number of registers in the target-provided register class.

906

unsigned getNumberOfRegisters(unsigned ClassID) const;

907

908

/// \return the target-provided register class ID for the provided type,

909

/// accounting for type promotion and other type-legalization techniques that

910

/// the target might apply. However, it specifically does not account for the

911

/// scalarization or splitting of vector types. Should a vector type require

912

/// scalarization or splitting into multiple underlying vector registers, that

913

/// type should be mapped to a register class containing no registers.

914

/// Specifically, this is designed to provide a simple, high-level view of the

915

/// register allocation later performed by the backend. These register classes

916

/// don't necessarily map onto the register classes used by the backend.

917

/// FIXME: It's not currently possible to determine how many registers

918

/// are used by the provided type.

919

unsigned getRegisterClassForType(bool Vector, Type *Ty = nullptr) const;

920

921

/// \return the target-provided register class name

922

const char *getRegisterClassName(unsigned ClassID) const;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

923

924

/// \return The width of the largest scalar or vector register type.

925

unsigned getRegisterBitWidth(bool Vector) const;

926

927

/// \return The width of the smallest vector register type.

928

unsigned getMinVectorRegisterBitWidth() const;

929

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

930

/// \return The maximum value of vscale if the target specifies an

931

/// architectural maximum vector length, and None otherwise.

932

Optional<unsigned> getMaxVScale() const;

933

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

934

/// \return True if the vectorization factor should be chosen to

935

/// make the vector of the smallest element type match the size of a

936

/// vector register. For wider element types, this could result in

937

/// creating vectors that span multiple vector registers.

938

/// If false, the vectorization factor will be chosen based on the

939

/// size of the widest element type.

940

bool shouldMaximizeVectorBandwidth(bool OptSize) const;

941

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

942

/// \return The minimum vectorization factor for types of given element

Andrew Walbran

2020-04-07 12:24:26 +0100

[diff] [blame]

943

/// bit width, or 0 if there is no minimum VF. The returned value only

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

944

/// applies when shouldMaximizeVectorBandwidth returns true.

945

unsigned getMinimumVF(unsigned ElemWidth) const;

946

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

947

/// \return The maximum vectorization factor for types of given element

948

/// bit width and opcode, or 0 if there is no maximum VF.

949

/// Currently only used by the SLP vectorizer.

950

unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const;

951

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

952

/// \return True if it should be considered for address type promotion.

953

/// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is

954

/// profitable without finding other extensions fed by the same input.

955

bool shouldConsiderAddressTypePromotion(

956

const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;

957

958

/// \return The size of a cache line in bytes.

959

unsigned getCacheLineSize() const;

960

961

/// The possible cache levels

962

enum class CacheLevel {
  L1D, // The L1 data cache
  L2D, // The L2 data cache

  // We currently do not model L3 caches, as their sizes differ widely between
  // microarchitectures. Also, we currently do not have a use for L3 cache
  // size modeling yet.
};

970

971

/// \return The size of the cache level in bytes, if available.

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

972

Optional<unsigned> getCacheSize(CacheLevel Level) const;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

973

974

/// \return The associativity of the cache level, if available.

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

975

Optional<unsigned> getCacheAssociativity(CacheLevel Level) const;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

976

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

977

/// \return How much before a load we should place the prefetch

978

/// instruction. This is currently measured in number of

979

/// instructions.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

980

unsigned getPrefetchDistance() const;

981

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

982

/// Some HW prefetchers can handle accesses up to a certain constant stride.

983

/// Sometimes prefetching is beneficial even below the HW prefetcher limit,

984

/// and the arguments provided are meant to serve as a basis for deciding this

985

/// for a particular loop.

986

///

987

/// \param NumMemAccesses Number of memory accesses in the loop.

988

/// \param NumStridedMemAccesses Number of the memory accesses that

989

/// ScalarEvolution could find a known stride

990

/// for.

991

/// \param NumPrefetches Number of software prefetches that will be

992

/// emitted as determined by the addresses

993

/// involved and the cache line size.

994

/// \param HasCall True if the loop contains a call.

995

///

996

/// \return This is the minimum stride in bytes where it makes sense to start

997

/// adding SW prefetches. The default is 1, i.e. prefetch with any

998

/// stride.

999

unsigned getMinPrefetchStride(unsigned NumMemAccesses,

1000

unsigned NumStridedMemAccesses,

1001

unsigned NumPrefetches, bool HasCall) const;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1002

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1003

/// \return The maximum number of iterations to prefetch ahead. If

1004

/// the required number of iterations is more than this number, no

1005

/// prefetching is performed.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1006

unsigned getMaxPrefetchIterationsAhead() const;

1007

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1008

/// \return True if prefetching should also be done for writes.

1009

bool enableWritePrefetching() const;

1010

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1011

/// \return The maximum interleave factor that any transform should try to

1012

/// perform for this target. This number depends on the level of parallelism

1013

/// and the number of execution units in the CPU.

1014

unsigned getMaxInterleaveFactor(unsigned VF) const;

1015

Andrew Walbran

2019-10-22 13:54:20 +0100

[diff] [blame]

1016

/// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1017

static OperandValueKind getOperandInfo(const Value *V,

Andrew Walbran

2019-10-22 13:54:20 +0100

[diff] [blame]

1018

OperandValueProperties &OpProps);

Andrew Scull

0372a57

2018-11-16 15:47:06 +0000

[diff] [blame]

1019

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1020

/// This is an approximation of reciprocal throughput of a math/logic op.

1021

/// A higher cost indicates less expected throughput.

1022

/// From Agner Fog's guides, reciprocal throughput is "the average number of

1023

/// clock cycles per instruction when the instructions are not part of a

1024

/// limiting dependency chain."

1025

/// Therefore, costs should be scaled to account for multiple execution units

1026

/// on the target that can process this type of instruction. For example, if

1027

/// there are 5 scalar integer units and 2 vector integer units that can

1028

/// calculate an 'add' in a single cycle, this model should indicate that the

1029

/// cost of the vector add instruction is 2.5 times the cost of the scalar

1030

/// add instruction.

1031

/// \p Args is an optional argument which holds the instruction operands

1032

/// values so the TTI can analyze those values searching for special

1033

/// cases or optimizations based on those values.

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1034

/// \p CxtI is the optional original context instruction, if one exists, to

1035

/// provide even more information.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1036

int getArithmeticInstrCost(

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1037

unsigned Opcode, Type *Ty,

1038

TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,

1039

OperandValueKind Opd1Info = OK_AnyValue,

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1040

OperandValueKind Opd2Info = OK_AnyValue,

1041

OperandValueProperties Opd1PropInfo = OP_None,

1042

OperandValueProperties Opd2PropInfo = OP_None,

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1043

ArrayRef<const Value *> Args = ArrayRef<const Value *>(),

1044

const Instruction *CxtI = nullptr) const;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1045

1046

/// \return The cost of a shuffle instruction of kind Kind and of type Tp.

1047

/// The index and subtype parameters are used by the subvector insertion and

Andrew Walbran

2019-10-22 13:54:20 +0100

[diff] [blame]

1048

/// extraction shuffle kinds to show the insert/extract point and the type of

1049

/// the subvector being inserted/extracted.

1050

/// NOTE: For subvector extractions Tp represents the source type.

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1051

int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index = 0,

1052

VectorType *SubTp = nullptr) const;

1053

1054

/// Represents a hint about the context in which a cast is used.

1055

///

1056

/// For zext/sext, the context of the cast is the operand, which must be a

1057

/// load of some kind. For trunc, the context of the cast is the single

1058

/// user of the instruction, which must be a store of some kind.

1059

///

1060

/// This enum allows the vectorizer to give getCastInstrCost an idea of the

1061

/// type of cast it's dealing with, as not every cast is equal. For instance,

1062

/// the zext of a load may be free, but the zext of an interleaving load can

1063

/// be (very) expensive!

1064

///

1065

/// See \c getCastContextHint to compute a CastContextHint from a cast

1066

/// Instruction*. Callers can use it if they don't need to override the

1067

/// context and just want it to be calculated from the instruction.

1068

///

1069

/// FIXME: This handles the types of load/store that the vectorizer can

1070

/// produce, which are the cases where the context instruction is most

1071

/// likely to be incorrect. There are other situations where that can happen

1072

/// too, which might be handled here but in the long run a more general

1073

/// solution of costing multiple instructions at the same time may be better.

1074

enum class CastContextHint : uint8_t {

1075

None, ///< The cast is not used with a load/store of any kind.

1076

Normal, ///< The cast is used with a normal load/store.

1077

Masked, ///< The cast is used with a masked load/store.

1078

GatherScatter, ///< The cast is used with a gather/scatter.

1079

Interleave, ///< The cast is used with an interleaved load/store.

1080

Reversed, ///< The cast is used with a reversed load/store.

1081

};

1082

1083

/// Calculates a CastContextHint from \p I.

1084

/// This should be used by callers of getCastInstrCost if they wish to

1085

/// determine the context from some instruction.

1086

/// \returns the CastContextHint for ZExt/SExt/Trunc, None if \p I is nullptr,

1087

/// or if it's another type of cast.

1088

static CastContextHint getCastContextHint(const Instruction *I);

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1089

1090

/// \return The expected cost of cast instructions, such as bitcast, trunc,

1091

/// zext, etc. If there is an existing instruction that holds Opcode, it

1092

/// may be passed in the 'I' parameter.

1093

int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1094

TTI::CastContextHint CCH,

1095

TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency,

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1096

const Instruction *I = nullptr) const;

1097

1098

/// \return The expected cost of a sign- or zero-extended vector extract. Use

1099

/// -1 to indicate that there is no information about the index value.

1100

int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,

1101

unsigned Index = -1) const;

1102

1103

/// \return The expected cost of control-flow related instructions such as

1104

/// Phi, Ret, Br.

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1105

int getCFInstrCost(unsigned Opcode,

1106

TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1107

1108

/// \returns The expected cost of compare and select instructions. If there

1109

/// is an existing instruction that holds Opcode, it may be passed in the

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1110

/// 'I' parameter. The \p VecPred parameter can be used to indicate the select

1111

/// is using a compare with the specified predicate as condition. When vector

1112

/// types are passed, \p VecPred must be used for all lanes.

1113

int getCmpSelInstrCost(

1114

unsigned Opcode, Type *ValTy, Type *CondTy = nullptr,

1115

CmpInst::Predicate VecPred = CmpInst::BAD_ICMP_PREDICATE,

1116

TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,

1117

const Instruction *I = nullptr) const;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1118

1119

/// \return The expected cost of vector Insert and Extract.

1120

/// Use -1 to indicate that there is no information on the index value.

1121

int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;

1122

1123

/// \return The cost of Load and Store instructions.

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1124

int getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,

1125

unsigned AddressSpace,

1126

TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,

1127

const Instruction *I = nullptr) const;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1128

1129

/// \return The cost of masked Load and Store instructions.

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1130

int getMaskedMemoryOpCost(

1131

unsigned Opcode, Type *Src, Align Alignment, unsigned AddressSpace,

1132

TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1133

1134

/// \return The cost of Gather or Scatter operation

1135

/// \p Opcode - is a type of memory access Load or Store

1136

/// \p DataTy - a vector type of the data to be loaded or stored

1137

/// \p Ptr - pointer [or vector of pointers] - address[es] in memory

1138

/// \p VariableMask - true when the memory access is predicated with a mask

1139

/// that is not a compile-time constant

1140

/// \p Alignment - alignment of single element

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1141

/// \p I - the optional original context instruction, if one exists, e.g. the

1142

/// load/store to transform or the call to the gather/scatter intrinsic

1143

int getGatherScatterOpCost(

1144

unsigned Opcode, Type *DataTy, const Value *Ptr, bool VariableMask,

1145

Align Alignment, TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,

1146

const Instruction *I = nullptr) const;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1147

1148

/// \return The cost of the interleaved memory operation.

1149

/// \p Opcode is the memory operation code

1150

/// \p VecTy is the vector type of the interleaved access.

1151

/// \p Factor is the interleave factor

1152

/// \p Indices is the indices for interleaved load members (as interleaved

1153

/// load allows gaps)

1154

/// \p Alignment is the alignment of the memory operation

1155

/// \p AddressSpace is the address space of the pointer.

Andrew Walbran

2019-10-22 13:54:20 +0100

[diff] [blame]

1156

/// \p UseMaskForCond indicates if the memory access is predicated.

1157

/// \p UseMaskForGaps indicates if gaps should be masked.

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1158

int getInterleavedMemoryOpCost(

1159

unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,

1160

Align Alignment, unsigned AddressSpace,

1161

TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput,

1162

bool UseMaskForCond = false, bool UseMaskForGaps = false) const;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1163

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

1164

/// Calculate the cost of performing a vector reduction.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1165

///

1166

/// This is the cost of reducing the vector value of type \p Ty to a scalar

1167

/// value using the operation denoted by \p Opcode. The form of the reduction

1168

/// can either be a pairwise reduction or a reduction that splits the vector

1169

/// at every reduction level.

///

/// Pairwise:

/// (v0, v1, v2, v3)

/// ((v0+v1), (v2+v3), undef, undef)

1174

/// Split:

1175

/// (v0, v1, v2, v3)

1176

/// ((v0+v2), (v1+v3), undef, undef)

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1177

int getArithmeticReductionCost(

1178

unsigned Opcode, VectorType *Ty, bool IsPairwiseForm,

1179

TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;

1180

1181

int getMinMaxReductionCost(

1182

VectorType *Ty, VectorType *CondTy, bool IsPairwiseForm, bool IsUnsigned,

1183

TTI::TargetCostKind CostKind = TTI::TCK_RecipThroughput) const;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1184

1185

/// \returns The cost of Intrinsic instructions. Analyses the real arguments.

1186

/// Three cases are handled: 1. scalar instruction 2. vector instruction

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1187

/// 3. scalar instruction which is to be vectorized.

1188

int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,

1189

TTI::TargetCostKind CostKind) const;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1190

1191

/// \returns The cost of Call instructions.

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1192

int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys,

1193

TTI::TargetCostKind CostKind = TTI::TCK_SizeAndLatency) const;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1194

1195

/// \returns The number of pieces into which the provided type must be

1196

/// split during legalization. Zero is returned when the answer is unknown.

1197

unsigned getNumberOfParts(Type *Tp) const;

1198

1199

/// \returns The cost of the address computation. For most targets this can be

1200

/// merged into the instruction indexing mode. Some targets might want to

1201

/// distinguish between address computation for memory operations on vector

1202

/// types and scalar types. Such targets should override this function.

1203

/// The 'SE' parameter holds a pointer to the scalar evolution object which

1204

/// is used in order to get the Ptr step value in case of constant stride.

1205

/// The 'Ptr' parameter holds SCEV of the access pointer.

1206

int getAddressComputationCost(Type *Ty, ScalarEvolution *SE = nullptr,

1207

const SCEV *Ptr = nullptr) const;

1208

1209

/// \returns The cost, if any, of keeping values of the given types alive

1210

/// over a callsite.

1211

///

1212

/// Some types may require the use of register classes that do not have

1213

/// any callee-saved registers, so would require a spill and fill.

1214

unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;

1215

1216

/// \returns True if the intrinsic is a supported memory intrinsic. Info

1217

/// will contain additional information - whether the intrinsic may write

1218

/// to or read from memory, volatility and the pointer. Info is undefined

1219

/// if false is returned.

1220

bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;

1221

1222

/// \returns The maximum element size, in bytes, for an element

1223

/// unordered-atomic memory intrinsic.

1224

unsigned getAtomicMemIntrinsicMaxElementSize() const;

1225

1226

/// \returns A value which is the result of the given memory intrinsic. New

1227

/// instructions may be created to extract the result from the given intrinsic

1228

/// memory operation. Returns nullptr if the target cannot create a result

1229

/// from the given intrinsic.

1230

Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,

1231

Type *ExpectedType) const;

1232

1233

/// \returns The type to use in a loop expansion of a memcpy call.

1234

Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1235

unsigned SrcAddrSpace, unsigned DestAddrSpace,

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1236

unsigned SrcAlign, unsigned DestAlign) const;

1237

1238

/// \param[out] OpsOut The operand types to copy RemainingBytes of memory.

1239

/// \param RemainingBytes The number of bytes to copy.

1240

///

1241

/// Calculates the operand types to use when copying \p RemainingBytes of

1242

/// memory, where source and destination alignments are \p SrcAlign and

1243

/// \p DestAlign respectively.

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1244

void getMemcpyLoopResidualLoweringType(

1245

SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,

1246

unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,

1247

unsigned SrcAlign, unsigned DestAlign) const;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1248

1249

/// \returns True if the two functions have compatible attributes for inlining

1250

/// purposes.

1251

bool areInlineCompatible(const Function *Caller,

1252

const Function *Callee) const;

1253

Andrew Walbran

2019-10-22 13:54:20 +0100

[diff] [blame]

1254

/// \returns True if the caller and callee agree on how \p Args will be passed

1255

/// to the callee.

1256

/// \param[out] Args The list of compatible arguments. The implementation may

1257

/// filter out any incompatible args from this list.

1258

bool areFunctionArgsABICompatible(const Function *Caller,

1259

const Function *Callee,

1260

SmallPtrSetImpl<Argument *> &Args) const;

1261

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

1262

/// The type of load/store indexing.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1263

enum MemIndexedMode {

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1264

MIM_Unindexed, ///< No indexing.

1265

MIM_PreInc, ///< Pre-incrementing.

1266

MIM_PreDec, ///< Pre-decrementing.

1267

MIM_PostInc, ///< Post-incrementing.

1268

MIM_PostDec ///< Post-decrementing.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1269

};

1270

1271

/// \returns True if the specified indexed load for the given type is legal.

1272

bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;

1273

1274

/// \returns True if the specified indexed store for the given type is legal.

1275

bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;

1276

1277

/// \returns The bitwidth of the largest vector type that should be used to

1278

/// load/store in the given address space.

1279

unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;

1280

1281

/// \returns True if the load instruction is legal to vectorize.

1282

bool isLegalToVectorizeLoad(LoadInst *LI) const;

1283

1284

/// \returns True if the store instruction is legal to vectorize.

1285

bool isLegalToVectorizeStore(StoreInst *SI) const;

1286

1287

/// \returns True if it is legal to vectorize the given load chain.

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1288

bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1289

unsigned AddrSpace) const;

1290

1291

/// \returns True if it is legal to vectorize the given store chain.

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1292

bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1293

unsigned AddrSpace) const;

1294

1295

/// \returns The new vector factor value if the target doesn't support \p

1296

/// SizeInBytes loads or has a better vector factor.

1297

unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,

1298

unsigned ChainSizeInBytes,

1299

VectorType *VecTy) const;

1300

1301

/// \returns The new vector factor value if the target doesn't support \p

1302

/// SizeInBytes stores or has a better vector factor.

1303

unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,

1304

unsigned ChainSizeInBytes,

1305

VectorType *VecTy) const;

1306

1307

/// Flags describing the kind of vector reduction.

1308

struct ReductionFlags {

1309

ReductionFlags() : IsMaxOp(false), IsSigned(false), NoNaN(false) {}

1310

bool IsMaxOp; ///< If the op is a min/max kind, true if it's a max operation.

1311

bool IsSigned; ///< Whether the operation is a signed int reduction.

1312

bool NoNaN; ///< If op is an fp min/max, whether NaNs may be present.

1313

};

1314

1315

/// \returns True if the target wants to handle the given reduction idiom in

1316

/// the intrinsics form instead of the shuffle form.

1317

bool useReductionIntrinsic(unsigned Opcode, Type *Ty,

1318

ReductionFlags Flags) const;

1319

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1320

/// \returns True if the target prefers reductions in loop.

1321

bool preferInLoopReduction(unsigned Opcode, Type *Ty,

1322

ReductionFlags Flags) const;

1323

1324

/// \returns True if the target prefers the reduction select to be kept in the loop

1325

/// when tail folding. i.e.

/// loop:

/// p = phi (0, s)

/// a = add (p, x)

/// s = select (mask, a, p)

1330

/// vecreduce.add(s)

1331

///

1332

/// As opposed to the normal scheme of p = phi (0, a) which allows the select

1333

/// to be pulled out of the loop. If the select(.., add, ..) can be predicated

1334

/// by the target, this can lead to cleaner code generation.

1335

bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,

1336

ReductionFlags Flags) const;

1337

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1338

/// \returns True if the target wants to expand the given reduction intrinsic

1339

/// into a shuffle sequence.

1340

bool shouldExpandReduction(const IntrinsicInst *II) const;

Andrew Walbran

2020-04-07 12:24:26 +0100

[diff] [blame]

1341

1342

/// \returns the size cost of rematerializing a GlobalValue address relative

1343

/// to a stack reload.

1344

unsigned getGISelRematGlobalCost() const;

1345

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1346

/// \returns True if the target supports scalable vectors.

1347

bool supportsScalableVectors() const;

1348

1349

/// \name Vector Predication Information

1350

/// @{

1351

/// Whether the target supports the %evl parameter of VP intrinsic efficiently

1352

/// in hardware. (see LLVM Language Reference - "Vector Predication

1353

/// Intrinsics") Use of %evl is discouraged when that is not the case.

1354

bool hasActiveVectorLength() const;

/// @}

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1358

/// @}

1359

1360

private:

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

1361

/// Estimate the latency of specified instruction.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1362

/// Returns 1 as the default value.

1363

int getInstructionLatency(const Instruction *I) const;

1364

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

1365

/// Returns the expected throughput cost of the instruction.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1366

/// Returns -1 if the cost is unknown.

1367

int getInstructionThroughput(const Instruction *I) const;

1368

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

1369

/// The abstract base class used to type erase specific TTI

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

/// implementations.

class Concept;

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

1373

/// The template model for the base class which wraps a concrete

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1374

/// implementation in a type erased interface.

1375

template <typename T> class Model;

1376

1377

std::unique_ptr<Concept> TTIImpl;

1378

};

1379

1380

class TargetTransformInfo::Concept {

1381

public:

1382

virtual ~Concept() = 0;

1383

virtual const DataLayout &getDataLayout() const = 0;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1384

virtual int getGEPCost(Type *PointeeType, const Value *Ptr,

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1385

ArrayRef<const Value *> Operands,

1386

TTI::TargetCostKind CostKind) = 0;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1387

virtual unsigned getInliningThresholdMultiplier() = 0;

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1388

virtual int getInlinerVectorBonusPercent() = 0;

Andrew Walbran

2020-04-07 12:24:26 +0100

[diff] [blame]

1389

virtual int getMemcpyCost(const Instruction *I) = 0;

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1390

virtual unsigned

1391

getEstimatedNumberOfCaseClusters(const SwitchInst &SI, unsigned &JTSize,

1392

ProfileSummaryInfo *PSI,

1393

BlockFrequencyInfo *BFI) = 0;

1394

virtual int getUserCost(const User *U, ArrayRef<const Value *> Operands,

1395

TargetCostKind CostKind) = 0;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1396

virtual bool hasBranchDivergence() = 0;

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1397

virtual bool useGPUDivergenceAnalysis() = 0;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1398

virtual bool isSourceOfDivergence(const Value *V) = 0;

1399

virtual bool isAlwaysUniform(const Value *V) = 0;

1400

virtual unsigned getFlatAddressSpace() = 0;

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1401

virtual bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,

1402

Intrinsic::ID IID) const = 0;

1403

virtual bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const = 0;

1404

virtual unsigned getAssumedAddrSpace(const Value *V) const = 0;

1405

virtual Value *rewriteIntrinsicWithAddressSpace(IntrinsicInst *II,

1406

Value *OldV,

1407

Value *NewV) const = 0;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1408

virtual bool isLoweredToCall(const Function *F) = 0;

1409

virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,

1410

UnrollingPreferences &UP) = 0;

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1411

virtual void getPeelingPreferences(Loop *L, ScalarEvolution &SE,

1412

PeelingPreferences &PP) = 0;

Andrew Walbran

2020-04-07 12:24:26 +0100

[diff] [blame]

1413

virtual bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,

1414

AssumptionCache &AC,

1415

TargetLibraryInfo *LibInfo,

1416

HardwareLoopInfo &HWLoopInfo) = 0;

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1417

virtual bool

1418

preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,

1419

AssumptionCache &AC, TargetLibraryInfo *TLI,

1420

DominatorTree *DT, const LoopAccessInfo *LAI) = 0;

1421

virtual bool emitGetActiveLaneMask() = 0;

1422

virtual Optional<Instruction *> instCombineIntrinsic(InstCombiner &IC,

1423

IntrinsicInst &II) = 0;

1424

virtual Optional<Value *>

1425

simplifyDemandedUseBitsIntrinsic(InstCombiner &IC, IntrinsicInst &II,

1426

APInt DemandedMask, KnownBits &Known,

1427

bool &KnownBitsComputed) = 0;

1428

virtual Optional<Value *> simplifyDemandedVectorEltsIntrinsic(

1429

InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts, APInt &UndefElts,

1430

APInt &UndefElts2, APInt &UndefElts3,

1431

std::function<void(Instruction *, unsigned, APInt, APInt &)>

1432

SimplifyAndSetOp) = 0;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1433

virtual bool isLegalAddImmediate(int64_t Imm) = 0;

1434

virtual bool isLegalICmpImmediate(int64_t Imm) = 0;

1435

virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,

1436

int64_t BaseOffset, bool HasBaseReg,

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1437

int64_t Scale, unsigned AddrSpace,

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1438

Instruction *I) = 0;

1439

virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,

1440

TargetTransformInfo::LSRCost &C2) = 0;

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1441

virtual bool isNumRegsMajorCostOfLSR() = 0;

1442

virtual bool isProfitableLSRChainElement(Instruction *I) = 0;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1443

virtual bool canMacroFuseCmp() = 0;

Andrew Walbran

2020-04-07 12:24:26 +0100

[diff] [blame]

1444

virtual bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE,

1445

LoopInfo *LI, DominatorTree *DT, AssumptionCache *AC,

1446

TargetLibraryInfo *LibInfo) = 0;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1447

virtual bool shouldFavorPostInc() const = 0;

Andrew Walbran

2019-10-22 13:54:20 +0100

[diff] [blame]

1448

virtual bool shouldFavorBackedgeIndex(const Loop *L) const = 0;

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1449

virtual bool isLegalMaskedStore(Type *DataType, Align Alignment) = 0;

1450

virtual bool isLegalMaskedLoad(Type *DataType, Align Alignment) = 0;

1451

virtual bool isLegalNTStore(Type *DataType, Align Alignment) = 0;

1452

virtual bool isLegalNTLoad(Type *DataType, Align Alignment) = 0;

1453

virtual bool isLegalMaskedScatter(Type *DataType, Align Alignment) = 0;

1454

virtual bool isLegalMaskedGather(Type *DataType, Align Alignment) = 0;

Andrew Walbran

2020-04-07 12:24:26 +0100

[diff] [blame]

1455

virtual bool isLegalMaskedCompressStore(Type *DataType) = 0;

1456

virtual bool isLegalMaskedExpandLoad(Type *DataType) = 0;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1457

virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;

1458

virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;

1459

virtual bool prefersVectorizedAddressing() = 0;

1460

virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,

1461

int64_t BaseOffset, bool HasBaseReg,

1462

int64_t Scale, unsigned AddrSpace) = 0;

1463

virtual bool LSRWithInstrQueries() = 0;

1464

virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;

1465

virtual bool isProfitableToHoist(Instruction *I) = 0;

1466

virtual bool useAA() = 0;

1467

virtual bool isTypeLegal(Type *Ty) = 0;

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1468

virtual unsigned getRegUsageForType(Type *Ty) = 0;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1469

virtual bool shouldBuildLookupTables() = 0;

1470

virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;

1471

virtual bool useColdCCForColdCall(Function &F) = 0;

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1472

virtual unsigned getScalarizationOverhead(VectorType *Ty,

1473

const APInt &DemandedElts,

1474

bool Insert, bool Extract) = 0;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1475

virtual unsigned

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1476

getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,

1477

unsigned VF) = 0;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1478

virtual bool supportsEfficientVectorElementLoadStore() = 0;

1479

virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;

Andrew Walbran

2020-04-07 12:24:26 +0100

[diff] [blame]

1480

virtual MemCmpExpansionOptions

1481

enableMemCmpExpansion(bool OptSize, bool IsZeroCmp) const = 0;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1482

virtual bool enableInterleavedAccessVectorization() = 0;

Andrew Walbran

2019-10-22 13:54:20 +0100

[diff] [blame]

1483

virtual bool enableMaskedInterleavedAccessVectorization() = 0;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1484

virtual bool isFPVectorizationPotentiallyUnsafe() = 0;

1485

virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,

1486

unsigned BitWidth,

1487

unsigned AddressSpace,

1488

unsigned Alignment,

1489

bool *Fast) = 0;

1490

virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;

1491

virtual bool haveFastSqrt(Type *Ty) = 0;

1492

virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;

1493

virtual int getFPOpCost(Type *Ty) = 0;

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1494

virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx,

1495

const APInt &Imm, Type *Ty) = 0;

1496

virtual int getIntImmCost(const APInt &Imm, Type *Ty,

1497

TargetCostKind CostKind) = 0;

1498

virtual int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm,

1499

Type *Ty, TargetCostKind CostKind,

1500

Instruction *Inst = nullptr) = 0;

1501

virtual int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx,

1502

const APInt &Imm, Type *Ty,

1503

TargetCostKind CostKind) = 0;

1504

virtual unsigned getNumberOfRegisters(unsigned ClassID) const = 0;

1505

virtual unsigned getRegisterClassForType(bool Vector,

1506

Type *Ty = nullptr) const = 0;

1507

virtual const char *getRegisterClassName(unsigned ClassID) const = 0;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1508

virtual unsigned getRegisterBitWidth(bool Vector) const = 0;

1509

virtual unsigned getMinVectorRegisterBitWidth() = 0;

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1510

virtual Optional<unsigned> getMaxVScale() const = 0;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1511

virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0;

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

1512

virtual unsigned getMinimumVF(unsigned ElemWidth) const = 0;

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1513

virtual unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const = 0;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1514

virtual bool shouldConsiderAddressTypePromotion(

1515

const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1516

virtual unsigned getCacheLineSize() const = 0;

1517

virtual Optional<unsigned> getCacheSize(CacheLevel Level) const = 0;

1518

virtual Optional<unsigned> getCacheAssociativity(CacheLevel Level) const = 0;

1519

1520

/// \return How much before a load we should place the prefetch

1521

/// instruction. This is currently measured in number of

1522

/// instructions.

1523

virtual unsigned getPrefetchDistance() const = 0;

1524

1525

/// \return Some HW prefetchers can handle accesses up to a certain

1526

/// constant stride. This is the minimum stride in bytes where it

1527

/// makes sense to start adding SW prefetches. The default is 1,

1528

/// i.e. prefetch with any stride. Sometimes prefetching is beneficial

1529

/// even below the HW prefetcher limit, and the arguments provided are

1530

/// meant to serve as a basis for deciding this for a particular loop.

1531

virtual unsigned getMinPrefetchStride(unsigned NumMemAccesses,

1532

unsigned NumStridedMemAccesses,

1533

unsigned NumPrefetches,

1534

bool HasCall) const = 0;

1535

1536

/// \return The maximum number of iterations to prefetch ahead. If

1537

/// the required number of iterations is more than this number, no

1538

/// prefetching is performed.

1539

virtual unsigned getMaxPrefetchIterationsAhead() const = 0;

1540

1541

/// \return True if prefetching should also be done for writes.

1542

virtual bool enableWritePrefetching() const = 0;

1543

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1544

virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1545

virtual unsigned getArithmeticInstrCost(

1546

unsigned Opcode, Type *Ty,

1547

TTI::TargetCostKind CostKind,

1548

OperandValueKind Opd1Info,

1549

OperandValueKind Opd2Info, OperandValueProperties Opd1PropInfo,

1550

OperandValueProperties Opd2PropInfo, ArrayRef<const Value *> Args,

1551

const Instruction *CxtI = nullptr) = 0;

1552

virtual int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index,

1553

VectorType *SubTp) = 0;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1554

virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1555

CastContextHint CCH,

1556

TTI::TargetCostKind CostKind,

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1557

const Instruction *I) = 0;

1558

virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,

1559

VectorType *VecTy, unsigned Index) = 0;

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1560

virtual int getCFInstrCost(unsigned Opcode,

1561

TTI::TargetCostKind CostKind) = 0;

1562

virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,

1563

CmpInst::Predicate VecPred,

1564

TTI::TargetCostKind CostKind,

1565

const Instruction *I) = 0;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1566

virtual int getVectorInstrCost(unsigned Opcode, Type *Val,

1567

unsigned Index) = 0;

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1568

virtual int getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,

1569

unsigned AddressSpace,

1570

TTI::TargetCostKind CostKind,

1571

const Instruction *I) = 0;

1572

virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,

1573

unsigned AddressSpace,

1574

TTI::TargetCostKind CostKind) = 0;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1575

virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1576

const Value *Ptr, bool VariableMask,

1577

Align Alignment,

1578

TTI::TargetCostKind CostKind,

1579

const Instruction *I = nullptr) = 0;

1580

1581

virtual int getInterleavedMemoryOpCost(

1582

unsigned Opcode, Type *VecTy, unsigned Factor, ArrayRef<unsigned> Indices,

1583

Align Alignment, unsigned AddressSpace, TTI::TargetCostKind CostKind,

1584

bool UseMaskForCond = false, bool UseMaskForGaps = false) = 0;

1585

virtual int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,

1586

bool IsPairwiseForm,

1587

TTI::TargetCostKind CostKind) = 0;

1588

virtual int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,

1589

bool IsPairwiseForm, bool IsUnsigned,

1590

TTI::TargetCostKind CostKind) = 0;

1591

virtual int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,

1592

TTI::TargetCostKind CostKind) = 0;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1593

virtual int getCallInstrCost(Function *F, Type *RetTy,

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1594

ArrayRef<Type *> Tys,

1595

TTI::TargetCostKind CostKind) = 0;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1596

virtual unsigned getNumberOfParts(Type *Tp) = 0;

1597

virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,

1598

const SCEV *Ptr) = 0;

1599

virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;

1600

virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,

1601

MemIntrinsicInfo &Info) = 0;

1602

virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;

1603

virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,

1604

Type *ExpectedType) = 0;

1605

virtual Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1606

unsigned SrcAddrSpace,

1607

unsigned DestAddrSpace,

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1608

unsigned SrcAlign,

1609

unsigned DestAlign) const = 0;

1610

virtual void getMemcpyLoopResidualLoweringType(

1611

SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1612

unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,

1613

unsigned SrcAlign, unsigned DestAlign) const = 0;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1614

virtual bool areInlineCompatible(const Function *Caller,

1615

const Function *Callee) const = 0;

Andrew Walbran

2019-10-22 13:54:20 +0100

[diff] [blame]

1616

virtual bool

1617

areFunctionArgsABICompatible(const Function *Caller, const Function *Callee,

1618

SmallPtrSetImpl<Argument *> &Args) const = 0;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1619

virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1620

virtual bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const = 0;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1621

virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;

1622

virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;

1623

virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;

1624

virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1625

Align Alignment,

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1626

unsigned AddrSpace) const = 0;

1627

virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1628

Align Alignment,

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1629

unsigned AddrSpace) const = 0;

1630

virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,

1631

unsigned ChainSizeInBytes,

1632

VectorType *VecTy) const = 0;

1633

virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,

1634

unsigned ChainSizeInBytes,

1635

VectorType *VecTy) const = 0;

1636

virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,

1637

ReductionFlags) const = 0;

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1638

virtual bool preferInLoopReduction(unsigned Opcode, Type *Ty,

1639

ReductionFlags) const = 0;

1640

virtual bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,

1641

ReductionFlags) const = 0;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1642

virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;

Andrew Walbran

2020-04-07 12:24:26 +0100

[diff] [blame]

1643

virtual unsigned getGISelRematGlobalCost() const = 0;

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1644

virtual bool supportsScalableVectors() const = 0;

1645

virtual bool hasActiveVectorLength() const = 0;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1646

virtual int getInstructionLatency(const Instruction *I) = 0;

1647

};

1648

1649

template <typename T>

1650

class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {

T Impl;

public:

/// Takes the concrete implementation by value and moves it into the Impl
/// member that every override below forwards to.
Model(T Impl) : Impl(std::move(Impl)) {}

1655

/// Destructor; does nothing beyond destroying the members.
~Model() override = default;

1656

1657

/// Returns the DataLayout reference handed back by Impl.getDataLayout().
const DataLayout &getDataLayout() const override { return Impl.getDataLayout(); }

1660

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1661

int getGEPCost(Type *PointeeType, const Value *Ptr,

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1662

ArrayRef<const Value *> Operands,

1663

enum TargetTransformInfo::TargetCostKind CostKind) override {

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1664

return Impl.getGEPCost(PointeeType, Ptr, Operands);

1665

}

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1666

unsigned getInliningThresholdMultiplier() override {

1667

return Impl.getInliningThresholdMultiplier();

1668

}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1669

int getInlinerVectorBonusPercent() override {

1670

return Impl.getInlinerVectorBonusPercent();

Andrew Walbran

2020-04-07 12:24:26 +0100

[diff] [blame]

1671

}

1672

int getMemcpyCost(const Instruction *I) override {

1673

return Impl.getMemcpyCost(I);

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1674

}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1675

int getUserCost(const User *U, ArrayRef<const Value *> Operands,

1676

TargetCostKind CostKind) override {

1677

return Impl.getUserCost(U, Operands, CostKind);

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1678

}

1679

/// Returns whatever Impl.hasBranchDivergence() reports.
bool hasBranchDivergence() override {
  return Impl.hasBranchDivergence();
}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1680

bool useGPUDivergenceAnalysis() override {

1681

return Impl.useGPUDivergenceAnalysis();

1682

}

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1683

bool isSourceOfDivergence(const Value *V) override {

1684

return Impl.isSourceOfDivergence(V);

1685

}

1686

1687

bool isAlwaysUniform(const Value *V) override {

1688

return Impl.isAlwaysUniform(V);

1689

}

1690

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1691

/// Returns whatever Impl.getFlatAddressSpace() reports.
unsigned getFlatAddressSpace() override {
  return Impl.getFlatAddressSpace();
}

1692

1693

bool collectFlatAddressOperands(SmallVectorImpl<int> &OpIndexes,

1694

Intrinsic::ID IID) const override {

1695

return Impl.collectFlatAddressOperands(OpIndexes, IID);

1696

}

1697

1698

bool isNoopAddrSpaceCast(unsigned FromAS, unsigned ToAS) const override {

1699

return Impl.isNoopAddrSpaceCast(FromAS, ToAS);

1700

}

1701

1702

unsigned getAssumedAddrSpace(const Value *V) const override {

1703

return Impl.getAssumedAddrSpace(V);

1704

}

1705

1706

/// Forwards to Impl.rewriteIntrinsicWithAddressSpace() and returns the Value
/// pointer it produces.
Value *rewriteIntrinsicWithAddressSpace(
    IntrinsicInst *II, Value *OldV, Value *NewV) const override {
  return Impl.rewriteIntrinsicWithAddressSpace(II, OldV, NewV);
}

1710

1711

bool isLoweredToCall(const Function *F) override {

1712

return Impl.isLoweredToCall(F);

1713

}

1714

void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,

1715

UnrollingPreferences &UP) override {

1716

return Impl.getUnrollingPreferences(L, SE, UP);

1717

}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1718

void getPeelingPreferences(Loop *L, ScalarEvolution &SE,

1719

PeelingPreferences &PP) override {

1720

return Impl.getPeelingPreferences(L, SE, PP);

1721

}

Andrew Walbran

2020-04-07 12:24:26 +0100

[diff] [blame]

1722

bool isHardwareLoopProfitable(Loop *L, ScalarEvolution &SE,

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1723

AssumptionCache &AC, TargetLibraryInfo *LibInfo,

Andrew Walbran

2020-04-07 12:24:26 +0100

[diff] [blame]

1724

HardwareLoopInfo &HWLoopInfo) override {

1725

return Impl.isHardwareLoopProfitable(L, SE, AC, LibInfo, HWLoopInfo);

1726

}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1727

bool preferPredicateOverEpilogue(Loop *L, LoopInfo *LI, ScalarEvolution &SE,

1728

AssumptionCache &AC, TargetLibraryInfo *TLI,

1729

DominatorTree *DT,

1730

const LoopAccessInfo *LAI) override {

1731

return Impl.preferPredicateOverEpilogue(L, LI, SE, AC, TLI, DT, LAI);

1732

}

1733

bool emitGetActiveLaneMask() override {

1734

return Impl.emitGetActiveLaneMask();

1735

}

1736

/// Forwards to Impl.instCombineIntrinsic() and returns its optional result.
Optional<Instruction *> instCombineIntrinsic(
    InstCombiner &IC, IntrinsicInst &II) override {
  return Impl.instCombineIntrinsic(IC, II);
}

1740

/// Forwards to Impl.simplifyDemandedUseBitsIntrinsic(). \p Known and
/// \p KnownBitsComputed are passed by reference so Impl may update them.
Optional<Value *> simplifyDemandedUseBitsIntrinsic(
    InstCombiner &IC, IntrinsicInst &II, APInt DemandedMask, KnownBits &Known,
    bool &KnownBitsComputed) override {
  return Impl.simplifyDemandedUseBitsIntrinsic(
      IC, II, DemandedMask, Known, KnownBitsComputed);
}

1747

/// Forwards to Impl.simplifyDemandedVectorEltsIntrinsic(). The three
/// UndefElts parameters are passed by reference so Impl may update them; the
/// SimplifyAndSetOp callback is handed over as-is.
Optional<Value *> simplifyDemandedVectorEltsIntrinsic(
    InstCombiner &IC, IntrinsicInst &II, APInt DemandedElts,
    APInt &UndefElts, APInt &UndefElts2, APInt &UndefElts3,
    std::function<void(Instruction *, unsigned, APInt, APInt &)>
        SimplifyAndSetOp) override {
  return Impl.simplifyDemandedVectorEltsIntrinsic(
      IC, II, DemandedElts, UndefElts, UndefElts2, UndefElts3, SimplifyAndSetOp);
}

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1756

bool isLegalAddImmediate(int64_t Imm) override {

1757

return Impl.isLegalAddImmediate(Imm);

1758

}

1759

bool isLegalICmpImmediate(int64_t Imm) override {

1760

return Impl.isLegalICmpImmediate(Imm);

1761

}

1762

bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1763

bool HasBaseReg, int64_t Scale, unsigned AddrSpace,

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1764

Instruction *I) override {

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1765

return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,

1766

AddrSpace, I);

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1767

}

1768

bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,

1769

TargetTransformInfo::LSRCost &C2) override {

1770

return Impl.isLSRCostLess(C1, C2);

1771

}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1772

bool isNumRegsMajorCostOfLSR() override {

1773

return Impl.isNumRegsMajorCostOfLSR();

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1774

}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1775

/// Asks Impl whether \p I is a profitable LSR chain element.
bool isProfitableLSRChainElement(
    Instruction *I) override {
  return Impl.isProfitableLSRChainElement(I);
}

1778

/// Returns whatever Impl.canMacroFuseCmp() reports.
bool canMacroFuseCmp() override {
  return Impl.canMacroFuseCmp();
}

1779

bool canSaveCmp(Loop *L, BranchInst **BI, ScalarEvolution *SE, LoopInfo *LI,

1780

DominatorTree *DT, AssumptionCache *AC,

1781

TargetLibraryInfo *LibInfo) override {

Andrew Walbran

2020-04-07 12:24:26 +0100

[diff] [blame]

1782

return Impl.canSaveCmp(L, BI, SE, LI, DT, AC, LibInfo);

1783

}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1784

/// Returns whatever Impl.shouldFavorPostInc() reports.
bool shouldFavorPostInc() const override {
  return Impl.shouldFavorPostInc();
}

Andrew Walbran

2019-10-22 13:54:20 +0100

[diff] [blame]

1785

bool shouldFavorBackedgeIndex(const Loop *L) const override {

1786

return Impl.shouldFavorBackedgeIndex(L);

1787

}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1788

/// Asks Impl whether a masked store of \p DataType at \p Alignment is legal.
bool isLegalMaskedStore(
    Type *DataType, Align Alignment) override {
  return Impl.isLegalMaskedStore(DataType, Alignment);
}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1791

/// Asks Impl whether a masked load of \p DataType at \p Alignment is legal.
bool isLegalMaskedLoad(
    Type *DataType, Align Alignment) override {
  return Impl.isLegalMaskedLoad(DataType, Alignment);
}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1794

/// Forwards \p DataType and \p Alignment to Impl.isLegalNTStore().
bool isLegalNTStore(
    Type *DataType, Align Alignment) override {
  return Impl.isLegalNTStore(DataType, Alignment);
}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1797

/// Forwards \p DataType and \p Alignment to Impl.isLegalNTLoad().
bool isLegalNTLoad(
    Type *DataType, Align Alignment) override {
  return Impl.isLegalNTLoad(DataType, Alignment);
}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1800

/// Asks Impl whether a masked scatter of \p DataType at \p Alignment is legal.
bool isLegalMaskedScatter(
    Type *DataType, Align Alignment) override {
  return Impl.isLegalMaskedScatter(DataType, Alignment);
}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1803

/// Asks Impl whether a masked gather of \p DataType at \p Alignment is legal.
bool isLegalMaskedGather(
    Type *DataType, Align Alignment) override {
  return Impl.isLegalMaskedGather(DataType, Alignment);
}

Andrew Walbran

2020-04-07 12:24:26 +0100

[diff] [blame]

1806

/// Asks Impl whether a masked compress-store of \p DataType is legal.
bool isLegalMaskedCompressStore(
    Type *DataType) override {
  return Impl.isLegalMaskedCompressStore(DataType);
}

1809

/// Asks Impl whether a masked expand-load of \p DataType is legal.
bool isLegalMaskedExpandLoad(
    Type *DataType) override {
  return Impl.isLegalMaskedExpandLoad(DataType);
}

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1812

/// Forwards \p DataType and \p IsSigned to Impl.hasDivRemOp().
bool hasDivRemOp(
    Type *DataType, bool IsSigned) override {
  return Impl.hasDivRemOp(DataType, IsSigned);
}

1815

/// Forwards \p I and \p AddrSpace to Impl.hasVolatileVariant().
bool hasVolatileVariant(
    Instruction *I, unsigned AddrSpace) override {
  return Impl.hasVolatileVariant(I, AddrSpace);
}

1818

bool prefersVectorizedAddressing() override {

1819

return Impl.prefersVectorizedAddressing();

1820

}

1821

int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,

1822

bool HasBaseReg, int64_t Scale,

1823

unsigned AddrSpace) override {

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1824

return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, Scale,

1825

AddrSpace);

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1826

}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1827

/// Returns whatever Impl.LSRWithInstrQueries() reports.
bool LSRWithInstrQueries() override {
  return Impl.LSRWithInstrQueries();
}

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1828

/// Forwards \p Ty1 and \p Ty2 to Impl.isTruncateFree().
bool isTruncateFree(Type *Ty1, Type *Ty2) override { return Impl.isTruncateFree(Ty1, Ty2); }

1831

/// Asks Impl whether hoisting \p I is profitable.
bool isProfitableToHoist(Instruction *I) override { return Impl.isProfitableToHoist(I); }

1834

/// Returns whatever Impl.useAA() reports.
bool useAA() override {
  return Impl.useAA();
}

1835

/// Asks Impl whether \p Ty is a legal type.
bool isTypeLegal(Type *Ty) override {
  return Impl.isTypeLegal(Ty);
}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1836

/// Returns Impl's register usage figure for \p Ty.
unsigned getRegUsageForType(Type *Ty) override { return Impl.getRegUsageForType(Ty); }

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1839

bool shouldBuildLookupTables() override {

1840

return Impl.shouldBuildLookupTables();

1841

}

1842

/// Asks Impl whether lookup tables should be built for constant \p C.
bool shouldBuildLookupTablesForConstant(
    Constant *C) override {
  return Impl.shouldBuildLookupTablesForConstant(C);
}

1845

/// Forwards \p F to Impl.useColdCCForColdCall().
bool useColdCCForColdCall(Function &F) override { return Impl.useColdCCForColdCall(F); }

1848

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1849

/// Forwards every argument unchanged to Impl.getScalarizationOverhead().
unsigned getScalarizationOverhead(
    VectorType *Ty, const APInt &DemandedElts, bool Insert,
    bool Extract) override {
  return Impl.getScalarizationOverhead(Ty, DemandedElts, Insert, Extract);
}

1853

unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,

1854

unsigned VF) override {

1855

return Impl.getOperandsScalarizationOverhead(Args, VF);

1856

}

1857

1858

bool supportsEfficientVectorElementLoadStore() override {

1859

return Impl.supportsEfficientVectorElementLoadStore();

1860

}

1861

1862

bool enableAggressiveInterleaving(bool LoopHasReductions) override {

1863

return Impl.enableAggressiveInterleaving(LoopHasReductions);

1864

}

Andrew Walbran

2020-04-07 12:24:26 +0100

[diff] [blame]

1865

/// Returns the MemCmpExpansionOptions produced by
/// Impl.enableMemCmpExpansion() for the given flags.
MemCmpExpansionOptions enableMemCmpExpansion(
    bool OptSize, bool IsZeroCmp) const override {
  return Impl.enableMemCmpExpansion(OptSize, IsZeroCmp);
}

1869

bool enableInterleavedAccessVectorization() override {

1870

return Impl.enableInterleavedAccessVectorization();

1871

}

Andrew Walbran

2019-10-22 13:54:20 +0100

[diff] [blame]

1872

bool enableMaskedInterleavedAccessVectorization() override {

1873

return Impl.enableMaskedInterleavedAccessVectorization();

1874

}

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1875

bool isFPVectorizationPotentiallyUnsafe() override {

1876

return Impl.isFPVectorizationPotentiallyUnsafe();

1877

}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1878

bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth,

1879

unsigned AddressSpace, unsigned Alignment,

1880

bool *Fast) override {

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1881

return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,

1882

Alignment, Fast);

1883

}

1884

/// Returns Impl's popcount support kind for an integer of
/// \p IntTyWidthInBit bits.
PopcntSupportKind getPopcntSupport(
    unsigned IntTyWidthInBit) override {
  return Impl.getPopcntSupport(IntTyWidthInBit);
}

1887

/// Forwards \p Ty to Impl.haveFastSqrt().
bool haveFastSqrt(Type *Ty) override {
  return Impl.haveFastSqrt(Ty);
}

1888

1889

/// Forwards \p Ty to Impl.isFCmpOrdCheaperThanFCmpZero().
bool isFCmpOrdCheaperThanFCmpZero(
    Type *Ty) override {
  return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);
}

1892

1893

/// Forwards \p Ty to Impl.getFPOpCost() and returns its result.
int getFPOpCost(Type *Ty) override {
  return Impl.getFPOpCost(Ty);
}

1894

1895

int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,

1896

Type *Ty) override {

1897

return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);

1898

}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1899

int getIntImmCost(const APInt &Imm, Type *Ty,

1900

TargetCostKind CostKind) override {

1901

return Impl.getIntImmCost(Imm, Ty, CostKind);

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1902

}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1903

int getIntImmCostInst(unsigned Opc, unsigned Idx, const APInt &Imm, Type *Ty,

1904

TargetCostKind CostKind,

1905

Instruction *Inst = nullptr) override {

1906

return Impl.getIntImmCostInst(Opc, Idx, Imm, Ty, CostKind, Inst);

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1907

}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1908

int getIntImmCostIntrin(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,

1909

Type *Ty, TargetCostKind CostKind) override {

1910

return Impl.getIntImmCostIntrin(IID, Idx, Imm, Ty, CostKind);

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1911

}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1912

unsigned getNumberOfRegisters(unsigned ClassID) const override {

1913

return Impl.getNumberOfRegisters(ClassID);

1914

}

1915

unsigned getRegisterClassForType(bool Vector,

1916

Type *Ty = nullptr) const override {

1917

return Impl.getRegisterClassForType(Vector, Ty);

1918

}

1919

const char *getRegisterClassName(unsigned ClassID) const override {

1920

return Impl.getRegisterClassName(ClassID);

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1921

}

1922

unsigned getRegisterBitWidth(bool Vector) const override {

1923

return Impl.getRegisterBitWidth(Vector);

1924

}

1925

unsigned getMinVectorRegisterBitWidth() override {

1926

return Impl.getMinVectorRegisterBitWidth();

1927

}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1928

/// Returns the optional value produced by Impl.getMaxVScale().
Optional<unsigned> getMaxVScale() const override { return Impl.getMaxVScale(); }

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1931

bool shouldMaximizeVectorBandwidth(bool OptSize) const override {

1932

return Impl.shouldMaximizeVectorBandwidth(OptSize);

1933

}

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

1934

unsigned getMinimumVF(unsigned ElemWidth) const override {

1935

return Impl.getMinimumVF(ElemWidth);

1936

}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1937

unsigned getMaximumVF(unsigned ElemWidth, unsigned Opcode) const override {

1938

return Impl.getMaximumVF(ElemWidth, Opcode);

1939

}

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1940

bool shouldConsiderAddressTypePromotion(

1941

const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {

1942

return Impl.shouldConsiderAddressTypePromotion(

1943

I, AllowPromotionWithoutCommonHeader);

1944

}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1945

/// Returns whatever Impl.getCacheLineSize() reports.
unsigned getCacheLineSize() const override {
  return Impl.getCacheLineSize();
}

1946

/// Returns the optional value Impl.getCacheSize() produces for \p Level.
Optional<unsigned> getCacheSize(
    CacheLevel Level) const override {
  return Impl.getCacheSize(Level);
}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1949

/// Returns the optional value Impl.getCacheAssociativity() produces for
/// \p Level.
Optional<unsigned> getCacheAssociativity(
    CacheLevel Level) const override {
  return Impl.getCacheAssociativity(Level);
}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1952

1953

/// Return the preferred prefetch distance in terms of instructions.

1954

///

1955

unsigned getPrefetchDistance() const override {

1956

return Impl.getPrefetchDistance();

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1957

}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1958

1959

/// Return the minimum stride necessary to trigger software

1960

/// prefetching.

1961

///

1962

unsigned getMinPrefetchStride(unsigned NumMemAccesses,

1963

unsigned NumStridedMemAccesses,

1964

unsigned NumPrefetches,

1965

bool HasCall) const override {

1966

return Impl.getMinPrefetchStride(NumMemAccesses, NumStridedMemAccesses,

1967

NumPrefetches, HasCall);

1968

}

1969

1970

/// Return the maximum prefetch distance in terms of loop

1971

/// iterations.

1972

///

1973

unsigned getMaxPrefetchIterationsAhead() const override {

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1974

return Impl.getMaxPrefetchIterationsAhead();

1975

}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1976

1977

/// \return True if prefetching should also be done for writes.

1978

bool enableWritePrefetching() const override {

1979

return Impl.enableWritePrefetching();

1980

}

1981

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1982

unsigned getMaxInterleaveFactor(unsigned VF) override {

1983

return Impl.getMaxInterleaveFactor(VF);

1984

}

1985

unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1986

unsigned &JTSize,

1987

ProfileSummaryInfo *PSI,

1988

BlockFrequencyInfo *BFI) override {

1989

return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize, PSI, BFI);

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1990

}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

1991

unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty,

1992

TTI::TargetCostKind CostKind,

1993

OperandValueKind Opd1Info,

1994

OperandValueKind Opd2Info,

1995

OperandValueProperties Opd1PropInfo,

1996

OperandValueProperties Opd2PropInfo,

1997

ArrayRef<const Value *> Args,

1998

const Instruction *CxtI = nullptr) override {

1999

return Impl.getArithmeticInstrCost(Opcode, Ty, CostKind, Opd1Info, Opd2Info,

2000

Opd1PropInfo, Opd2PropInfo, Args, CxtI);

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

2001

}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

2002

int getShuffleCost(ShuffleKind Kind, VectorType *Tp, int Index,

2003

VectorType *SubTp) override {

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

2004

return Impl.getShuffleCost(Kind, Tp, Index, SubTp);

2005

}

2006

int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

2007

CastContextHint CCH, TTI::TargetCostKind CostKind,

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

2008

const Instruction *I) override {

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

2009

return Impl.getCastInstrCost(Opcode, Dst, Src, CCH, CostKind, I);

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

2010

}

2011

int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,

2012

unsigned Index) override {

2013

return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);

2014

}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

2015

int getCFInstrCost(unsigned Opcode, TTI::TargetCostKind CostKind) override {

2016

return Impl.getCFInstrCost(Opcode, CostKind);

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

2017

}

2018

int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

2019

CmpInst::Predicate VecPred,

2020

TTI::TargetCostKind CostKind,

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

2021

const Instruction *I) override {

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

2022

return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, VecPred, CostKind, I);

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

2023

}

2024

int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {

2025

return Impl.getVectorInstrCost(Opcode, Val, Index);

2026

}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

2027

int getMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,

2028

unsigned AddressSpace, TTI::TargetCostKind CostKind,

2029

const Instruction *I) override {

2030

return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace,

2031

CostKind, I);

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

2032

}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

2033

int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, Align Alignment,

2034

unsigned AddressSpace,

2035

TTI::TargetCostKind CostKind) override {

2036

return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace,

2037

CostKind);

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

2038

}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

2039

int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, const Value *Ptr,

2040

bool VariableMask, Align Alignment,

2041

TTI::TargetCostKind CostKind,

2042

const Instruction *I = nullptr) override {

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

2043

return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

2044

Alignment, CostKind, I);

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

2045

}

2046

int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

2047

ArrayRef<unsigned> Indices, Align Alignment,

2048

unsigned AddressSpace,

2049

TTI::TargetCostKind CostKind,

2050

bool UseMaskForCond,

Andrew Walbran

2019-10-22 13:54:20 +0100

[diff] [blame]

2051

bool UseMaskForGaps) override {

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

2052

return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

2053

Alignment, AddressSpace, CostKind,

Andrew Walbran

2019-10-22 13:54:20 +0100

[diff] [blame]

2054

UseMaskForCond, UseMaskForGaps);

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

2055

}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

2056

int getArithmeticReductionCost(unsigned Opcode, VectorType *Ty,

2057

bool IsPairwiseForm,

2058

TTI::TargetCostKind CostKind) override {

2059

return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm,

2060

CostKind);

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

2061

}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

2062

int getMinMaxReductionCost(VectorType *Ty, VectorType *CondTy,

2063

bool IsPairwiseForm, bool IsUnsigned,

2064

TTI::TargetCostKind CostKind) override {

2065

return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned,

2066

CostKind);

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

2067

}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

2068

int getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA,

2069

TTI::TargetCostKind CostKind) override {

2070

return Impl.getIntrinsicInstrCost(ICA, CostKind);

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

2071

}

2072

int getCallInstrCost(Function *F, Type *RetTy,

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

2073

ArrayRef<Type *> Tys,

2074

TTI::TargetCostKind CostKind) override {

2075

return Impl.getCallInstrCost(F, RetTy, Tys, CostKind);

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

2076

}

2077

/// Forwards \p Tp to Impl.getNumberOfParts() and returns its result.
unsigned getNumberOfParts(Type *Tp) override { return Impl.getNumberOfParts(Tp); }

2080

int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,

2081

const SCEV *Ptr) override {

2082

return Impl.getAddressComputationCost(Ty, SE, Ptr);

2083

}

2084

/// Forwards \p Tys to Impl.getCostOfKeepingLiveOverCall().
unsigned getCostOfKeepingLiveOverCall(
    ArrayRef<Type *> Tys) override {
  return Impl.getCostOfKeepingLiveOverCall(Tys);
}

2087

bool getTgtMemIntrinsic(IntrinsicInst *Inst,

2088

MemIntrinsicInfo &Info) override {

2089

return Impl.getTgtMemIntrinsic(Inst, Info);

2090

}

2091

unsigned getAtomicMemIntrinsicMaxElementSize() const override {

2092

return Impl.getAtomicMemIntrinsicMaxElementSize();

2093

}

2094

/// Forwards to Impl.getOrCreateResultFromMemIntrinsic() and returns the
/// Value pointer it produces.
Value *getOrCreateResultFromMemIntrinsic(
    IntrinsicInst *Inst, Type *ExpectedType) override {
  return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);
}

2098

/// Forwards every argument unchanged to Impl.getMemcpyLoopLoweringType() and
/// returns the Type pointer it produces.
Type *getMemcpyLoopLoweringType(
    LLVMContext &Context, Value *Length, unsigned SrcAddrSpace,
    unsigned DestAddrSpace, unsigned SrcAlign,
    unsigned DestAlign) const override {
  return Impl.getMemcpyLoopLoweringType(
      Context, Length, SrcAddrSpace, DestAddrSpace, SrcAlign, DestAlign);
}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

2105

/// Forwards the call to Impl.getMemcpyLoopResidualLoweringType with all
/// arguments in the same order. Nothing is returned; \p OpsOut is handed to
/// Impl by non-const reference (presumably the output list -- confirm against
/// the implementation).
void getMemcpyLoopResidualLoweringType(

2106

SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,

2107

unsigned RemainingBytes, unsigned SrcAddrSpace, unsigned DestAddrSpace,

2108

unsigned SrcAlign, unsigned DestAlign) const override {

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

2109

Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

2110

SrcAddrSpace, DestAddrSpace,

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

2111

SrcAlign, DestAlign);

2112

}

2113

/// Forwards \p Caller and \p Callee to Impl.areInlineCompatible and returns
/// Impl's answer.
bool areInlineCompatible(const Function *Caller,

2114

const Function *Callee) const override {

2115

return Impl.areInlineCompatible(Caller, Callee);

2116

}

Andrew Walbran

2019-10-22 13:54:20 +0100

[diff] [blame]

2117

/// Forwards \p Caller, \p Callee and \p Args to
/// Impl.areFunctionArgsABICompatible and returns Impl's answer. \p Args is
/// handed to Impl by non-const reference.
bool areFunctionArgsABICompatible(

2118

const Function *Caller, const Function *Callee,

2119

SmallPtrSetImpl<Argument *> &Args) const override {

2120

return Impl.areFunctionArgsABICompatible(Caller, Callee, Args);

2121

}

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

2122

/// Forwards to Impl.isIndexedLoadLegal. Unlike most forwarders here, the
/// callee takes an extra third argument, supplied from getDataLayout().
bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {

2123

return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());

2124

}

2125

/// Forwards to Impl.isIndexedStoreLegal. Unlike most forwarders here, the
/// callee takes an extra third argument, supplied from getDataLayout().
bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {

2126

return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());

2127

}

2128

/// Forwards \p AddrSpace to Impl.getLoadStoreVecRegBitWidth and returns
/// Impl's result.
unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {

2129

return Impl.getLoadStoreVecRegBitWidth(AddrSpace);

2130

}

2131

/// Forwards \p LI to Impl.isLegalToVectorizeLoad and returns Impl's answer.
bool isLegalToVectorizeLoad(LoadInst *LI) const override {

2132

return Impl.isLegalToVectorizeLoad(LI);

2133

}

2134

/// Forwards \p SI to Impl.isLegalToVectorizeStore and returns Impl's answer.
bool isLegalToVectorizeStore(StoreInst *SI) const override {

2135

return Impl.isLegalToVectorizeStore(SI);

2136

}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

2137

/// Forwards \p ChainSizeInBytes, \p Alignment and \p AddrSpace to
/// Impl.isLegalToVectorizeLoadChain and returns Impl's answer.
bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes, Align Alignment,

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

2138

unsigned AddrSpace) const override {

2139

return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,

2140

AddrSpace);

2141

}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

2142

/// Forwards \p ChainSizeInBytes, \p Alignment and \p AddrSpace to
/// Impl.isLegalToVectorizeStoreChain and returns Impl's answer.
bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes, Align Alignment,

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

2143

unsigned AddrSpace) const override {

2144

return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,

2145

AddrSpace);

2146

}

2147

/// Forwards \p VF, \p LoadSize, \p ChainSizeInBytes and \p VecTy to
/// Impl.getLoadVectorFactor and returns Impl's result.
unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,

2148

unsigned ChainSizeInBytes,

2149

VectorType *VecTy) const override {

2150

return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);

2151

}

2152

/// Forwards \p VF, \p StoreSize, \p ChainSizeInBytes and \p VecTy to
/// Impl.getStoreVectorFactor and returns Impl's result.
unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,

2153

unsigned ChainSizeInBytes,

2154

VectorType *VecTy) const override {

2155

return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);

2156

}

2157

/// Forwards \p Opcode, \p Ty and \p Flags to Impl.useReductionIntrinsic and
/// returns Impl's answer.
bool useReductionIntrinsic(unsigned Opcode, Type *Ty,

2158

ReductionFlags Flags) const override {

2159

return Impl.useReductionIntrinsic(Opcode, Ty, Flags);

2160

}

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

2161

/// Forwards \p Opcode, \p Ty and \p Flags to Impl.preferInLoopReduction and
/// returns Impl's answer.
bool preferInLoopReduction(unsigned Opcode, Type *Ty,

2162

ReductionFlags Flags) const override {

2163

return Impl.preferInLoopReduction(Opcode, Ty, Flags);

2164

}

2165

/// Forwards \p Opcode, \p Ty and \p Flags to
/// Impl.preferPredicatedReductionSelect and returns Impl's answer.
bool preferPredicatedReductionSelect(unsigned Opcode, Type *Ty,

2166

ReductionFlags Flags) const override {

2167

return Impl.preferPredicatedReductionSelect(Opcode, Ty, Flags);

2168

}

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

2169

/// Forwards \p II to Impl.shouldExpandReduction and returns Impl's answer.
bool shouldExpandReduction(const IntrinsicInst *II) const override {

2170

return Impl.shouldExpandReduction(II);

2171

}

Andrew Walbran

2020-04-07 12:24:26 +0100

[diff] [blame]

2172

2173

/// Returns whatever Impl.getGISelRematGlobalCost() reports.
unsigned getGISelRematGlobalCost() const override {

2174

return Impl.getGISelRematGlobalCost();

2175

}

2176

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

2177

/// Returns whatever Impl.supportsScalableVectors() reports.
bool supportsScalableVectors() const override {

2178

return Impl.supportsScalableVectors();

2179

}

2180

2181

/// Returns whatever Impl.hasActiveVectorLength() reports.
bool hasActiveVectorLength() const override {

2182

return Impl.hasActiveVectorLength();

2183

}

2184

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

2185

/// Forwards \p I to Impl.getInstructionLatency and returns Impl's result.
int getInstructionLatency(const Instruction *I) override {

2186

return Impl.getInstructionLatency(I);

}

};

/// Out-of-class definition of the templated TargetTransformInfo constructor.
///
/// Takes \p Impl by value, allocates a Model<T> from it with `new`, and
/// initializes the TTIImpl member with the resulting pointer.
/// NOTE(review): TTIImpl's declared type is outside this chunk; presumably an
/// owning smart pointer that releases the Model -- confirm.
template <typename T>

2191

TargetTransformInfo::TargetTransformInfo(T Impl)

2192

: TTIImpl(new Model<T>(Impl)) {}

2193

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

2194

/// Analysis pass providing the \c TargetTransformInfo.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

2195

///

2196

/// The core idea of the TargetIRAnalysis is to expose an interface through

2197

/// which LLVM targets can analyze and provide information about the middle

2198

/// end's target-independent IR. This supports use cases such as target-aware

2199

/// cost modeling of IR constructs.

2200

///

2201

/// This is a function analysis because much of the cost modeling for targets

2202

/// is done in a subtarget specific way and LLVM supports compiling different

2203

/// functions targeting different subtargets in order to support runtime

2204

/// dispatch according to the observed subtarget.

2205

class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {

2206

public:

2207

/// The type returned by run() and by the stored callback.
typedef TargetTransformInfo Result;

2208

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

2209

/// Default construct a target IR analysis.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

2210

///

2211

/// This will use the module's datalayout to construct a baseline

2212

/// conservative TTI result.

2213

TargetIRAnalysis();

2214

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

2215

/// Construct an IR analysis pass around a target-provided callback.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

2216

///

2217

/// The callback will be called with a particular function for which the TTI

2218

/// is needed and must return a TTI object for that function.

2219

TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);

2220

2221

// Value semantics. We spell out the constructors for MSVC.

2222

/// Copy constructor: copies the stored callback from \p Arg.
TargetIRAnalysis(const TargetIRAnalysis &Arg)

2223

: TTICallback(Arg.TTICallback) {}

2224

/// Move constructor: moves the stored callback out of \p Arg.
TargetIRAnalysis(TargetIRAnalysis &&Arg)

2225

: TTICallback(std::move(Arg.TTICallback)) {}

2226

/// Copy assignment: copies the callback from \p RHS and returns *this.
TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {

2227

TTICallback = RHS.TTICallback;

2228

return *this;

2229

}

2230

/// Move assignment: moves the callback out of \p RHS and returns *this.
TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {

2231

TTICallback = std::move(RHS.TTICallback);

return *this;

}

/// Produces the Result for \p F. Only declared here; the analysis-manager
/// parameter is left unnamed in this declaration.
/// NOTE(review): presumably invokes TTICallback on \p F -- confirm in the
/// out-of-line definition.
Result run(const Function &F, FunctionAnalysisManager &);

2236

2237

private:

2238

// The mixin base is befriended so it can reach this class's private members.
friend AnalysisInfoMixin<TargetIRAnalysis>;

2239

// Static key member, declared here and defined elsewhere.
// NOTE(review): presumably what AnalysisInfoMixin uses to identify this
// analysis -- confirm.
static AnalysisKey Key;

2240

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

2241

/// The callback used to produce a result.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

2242

///

2243

/// We use a completely opaque callback so that targets can provide whatever

2244

/// mechanism they desire for constructing the TTI for a given function.

2245

///

2246

/// FIXME: Should we really use std::function? It's relatively inefficient.

2247

/// It might be possible to arrange for even stateful callbacks to outlive

2248

/// the analysis and thus use a function_ref which would be lighter weight.

2249

/// This may also be less error prone as the callback is likely to reference

2250

/// the external TargetMachine, and that reference needs to never dangle.

2251

std::function<Result(const Function &)> TTICallback;

2252

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

2253

/// Helper function used as the callback in the default constructor.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

2254

static Result getDefaultTTI(const Function &F);

2255

};

2256

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

2257

/// Wrapper pass for TargetTransformInfo.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

2258

///

2259

/// This pass can be constructed from a TTI object which it stores internally

2260

/// and is queried by passes.

2261

class TargetTransformInfoWrapperPass : public ImmutablePass {

2262

// The analysis object held by this pass.
// NOTE(review): presumably initialized from the explicit constructor's
// argument -- confirm in the out-of-line definition.
TargetIRAnalysis TIRA;

2263

// Optionally-held TTI object.
// NOTE(review): presumably populated by getTTI() -- confirm.
Optional<TargetTransformInfo> TTI;

2264

2265

// Private virtual method declared here and defined out of line.
// NOTE(review): presumably the usual vtable anchor -- confirm.
virtual void anchor();

public:

// Static member declared here and defined elsewhere.
// NOTE(review): presumably the pass identifier -- confirm.
static char ID;

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

2270

/// We must provide a default constructor for the pass but it should

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

2271

/// never be used.

2272

///

2273

/// Use the constructor below or call one of the creation routines.

2274

TargetTransformInfoWrapperPass();

2275

2276

/// Construct the wrapper pass from a TargetIRAnalysis, taken by value.
/// Marked explicit, so a TargetIRAnalysis does not implicitly convert to a
/// pass.
explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);

2277

2278

/// Returns a reference to a TargetTransformInfo for \p F.
/// NOTE(review): presumably computed through TIRA and stored in the TTI
/// member, which would bound the reference's lifetime -- confirm in the
/// out-of-line definition.
TargetTransformInfo &getTTI(const Function &F);

2279

};

2280

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

2281

/// Create an analysis pass wrapper around a TTI object.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

2282

///

2283

/// This analysis pass just holds the TTI instance and makes it available to

2284

/// clients.

2285

/// \param TIRA the TargetIRAnalysis handed to the created pass, taken by value.
/// \returns the new pass as an ImmutablePass pointer. NOTE(review): ownership
/// of the returned pointer is not visible from this declaration -- confirm.
ImmutablePass *createTargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);

2286

Olivier Deprez

2021-04-20 13:36:24 +0200

[diff] [blame]

2287

} // namespace llvm

Andrew Scull