Blame - linux-x64/clang/include/llvm/Analysis/TargetTransformInfo.h - hafnium/prebuilts

2018-08-14 10:06:54 +0100

[diff] [blame]

1

//===- TargetTransformInfo.h ------------------------------------*- C++ -*-===//

2

//

Andrew Walbran

2019-10-22 13:54:20 +0100

[diff] [blame]

3

// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.

4

// See https://llvm.org/LICENSE.txt for license information.

5

// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

6

//

7

//===----------------------------------------------------------------------===//

8

/// \file

9

/// This pass exposes codegen information to IR-level passes. Every

10

/// transformation that uses codegen information is broken into three parts:

11

/// 1. The IR-level analysis pass.

12

/// 2. The IR-level transformation interface which provides the needed

13

/// information.

14

/// 3. Codegen-level implementation which uses target-specific hooks.

15

///

16

/// This file defines #2, which is the interface that IR-level transformations

17

/// use for querying the codegen.

18

///

19

//===----------------------------------------------------------------------===//

20

21

#ifndef LLVM_ANALYSIS_TARGETTRANSFORMINFO_H

22

#define LLVM_ANALYSIS_TARGETTRANSFORMINFO_H

23

24

#include "llvm/ADT/Optional.h"

25

#include "llvm/IR/Operator.h"

26

#include "llvm/IR/PassManager.h"

27

#include "llvm/Pass.h"

28

#include "llvm/Support/AtomicOrdering.h"

29

#include "llvm/Support/DataTypes.h"

30

#include <functional>

namespace llvm {

namespace Intrinsic {

enum ID : unsigned;

}

class Function;

class GlobalValue;

class IntrinsicInst;

class LoadInst;

class Loop;

class SCEV;

class ScalarEvolution;

class StoreInst;

class SwitchInst;

class Type;

class User;

class Value;

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

51

/// Information about a load/store intrinsic defined by the target.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

52

struct MemIntrinsicInfo {

53

/// This is the pointer that the intrinsic is loading from or storing to.

54

/// If this is non-null, then analysis/optimization passes can assume that

55

/// this intrinsic is functionally equivalent to a load/store from this

56

/// pointer.

57

Value *PtrVal = nullptr;

58

59

// Ordering for atomic operations.

60

AtomicOrdering Ordering = AtomicOrdering::NotAtomic;

61

62

// Same Id is set by the target for corresponding load/store intrinsics.

63

unsigned short MatchingId = 0;

64

65

bool ReadMem = false;

66

bool WriteMem = false;

67

bool IsVolatile = false;

68

69

bool isUnordered() const {

70

return (Ordering == AtomicOrdering::NotAtomic ||

71

Ordering == AtomicOrdering::Unordered) && !IsVolatile;

}

};

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

75

/// This pass provides access to the codegen interfaces that are needed

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

76

/// for IR-level transformations.

77

class TargetTransformInfo {

78

public:

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

79

/// Construct a TTI object using a type implementing the \c Concept

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

80

/// API below.

81

///

82

/// This is used by targets to construct a TTI wrapping their target-specific

83

/// implementation that encodes appropriate costs for their target.

84

template <typename T> TargetTransformInfo(T Impl);

85

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

86

/// Construct a baseline TTI object using a minimal implementation of

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

87

/// the \c Concept API below.

88

///

89

/// The TTI implementation will reflect the information in the DataLayout

90

/// provided.

91

explicit TargetTransformInfo(const DataLayout &DL);

92

93

// Provide move semantics.

94

TargetTransformInfo(TargetTransformInfo &&Arg);

95

TargetTransformInfo &operator=(TargetTransformInfo &&RHS);

96

97

// We need to define the destructor out-of-line to define our sub-classes

98

// out-of-line.

99

~TargetTransformInfo();

100

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

101

/// Handle the invalidation of this information.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

102

///

103

/// When used as a result of \c TargetIRAnalysis this method will be called

104

/// when the function this was computed for changes. When it returns false,

105

/// the information is preserved across those changes.

106

bool invalidate(Function &, const PreservedAnalyses &,

107

FunctionAnalysisManager::Invalidator &) {

108

// FIXME: We should probably in some way ensure that the subtarget

109

// information for a function hasn't changed.

return false;

}

/// \name Generic Target Information

114

/// @{

115

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

116

/// The kind of cost model.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

117

///

118

/// There are several different cost models that can be customized by the

119

/// target. The normalization of each cost model may be target specific.

120

enum TargetCostKind {
  TCK_RecipThroughput, ///< Reciprocal throughput.
  TCK_Latency,         ///< The latency of instruction.
  TCK_CodeSize         ///< Instruction code size.
};

125

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

126

/// Query the cost of a specified instruction.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

127

///

128

/// Clients should use this interface to query the cost of an existing

129

/// instruction. The instruction must have a valid parent (basic block).

130

///

131

/// Note, this method does not cache the cost calculation and it

132

/// can be expensive in some cases.

133

int getInstructionCost(const Instruction *I, enum TargetCostKind kind) const {

134

switch (kind){

135

case TCK_RecipThroughput:

136

return getInstructionThroughput(I);

137

138

case TCK_Latency:

139

return getInstructionLatency(I);

140

141

case TCK_CodeSize:

142

return getUserCost(I);

143

}

144

llvm_unreachable("Unknown instruction cost kind");

145

}

146

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

147

/// Underlying constants for 'cost' values in this interface.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

148

///

149

/// Many APIs in this interface return a cost. This enum defines the

150

/// fundamental values that should be used to interpret (and produce) those

151

/// costs. The costs are returned as an int rather than a member of this

152

/// enumeration because it is expected that the cost of one IR instruction

153

/// may have a multiplicative factor to it or otherwise won't fit directly

154

/// into the enum. Moreover, it is common to sum or average costs which works

155

/// better as simple integral values. Thus this enum only provides constants.

156

/// Also note that the returned costs are signed integers to make it natural

157

/// to add, subtract, and test with zero (a common boundary condition). It is

158

/// not expected that 2^32 is a realistic cost to be modeling at any point.

159

///

160

/// Note that these costs should usually reflect the intersection of code-size

161

/// cost and execution cost. A free instruction is typically one that folds

162

/// into another instruction. For example, reg-to-reg moves can often be

163

/// skipped by renaming the registers in the CPU, but they still are encoded

164

/// and thus wouldn't be considered 'free' here.

165

enum TargetCostConstants {
  TCC_Free = 0,     ///< Expected to fold away in lowering.
  TCC_Basic = 1,    ///< The cost of a typical 'add' instruction.
  TCC_Expensive = 4 ///< The cost of a 'div' instruction on x86.
};

170

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

171

/// Estimate the cost of a specific operation when lowered.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

172

///

173

/// Note that this is designed to work on an arbitrary synthetic opcode, and

174

/// thus work for hypothetical queries before an instruction has even been

175

/// formed. However, this does *not* work for GEPs, and must not be called

176

/// for a GEP instruction. Instead, use the dedicated getGEPCost interface as

177

/// analyzing a GEP's cost requires more information.

178

///

179

/// Typically only the result type is required, and the operand type can be

180

/// omitted. However, if the opcode is one of the cast instructions, the

181

/// operand type is required.

182

///

183

/// The returned cost is defined in terms of \c TargetCostConstants, see its

184

/// comments for a detailed explanation of the cost values.

185

int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy = nullptr) const;

186

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

187

/// Estimate the cost of a GEP operation when lowered.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

188

///

189

/// The contract for this function is the same as \c getOperationCost except

190

/// that it supports an interface that provides extra information specific to

191

/// the GEP operation.

192

int getGEPCost(Type *PointeeType, const Value *Ptr,

193

ArrayRef<const Value *> Operands) const;

194

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

195

/// Estimate the cost of an EXT operation when lowered.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

196

///

197

/// The contract for this function is the same as \c getOperationCost except

198

/// that it supports an interface that provides extra information specific to

199

/// the EXT operation.

200

int getExtCost(const Instruction *I, const Value *Src) const;

201

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

202

/// Estimate the cost of a function call when lowered.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

203

///

204

/// The contract for this is the same as \c getOperationCost except that it

205

/// supports an interface that provides extra information specific to call

206

/// instructions.

207

///

208

/// This is the most basic query for estimating call cost: it only knows the

209

/// function type and (potentially) the number of arguments at the call site.

210

/// The latter is only interesting for varargs function types.

211

int getCallCost(FunctionType *FTy, int NumArgs = -1) const;

212

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

213

/// Estimate the cost of calling a specific function when lowered.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

214

///

215

/// This overload adds the ability to reason about the particular function

216

/// being called in the event it is a library call with special lowering.

217

int getCallCost(const Function *F, int NumArgs = -1) const;

218

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

219

/// Estimate the cost of calling a specific function when lowered.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

220

///

221

/// This overload allows specifying a set of candidate argument values.

222

int getCallCost(const Function *F, ArrayRef<const Value *> Arguments) const;

223

224

/// \returns A value by which our inlining threshold should be multiplied.

225

/// This is primarily used to bump up the inlining threshold wholesale on

226

/// targets where calls are unusually expensive.

227

///

228

/// TODO: This is a rather blunt instrument. Perhaps altering the costs of

229

/// individual classes of instructions would be better.

230

unsigned getInliningThresholdMultiplier() const;

231

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

232

/// Estimate the cost of an intrinsic when lowered.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

233

///

234

/// Mirrors the \c getCallCost method but uses an intrinsic identifier.

235

int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,

236

ArrayRef<Type *> ParamTys) const;

237

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

238

/// Estimate the cost of an intrinsic when lowered.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

239

///

240

/// Mirrors the \c getCallCost method but uses an intrinsic identifier.

241

int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,

242

ArrayRef<const Value *> Arguments) const;

243

244

/// \return The estimated number of case clusters when lowering \p 'SI'.

245

/// \p JTSize Set a jump table size only when \p SI is suitable for a jump

246

/// table.

247

unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,

248

unsigned &JTSize) const;

249

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

250

/// Estimate the cost of a given IR user when lowered.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

251

///

252

/// This can estimate the cost of either a ConstantExpr or Instruction when

253

/// lowered. It has two primary advantages over the \c getOperationCost and

254

/// \c getGEPCost above, and one significant disadvantage: it can only be

255

/// used when the IR construct has already been formed.

256

///

257

/// The advantages are that it can inspect the SSA use graph to reason more

258

/// accurately about the cost. For example, all-constant-GEPs can often be

259

/// folded into a load or other instruction, but if they are used in some

260

/// other context they may not be folded. This routine can distinguish such

261

/// cases.

262

///

263

/// \p Operands is a list of operands which can be a result of transformations

264

/// of the current operands. The number of the operands on the list must equal

265

/// to the number of the current operands the IR user has. Their order on the

266

/// list must be the same as the order of the current operands the IR user

267

/// has.

268

///

269

/// The returned cost is defined in terms of \c TargetCostConstants, see its

270

/// comments for a detailed explanation of the cost values.

271

int getUserCost(const User *U, ArrayRef<const Value *> Operands) const;

272

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

273

/// This is a helper function which calls the two-argument getUserCost

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

274

/// with \p Operands which are the current operands U has.

275

int getUserCost(const User *U) const {

276

SmallVector<const Value *, 4> Operands(U->value_op_begin(),

277

U->value_op_end());

278

return getUserCost(U, Operands);

279

}

280

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

281

/// Return true if branch divergence exists.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

282

///

283

/// Branch divergence has a significantly negative impact on GPU performance

284

/// when threads in the same wavefront take different paths due to conditional

285

/// branches.

286

bool hasBranchDivergence() const;

287

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

288

/// Returns whether V is a source of divergence.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

289

///

290

/// This function provides the target-dependent information for

Andrew Scull

0372a57

2018-11-16 15:47:06 +0000

[diff] [blame]

291

/// the target-independent LegacyDivergenceAnalysis. LegacyDivergenceAnalysis first

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

292

/// builds the dependency graph, and then runs the reachability algorithm

293

/// starting with the sources of divergence.

294

bool isSourceOfDivergence(const Value *V) const;

295

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

296

// Returns true for the target specific

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

297

// set of operations which produce uniform result

298

// even taking non-uniform arguments

299

bool isAlwaysUniform(const Value *V) const;

300

301

/// Returns the address space ID for a target's 'flat' address space. Note

302

/// this is not necessarily the same as addrspace(0), which LLVM sometimes

303

/// refers to as the generic address space. The flat address space is a

304

/// generic address space that can be used to access multiple segments of memory

305

/// with different address spaces. Access of a memory location through a

306

/// pointer with this address space is expected to be legal but slower

307

/// compared to the same memory location accessed through a pointer with a

308

/// different address space.

309

///

310

/// This is for targets with different pointer representations which can

311

/// be converted with the addrspacecast instruction. If a pointer is converted

312

/// to this address space, optimizations should attempt to replace the access

313

/// with the source address space.

314

///

315

/// \returns ~0u if the target does not have such a flat address space to

316

/// optimize away.

317

unsigned getFlatAddressSpace() const;

318

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

319

/// Test whether calls to a function lower to actual program function

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

320

/// calls.

321

///

322

/// The idea is to test whether the program is likely to require a 'call'

323

/// instruction or equivalent in order to call the given function.

324

///

325

/// FIXME: It's not clear that this is a good or useful query API. Clients

326

/// should probably move to simpler cost metrics using the above.

327

/// Alternatively, we could split the cost interface into distinct code-size

328

/// and execution-speed costs. This would allow modelling the core of this

329

/// query more accurately as a call is a single small instruction, but

330

/// incurs significant execution cost.

331

bool isLoweredToCall(const Function *F) const;

332

333

/// Plain aggregate of unsigned cost components.
/// NOTE(review): presumably filled in by loop strength reduction and compared
/// through isLSRCostLess -- confirm against the users of this struct.
struct LSRCost {
  /// TODO: Some of these could be merged. Also, a lexical ordering
  /// isn't always optimal.
  // The members are intentionally left without default initializers so the
  // struct keeps its existing aggregate layout and initialization behavior.
  unsigned Insns;
  unsigned NumRegs;
  unsigned AddRecCost;
  unsigned NumIVMuls;
  unsigned NumBaseAdds;
  unsigned ImmCost;
  unsigned SetupCost;
  unsigned ScaleCost;
};

/// Parameters that control the generic loop unrolling transformation.

347

struct UnrollingPreferences {
  /// The cost threshold for the unrolled loop. Should be relative to the
  /// getUserCost values returned by this API, and the expectation is that
  /// the unrolled loop's instructions when run through that interface should
  /// not exceed this cost. However, this is only an estimate. Also, specific
  /// loops may be unrolled even with a cost above this threshold if deemed
  /// profitable. Set this to UINT_MAX to disable the loop body cost
  /// restriction.
  unsigned Threshold;
  /// If complete unrolling will reduce the cost of the loop, we will boost
  /// the Threshold by a certain percent to allow more aggressive complete
  /// unrolling. This value provides the maximum boost percentage that we
  /// can apply to Threshold (The value should be no less than 100).
  /// BoostedThreshold = Threshold * min(RolledCost / UnrolledCost,
  ///                                    MaxPercentThresholdBoost / 100)
  /// E.g. if complete unrolling reduces the loop execution time by 50%
  /// then we boost the threshold by the factor of 2x. If unrolling is not
  /// expected to reduce the running time, then we do not increase the
  /// threshold.
  unsigned MaxPercentThresholdBoost;
  /// The cost threshold for the unrolled loop when optimizing for size (set
  /// to UINT_MAX to disable).
  unsigned OptSizeThreshold;
  /// The cost threshold for the unrolled loop, like Threshold, but used
  /// for partial/runtime unrolling (set to UINT_MAX to disable).
  unsigned PartialThreshold;
  /// The cost threshold for the unrolled loop when optimizing for size, like
  /// OptSizeThreshold, but used for partial/runtime unrolling (set to
  /// UINT_MAX to disable).
  unsigned PartialOptSizeThreshold;
  /// A forced unrolling factor (the number of concatenated bodies of the
  /// original loop in the unrolled loop body). When set to 0, the unrolling
  /// transformation will select an unrolling factor based on the current cost
  /// threshold and other factors.
  unsigned Count;
  /// A forced peeling factor (the number of bodies of the original loop
  /// that should be peeled off before the loop body). When set to 0, the
  /// unrolling transformation will select a peeling factor based on profile
  /// information and other factors.
  unsigned PeelCount;
  /// Default unroll count for loops with run-time trip count.
  unsigned DefaultUnrollRuntimeCount;
  /// Set the maximum unrolling factor. The unrolling factor may be selected
  /// using the appropriate cost threshold, but may not exceed this number
  /// (set to UINT_MAX to disable). This does not apply in cases where the
  /// loop is being fully unrolled.
  unsigned MaxCount;
  /// Set the maximum unrolling factor for full unrolling. Like MaxCount, but
  /// applies even if full unrolling is selected. This allows a target to fall
  /// back to Partial unrolling if full unrolling is above FullUnrollMaxCount.
  unsigned FullUnrollMaxCount;
  /// Represents number of instructions optimized when "back edge"
  /// becomes "fall through" in unrolled loop.
  /// For now we count a conditional branch on a backedge and a comparison
  /// feeding it.
  unsigned BEInsns;
  /// Allow partial unrolling (unrolling of loops to expand the size of the
  /// loop body, not only to eliminate small constant-trip-count loops).
  bool Partial;
  /// Allow runtime unrolling (unrolling of loops to expand the size of the
  /// loop body even when the number of loop iterations is not known at
  /// compile time).
  bool Runtime;
  /// Allow generation of a loop remainder (extra iterations after unroll).
  bool AllowRemainder;
  /// Allow emitting expensive instructions (such as divisions) when computing
  /// the trip count of a loop for runtime unrolling.
  bool AllowExpensiveTripCount;
  /// Apply loop unroll on any kind of loop
  /// (mainly to loops that fail runtime unrolling).
  bool Force;
  /// Allow using trip count upper bound to unroll loops.
  bool UpperBound;
  /// Allow peeling off loop iterations for loops with low dynamic tripcount.
  bool AllowPeeling;
  /// Allow unrolling of all the iterations of the runtime loop remainder.
  bool UnrollRemainder;
  /// Allow unroll and jam. Used to enable unroll and jam for the target.
  bool UnrollAndJam;
  /// Threshold for unroll and jam, for inner loop size. The 'Threshold'
  /// value above is used during unroll and jam for the outer loop size.
  /// This value is used in the same manner to limit the size of the inner
  /// loop.
  unsigned UnrollAndJamInnerLoopThreshold;
};

432

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

433

/// Get target-customized preferences for the generic loop unrolling

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

434

/// transformation. The caller will initialize UP with the current

435

/// target-independent defaults.

436

void getUnrollingPreferences(Loop *L, ScalarEvolution &,

437

UnrollingPreferences &UP) const;

/// @}

/// \name Scalar Target Information

442

/// @{

443

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

444

/// Flags indicating the kind of support for population count.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

445

///

446

/// Compared to the SW implementation, HW support is supposed to

447

/// significantly boost the performance when the population is dense, and it

448

/// may or may not degrade performance if the population is sparse. A HW

449

/// support is considered as "Fast" if it can outperform, or is on a par

450

/// with, SW implementation when the population is sparse; otherwise, it is

451

/// considered as "Slow".

452

enum PopcntSupportKind {
  PSK_Software,     ///< Software implementation.
  PSK_SlowHardware, ///< Hardware support that is considered "Slow".
  PSK_FastHardware  ///< Hardware support that is considered "Fast".
};

453

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

454

/// Return true if the specified immediate is legal add immediate, that

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

455

/// is the target has add instructions which can add a register with the

456

/// immediate without having to materialize the immediate into a register.

457

bool isLegalAddImmediate(int64_t Imm) const;

458

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

459

/// Return true if the specified immediate is legal icmp immediate,

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

460

/// that is the target has icmp instructions which can compare a register

461

/// against the immediate without having to materialize the immediate into a

462

/// register.

463

bool isLegalICmpImmediate(int64_t Imm) const;

464

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

465

/// Return true if the addressing mode represented by AM is legal for

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

466

/// this target, for a load/store of the specified type.

467

/// The type may be VoidTy, in which case only return true if the addressing

468

/// mode is legal for a load/store of any legal type.

469

/// If target returns true in LSRWithInstrQueries(), I may be valid.

470

/// TODO: Handle pre/postinc as well.

471

bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,

472

bool HasBaseReg, int64_t Scale,

473

unsigned AddrSpace = 0,

474

Instruction *I = nullptr) const;

475

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

476

/// Return true if LSR cost of C1 is lower than C2.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

477

bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,

478

TargetTransformInfo::LSRCost &C2) const;

479

480

/// Return true if the target can fuse a compare and branch.

481

/// Loop-strength-reduction (LSR) uses that knowledge to adjust its cost

482

/// calculation for the instructions in a loop.

483

bool canMacroFuseCmp() const;

484

485

/// \return True if LSR should make efforts to create/preserve post-inc

486

/// addressing mode expressions.

487

bool shouldFavorPostInc() const;

488

Andrew Walbran

2019-10-22 13:54:20 +0100

[diff] [blame]

489

/// Return true if LSR should make efforts to generate indexed addressing

490

/// modes that operate across loop iterations.

491

bool shouldFavorBackedgeIndex(const Loop *L) const;

492

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

493

/// Return true if the target supports masked load/store

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

494

/// AVX2 and AVX-512 targets allow masks for consecutive load and store

495

bool isLegalMaskedStore(Type *DataType) const;

496

bool isLegalMaskedLoad(Type *DataType) const;

497

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

498

/// Return true if the target supports masked gather/scatter

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

499

/// AVX-512 fully supports gather and scatter for vectors with 32 and 64

500

/// bits scalar type.

501

bool isLegalMaskedScatter(Type *DataType) const;

502

bool isLegalMaskedGather(Type *DataType) const;

503

504

/// Return true if the target has a unified operation to calculate division

505

/// and remainder. If so, the additional implicit multiplication and

506

/// subtraction required to calculate a remainder from division are free. This

507

/// can enable more aggressive transformations for division and remainder than

508

/// would typically be allowed using throughput or size cost models.

509

bool hasDivRemOp(Type *DataType, bool IsSigned) const;

510

511

/// Return true if the given instruction (assumed to be a memory access

512

/// instruction) has a volatile variant. If that's the case then we can avoid

513

/// addrspacecast to generic AS for volatile loads/stores. Default

514

/// implementation returns false, which prevents address space inference for

515

/// volatile loads/stores.

516

bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) const;

517

518

/// Return true if target doesn't mind addresses in vectors.

519

bool prefersVectorizedAddressing() const;

520

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

521

/// Return the cost of the scaling factor used in the addressing

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

522

/// mode represented by AM for this target, for a load/store

523

/// of the specified type.

524

/// If the AM is supported, the return value must be >= 0.

525

/// If the AM is not supported, it returns a negative value.

526

/// TODO: Handle pre/postinc as well.

527

int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,

528

bool HasBaseReg, int64_t Scale,

529

unsigned AddrSpace = 0) const;

530

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

531

/// Return true if the loop strength reduce pass should make

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

532

/// Instruction* based TTI queries to isLegalAddressingMode(). This is

533

/// needed on SystemZ, where e.g. a memcpy can only have a 12 bit unsigned

534

/// immediate offset and no index register.

535

bool LSRWithInstrQueries() const;

536

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

537

/// Return true if it's free to truncate a value of type Ty1 to type

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

538

/// Ty2. e.g. On x86 it's free to truncate a i32 value in register EAX to i16

539

/// by referencing its sub-register AX.

540

bool isTruncateFree(Type *Ty1, Type *Ty2) const;

541

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

542

/// Return true if it is profitable to hoist instruction in the

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

543

/// then/else to before if.

544

bool isProfitableToHoist(Instruction *I) const;

bool useAA() const;

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

548

/// Return true if this type is legal.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

549

bool isTypeLegal(Type *Ty) const;

550

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

551

/// Returns the target's jmp_buf alignment in bytes.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

552

unsigned getJumpBufAlignment() const;

553

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

554

/// Returns the target's jmp_buf size in bytes.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

555

unsigned getJumpBufSize() const;

556

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

557

/// Return true if switches should be turned into lookup tables for the

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

558

/// target.

559

bool shouldBuildLookupTables() const;

560

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

561

/// Return true if switches should be turned into lookup tables

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

562

/// containing this constant value for the target.

563

bool shouldBuildLookupTablesForConstant(Constant *C) const;

564

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

565

/// Return true if the input function which is cold at all call sites,

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

566

/// should use coldcc calling convention.

567

bool useColdCCForColdCall(Function &F) const;

568

569

unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const;

570

571

unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,

572

unsigned VF) const;

573

574

/// If target has efficient vector element load/store instructions, it can

575

/// return true here so that insertion/extraction costs are not added to

576

/// the scalarization cost of a load/store.

577

bool supportsEfficientVectorElementLoadStore() const;

578

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

579

/// Don't restrict interleaved unrolling to small loops.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

580

bool enableAggressiveInterleaving(bool LoopHasReductions) const;

581

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

582

/// If not nullptr, enable inline expansion of memcmp. IsZeroCmp is

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

583

/// true if this is the expansion of memcmp(p1, p2, s) == 0.

584

struct MemCmpExpansionOptions {
// Options describing how memcmp may be expanded inline. A pointer to this
// struct is what enableMemCmpExpansion() returns.

585

// The list of available load sizes (in bytes), sorted in decreasing order.

586

SmallVector<unsigned, 8> LoadSizes;

Andrew Walbran

2019-10-22 13:54:20 +0100

[diff] [blame]

587

// Set to true to allow overlapping loads. For example, 7-byte compares can

588

// be done with two 4-byte compares instead of 4+2+1-byte compares. This

589

// requires all loads in LoadSizes to be doable in an unaligned way.

590

bool AllowOverlappingLoads = false;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

591

};

592

const MemCmpExpansionOptions *enableMemCmpExpansion(bool IsZeroCmp) const;

593

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

594

/// Enable matching of interleaved access groups.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

595

bool enableInterleavedAccessVectorization() const;

596

Andrew Walbran

2019-10-22 13:54:20 +0100

[diff] [blame]

597

/// Enable matching of interleaved access groups that contain predicated

598

/// accesses or gaps and therefore vectorized using masked

599

/// vector loads/stores.

600

bool enableMaskedInterleavedAccessVectorization() const;

601

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

602

/// Indicate that it is potentially unsafe to automatically vectorize

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

603

/// floating-point operations because the semantics of vector and scalar

604

/// floating-point operations may differ. For example, ARM NEON v7 SIMD math

605

/// does not support IEEE-754 denormal numbers, while depending on the

606

/// platform, scalar floating-point math does.

607

/// This applies to floating-point math operations and calls, not memory

608

/// operations, shuffles, or casts.

609

bool isFPVectorizationPotentiallyUnsafe() const;

610

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

611

/// Determine if the target supports unaligned memory accesses.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

612

bool allowsMisalignedMemoryAccesses(LLVMContext &Context,

613

unsigned BitWidth, unsigned AddressSpace = 0,

614

unsigned Alignment = 1,

615

bool *Fast = nullptr) const;

616

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

617

/// Return hardware support for population count.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

618

PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const;

619

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

620

/// Return true if the hardware has a fast square-root instruction.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

621

bool haveFastSqrt(Type *Ty) const;

622

623

/// Return true if it is faster to check if a floating-point value is NaN

624

/// (or not-NaN) versus a comparison against a constant FP zero value.

625

/// Targets should override this if materializing a 0.0 for comparison is

626

/// generally as cheap as checking for ordered/unordered.

627

bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) const;

628

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

629

/// Return the expected cost of supporting the floating point operation

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

630

/// of the specified type.

631

int getFPOpCost(Type *Ty) const;

632

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

633

/// Return the expected cost of materialization for the given integer

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

634

/// immediate of the specified type.

635

int getIntImmCost(const APInt &Imm, Type *Ty) const;

636

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

637

/// Return the expected cost of materialization for the given integer

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

638

/// immediate of the specified type for a given instruction. The cost can be

639

/// zero if the immediate can be folded into the specified instruction.

640

int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,

641

Type *Ty) const;

642

int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,

643

Type *Ty) const;

644

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

645

/// Return the expected cost for the given integer when optimising

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

646

/// for size. This is different than the other integer immediate cost

647

/// functions in that it is subtarget agnostic. This is useful when you e.g.

648

/// target one ISA such as AArch32 but smaller encodings could be possible

649

/// with another such as Thumb. This return value is used as a penalty when

650

/// the total costs for a constant is calculated (the bigger the cost, the

651

/// more beneficial constant hoisting is).

652

int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,

Type *Ty) const;

/// @}

/// \name Vector Target Information

657

/// @{

658

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

659

/// The various kinds of shuffle patterns for vector queries.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

660

enum ShuffleKind {

661

SK_Broadcast, ///< Broadcast element 0 to all other elements.

662

SK_Reverse, ///< Reverse the order of the vector.

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

663

SK_Select, ///< Selects elements from the corresponding lane of

664

///< either source operand. This is equivalent to a

665

///< vector select with a constant condition operand.

666

SK_Transpose, ///< Transpose two vectors.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

667

SK_InsertSubvector, ///< InsertSubvector. Index indicates start offset.

668

SK_ExtractSubvector,///< ExtractSubvector. Index indicates start offset.

669

SK_PermuteTwoSrc, ///< Merge elements from two source vectors into one

670

///< with any shuffle mask.

671

SK_PermuteSingleSrc ///< Shuffle elements of single source vector with any

///< shuffle mask.

};

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

675

/// Additional information about an operand's possible values.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

676

enum OperandValueKind {

677

OK_AnyValue, // Operand can have any value.

678

OK_UniformValue, // Operand is uniform (splat of a value).

679

OK_UniformConstantValue, // Operand is a uniform constant.

680

OK_NonUniformConstantValue // Operand is a non-uniform constant value.

681

};

682

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

683

/// Additional properties of an operand's values.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

684

// Filled in by getOperandInfo() and passed to getArithmeticInstrCost() as
// Opd1PropInfo / Opd2PropInfo.
enum OperandValueProperties { OP_None = 0, OP_PowerOf2 = 1 };

685

686

/// \return The number of scalar or vector registers that the target has.

687

/// If 'Vector' is true, it returns the number of vector registers. If it is

688

/// set to false, it returns the number of scalar registers.

689

unsigned getNumberOfRegisters(bool Vector) const;

690

691

/// \return The width of the largest scalar or vector register type.

692

unsigned getRegisterBitWidth(bool Vector) const;

693

694

/// \return The width of the smallest vector register type.

695

unsigned getMinVectorRegisterBitWidth() const;

696

697

/// \return True if the vectorization factor should be chosen to

698

/// make the vector of the smallest element type match the size of a

699

/// vector register. For wider element types, this could result in

700

/// creating vectors that span multiple vector registers.

701

/// If false, the vectorization factor will be chosen based on the

702

/// size of the widest element type.

703

bool shouldMaximizeVectorBandwidth(bool OptSize) const;

704

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

705

/// \return The minimum vectorization factor for types of given element

706

/// bit width, or 0 if there is no minimum VF. The returned value only

707

/// applies when shouldMaximizeVectorBandwidth returns true.

708

unsigned getMinimumVF(unsigned ElemWidth) const;

709

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

710

/// \return True if it should be considered for address type promotion.

711

/// \p AllowPromotionWithoutCommonHeader Set true if promoting \p I is

712

/// profitable without finding other extensions fed by the same input.

713

bool shouldConsiderAddressTypePromotion(

714

const Instruction &I, bool &AllowPromotionWithoutCommonHeader) const;

715

716

/// \return The size of a cache line in bytes.

717

unsigned getCacheLineSize() const;

718

719

/// The possible cache levels

720

enum class CacheLevel {
// Selects which cache getCacheSize() and getCacheAssociativity() describe.

721

L1D, // The L1 data cache

722

L2D, // The L2 data cache

723

724

// We currently do not model L3 caches, as their sizes differ widely between

725

// microarchitectures. Also, we do not have a use for L3 cache

726

// size modeling yet.

727

};

728

729

/// \return The size of the cache level in bytes, if available.

730

llvm::Optional<unsigned> getCacheSize(CacheLevel Level) const;

731

732

/// \return The associativity of the cache level, if available.

733

llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) const;

734

735

/// \return How much before a load we should place the prefetch instruction.

736

/// This is currently measured in number of instructions.

737

unsigned getPrefetchDistance() const;

738

739

/// \return Some HW prefetchers can handle accesses up to a certain constant

740

/// stride. This is the minimum stride in bytes where it makes sense to start

741

/// adding SW prefetches. The default is 1, i.e. prefetch with any stride.

742

unsigned getMinPrefetchStride() const;

743

744

/// \return The maximum number of iterations to prefetch ahead. If the

745

/// required number of iterations is more than this number, no prefetching is

746

/// performed.

747

unsigned getMaxPrefetchIterationsAhead() const;

748

749

/// \return The maximum interleave factor that any transform should try to

750

/// perform for this target. This number depends on the level of parallelism

751

/// and the number of execution units in the CPU.

752

unsigned getMaxInterleaveFactor(unsigned VF) const;

753

Andrew Walbran

2019-10-22 13:54:20 +0100

[diff] [blame]

754

/// Collect properties of V used in cost analysis, e.g. OP_PowerOf2.

755

static OperandValueKind getOperandInfo(Value *V,

756

OperandValueProperties &OpProps);

Andrew Scull

0372a57

2018-11-16 15:47:06 +0000

[diff] [blame]

757

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

758

/// This is an approximation of reciprocal throughput of a math/logic op.

759

/// A higher cost indicates less expected throughput.

760

/// From Agner Fog's guides, reciprocal throughput is "the average number of

761

/// clock cycles per instruction when the instructions are not part of a

762

/// limiting dependency chain."

763

/// Therefore, costs should be scaled to account for multiple execution units

764

/// on the target that can process this type of instruction. For example, if

765

/// there are 5 scalar integer units and 2 vector integer units that can

766

/// calculate an 'add' in a single cycle, this model should indicate that the

767

/// cost of the vector add instruction is 2.5 times the cost of the scalar

768

/// add instruction.

769

/// \p Args is an optional argument which holds the instruction operands

770

/// values so the TTI can analyze those values searching for special

771

/// cases or optimizations based on those values.

772

int getArithmeticInstrCost(

773

unsigned Opcode, Type *Ty, OperandValueKind Opd1Info = OK_AnyValue,

774

OperandValueKind Opd2Info = OK_AnyValue,

775

OperandValueProperties Opd1PropInfo = OP_None,

776

OperandValueProperties Opd2PropInfo = OP_None,

777

ArrayRef<const Value *> Args = ArrayRef<const Value *>()) const;

778

779

/// \return The cost of a shuffle instruction of kind Kind and of type Tp.

780

/// The index and subtype parameters are used by the subvector insertion and

Andrew Walbran

2019-10-22 13:54:20 +0100

[diff] [blame]

781

/// extraction shuffle kinds to show the insert/extract point and the type of

782

/// the subvector being inserted/extracted.

783

/// NOTE: For subvector extractions Tp represents the source type.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

784

int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index = 0,

785

Type *SubTp = nullptr) const;

786

787

/// \return The expected cost of cast instructions, such as bitcast, trunc,

788

/// zext, etc. If there is an existing instruction that holds Opcode, it

789

/// may be passed in the 'I' parameter.

790

int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,

791

const Instruction *I = nullptr) const;

792

793

/// \return The expected cost of a sign- or zero-extended vector extract. Use

794

/// -1 to indicate that there is no information about the index value.

795

int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,

796

unsigned Index = -1) const;

797

798

/// \return The expected cost of control-flow related instructions such as

799

/// Phi, Ret, Br.

800

int getCFInstrCost(unsigned Opcode) const;

801

802

/// \returns The expected cost of compare and select instructions. If there

803

/// is an existing instruction that holds Opcode, it may be passed in the

804

/// 'I' parameter.

805

int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,

806

Type *CondTy = nullptr, const Instruction *I = nullptr) const;

807

808

/// \return The expected cost of vector Insert and Extract.

809

/// Use -1 to indicate that there is no information on the index value.

810

int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index = -1) const;

811

812

/// \return The cost of Load and Store instructions.

813

int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,

814

unsigned AddressSpace, const Instruction *I = nullptr) const;

815

816

/// \return The cost of masked Load and Store instructions.

817

int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,

818

unsigned AddressSpace) const;

819

820

/// \return The cost of Gather or Scatter operation

821

/// \p Opcode - is a type of memory access Load or Store

822

/// \p DataTy - a vector type of the data to be loaded or stored

823

/// \p Ptr - pointer [or vector of pointers] - address[es] in memory

824

/// \p VariableMask - true when the memory access is predicated with a mask

825

/// that is not a compile-time constant

826

/// \p Alignment - alignment of single element

827

int getGatherScatterOpCost(unsigned Opcode, Type *DataTy, Value *Ptr,

828

bool VariableMask, unsigned Alignment) const;

829

830

/// \return The cost of the interleaved memory operation.

831

/// \p Opcode is the memory operation code

832

/// \p VecTy is the vector type of the interleaved access.

833

/// \p Factor is the interleave factor

834

/// \p Indices is the indices for interleaved load members (as interleaved

835

/// load allows gaps)

836

/// \p Alignment is the alignment of the memory operation

837

/// \p AddressSpace is address space of the pointer.

Andrew Walbran

2019-10-22 13:54:20 +0100

[diff] [blame]

838

/// \p UseMaskForCond indicates if the memory access is predicated.

839

/// \p UseMaskForGaps indicates if gaps should be masked.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

840

int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,

841

ArrayRef<unsigned> Indices, unsigned Alignment,

Andrew Walbran

2019-10-22 13:54:20 +0100

[diff] [blame]

842

unsigned AddressSpace,

843

bool UseMaskForCond = false,

844

bool UseMaskForGaps = false) const;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

845

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

846

/// Calculate the cost of performing a vector reduction.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

847

///

848

/// This is the cost of reducing the vector value of type \p Ty to a scalar

849

/// value using the operation denoted by \p Opcode. The form of the reduction

850

/// can either be a pairwise reduction or a reduction that splits the vector

851

/// at every reduction level.

///

/// Pairwise:

/// (v0, v1, v2, v3)

/// ((v0+v1), (v2+v3), undef, undef)

856

/// Split:

857

/// (v0, v1, v2, v3)

858

/// ((v0+v2), (v1+v3), undef, undef)

859

int getArithmeticReductionCost(unsigned Opcode, Type *Ty,

860

bool IsPairwiseForm) const;

861

int getMinMaxReductionCost(Type *Ty, Type *CondTy, bool IsPairwiseForm,

862

bool IsUnsigned) const;

863

864

/// \returns The cost of Intrinsic instructions. Analyses the real arguments.

865

/// Three cases are handled: 1. scalar instruction 2. vector instruction

866

/// 3. scalar instruction which is to be vectorized with VF.

867

int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,

868

ArrayRef<Value *> Args, FastMathFlags FMF,

869

unsigned VF = 1) const;

870

871

/// \returns The cost of Intrinsic instructions. Types analysis only.

872

/// If ScalarizationCostPassed is UINT_MAX, the cost of scalarizing the

873

/// arguments and the return value will be computed based on types.

874

int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,

875

ArrayRef<Type *> Tys, FastMathFlags FMF,

876

unsigned ScalarizationCostPassed = UINT_MAX) const;

877

878

/// \returns The cost of Call instructions.

879

int getCallInstrCost(Function *F, Type *RetTy, ArrayRef<Type *> Tys) const;

880

881

/// \returns The number of pieces into which the provided type must be

882

/// split during legalization. Zero is returned when the answer is unknown.

883

unsigned getNumberOfParts(Type *Tp) const;

884

885

/// \returns The cost of the address computation. For most targets this can be

886

/// merged into the instruction indexing mode. Some targets might want to

887

/// distinguish between address computation for memory operations on vector

888

/// types and scalar types. Such targets should override this function.

889

/// The 'SE' parameter holds pointer for the scalar evolution object which

890

/// is used in order to get the Ptr step value in case of constant stride.

891

/// The 'Ptr' parameter holds SCEV of the access pointer.

892

int getAddressComputationCost(Type *Ty, ScalarEvolution *SE = nullptr,

893

const SCEV *Ptr = nullptr) const;

894

895

/// \returns The cost, if any, of keeping values of the given types alive

896

/// over a callsite.

897

///

898

/// Some types may require the use of register classes that do not have

899

/// any callee-saved registers, so would require a spill and fill.

900

unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) const;

901

902

/// \returns True if the intrinsic is a supported memory intrinsic. Info

903

/// will contain additional information - whether the intrinsic may write

904

/// or read to memory, volatility and the pointer. Info is undefined

905

/// if false is returned.

906

bool getTgtMemIntrinsic(IntrinsicInst *Inst, MemIntrinsicInfo &Info) const;

907

908

/// \returns The maximum element size, in bytes, for an element

909

/// unordered-atomic memory intrinsic.

910

unsigned getAtomicMemIntrinsicMaxElementSize() const;

911

912

/// \returns A value which is the result of the given memory intrinsic. New

913

/// instructions may be created to extract the result from the given intrinsic

914

/// memory operation. Returns nullptr if the target cannot create a result

915

/// from the given intrinsic.

916

Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,

917

Type *ExpectedType) const;

918

919

/// \returns The type to use in a loop expansion of a memcpy call.

920

Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,

921

unsigned SrcAlign, unsigned DestAlign) const;

922

923

/// \param[out] OpsOut The operand types to copy RemainingBytes of memory.

924

/// \param RemainingBytes The number of bytes to copy.

925

///

926

/// Calculates the operand types to use when copying \p RemainingBytes of

927

/// memory, where source and destination alignments are \p SrcAlign and

928

/// \p DestAlign respectively.

929

void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,

930

LLVMContext &Context,

931

unsigned RemainingBytes,

932

unsigned SrcAlign,

933

unsigned DestAlign) const;

934

935

/// \returns True if the two functions have compatible attributes for inlining

936

/// purposes.

937

bool areInlineCompatible(const Function *Caller,

938

const Function *Callee) const;

939

Andrew Walbran

2019-10-22 13:54:20 +0100

[diff] [blame]

940

/// \returns True if the caller and callee agree on how \p Args will be passed

941

/// to the callee.

942

/// \param[out] Args The list of compatible arguments. The implementation may

943

/// filter out any incompatible args from this list.

944

bool areFunctionArgsABICompatible(const Function *Caller,

945

const Function *Callee,

946

SmallPtrSetImpl<Argument *> &Args) const;

947

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

948

/// The type of load/store indexing.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

949

enum MemIndexedMode {
// Passed as the Mode argument of isIndexedLoadLegal() and
// isIndexedStoreLegal().

950

MIM_Unindexed, ///< No indexing.

951

MIM_PreInc, ///< Pre-incrementing.

952

MIM_PreDec, ///< Pre-decrementing.

953

MIM_PostInc, ///< Post-incrementing.

954

MIM_PostDec ///< Post-decrementing.

955

};

956

957

/// \returns True if the specified indexed load for the given type is legal.

958

bool isIndexedLoadLegal(enum MemIndexedMode Mode, Type *Ty) const;

959

960

/// \returns True if the specified indexed store for the given type is legal.

961

bool isIndexedStoreLegal(enum MemIndexedMode Mode, Type *Ty) const;

962

963

/// \returns The bitwidth of the largest vector type that should be used to

964

/// load/store in the given address space.

965

unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const;

966

967

/// \returns True if the load instruction is legal to vectorize.

968

bool isLegalToVectorizeLoad(LoadInst *LI) const;

969

970

/// \returns True if the store instruction is legal to vectorize.

971

bool isLegalToVectorizeStore(StoreInst *SI) const;

972

973

/// \returns True if it is legal to vectorize the given load chain.

974

bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,

975

unsigned Alignment,

976

unsigned AddrSpace) const;

977

978

/// \returns True if it is legal to vectorize the given store chain.

979

bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,

980

unsigned Alignment,

981

unsigned AddrSpace) const;

982

983

/// \returns The new vector factor value if the target doesn't support \p

984

/// SizeInBytes loads or has a better vector factor.

985

unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,

986

unsigned ChainSizeInBytes,

987

VectorType *VecTy) const;

988

989

/// \returns The new vector factor value if the target doesn't support \p

990

/// SizeInBytes stores or has a better vector factor.

991

unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,

992

unsigned ChainSizeInBytes,

993

VectorType *VecTy) const;

994

995

/// Flags describing the kind of vector reduction.

996

struct ReductionFlags {
// Passed by value to useReductionIntrinsic(); every flag defaults to false.

997

ReductionFlags() : IsMaxOp(false), IsSigned(false), NoNaN(false) {}

998

bool IsMaxOp; ///< If the op is a min/max kind, true if it's a max operation.

999

bool IsSigned; ///< Whether the operation is a signed int reduction.

1000

bool NoNaN; ///< If op is an fp min/max, whether NaNs may be present.
// NOTE(review): the field name suggests that true means NaNs are known to be
// absent, which reads opposite to the wording above -- confirm against the
// callers before relying on either interpretation.

1001

};

1002

1003

/// \returns True if the target wants to handle the given reduction idiom in

1004

/// the intrinsics form instead of the shuffle form.

1005

bool useReductionIntrinsic(unsigned Opcode, Type *Ty,

1006

ReductionFlags Flags) const;

1007

1008

/// \returns True if the target wants to expand the given reduction intrinsic

1009

/// into a shuffle sequence.

1010

bool shouldExpandReduction(const IntrinsicInst *II) const;

1011

/// @}

1012

1013

private:

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

1014

/// Estimate the latency of specified instruction.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1015

/// Returns 1 as the default value.

1016

int getInstructionLatency(const Instruction *I) const;

1017

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

1018

/// Returns the expected throughput cost of the instruction.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1019

/// Returns -1 if the cost is unknown.

1020

int getInstructionThroughput(const Instruction *I) const;

1021

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

1022

/// The abstract base class used to type erase specific TTI

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

/// implementations.

class Concept;

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

1026

/// The template model for the base class which wraps a concrete

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1027

/// implementation in a type erased interface.

1028

template <typename T> class Model;

1029

1030

std::unique_ptr<Concept> TTIImpl;

1031

};

1032

1033

class TargetTransformInfo::Concept {

1034

public:

1035

virtual ~Concept() = 0;

1036

virtual const DataLayout &getDataLayout() const = 0;

1037

virtual int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) = 0;

1038

virtual int getGEPCost(Type *PointeeType, const Value *Ptr,

1039

ArrayRef<const Value *> Operands) = 0;

1040

virtual int getExtCost(const Instruction *I, const Value *Src) = 0;

1041

virtual int getCallCost(FunctionType *FTy, int NumArgs) = 0;

1042

virtual int getCallCost(const Function *F, int NumArgs) = 0;

1043

virtual int getCallCost(const Function *F,

1044

ArrayRef<const Value *> Arguments) = 0;

1045

virtual unsigned getInliningThresholdMultiplier() = 0;

1046

virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,

1047

ArrayRef<Type *> ParamTys) = 0;

1048

virtual int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,

1049

ArrayRef<const Value *> Arguments) = 0;

1050

virtual unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,

1051

unsigned &JTSize) = 0;

1052

virtual int

1053

getUserCost(const User *U, ArrayRef<const Value *> Operands) = 0;

1054

virtual bool hasBranchDivergence() = 0;

1055

virtual bool isSourceOfDivergence(const Value *V) = 0;

1056

virtual bool isAlwaysUniform(const Value *V) = 0;

1057

virtual unsigned getFlatAddressSpace() = 0;

1058

virtual bool isLoweredToCall(const Function *F) = 0;

1059

virtual void getUnrollingPreferences(Loop *L, ScalarEvolution &,

1060

UnrollingPreferences &UP) = 0;

1061

virtual bool isLegalAddImmediate(int64_t Imm) = 0;

1062

virtual bool isLegalICmpImmediate(int64_t Imm) = 0;

1063

virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV,

1064

int64_t BaseOffset, bool HasBaseReg,

int64_t Scale,

unsigned AddrSpace,

Instruction *I) = 0;

virtual bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,

1069

TargetTransformInfo::LSRCost &C2) = 0;

1070

virtual bool canMacroFuseCmp() = 0;

1071

virtual bool shouldFavorPostInc() const = 0;

Andrew Walbran

2019-10-22 13:54:20 +0100

[diff] [blame]

1072

virtual bool shouldFavorBackedgeIndex(const Loop *L) const = 0;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1073

virtual bool isLegalMaskedStore(Type *DataType) = 0;

1074

virtual bool isLegalMaskedLoad(Type *DataType) = 0;

1075

virtual bool isLegalMaskedScatter(Type *DataType) = 0;

1076

virtual bool isLegalMaskedGather(Type *DataType) = 0;

1077

virtual bool hasDivRemOp(Type *DataType, bool IsSigned) = 0;

1078

virtual bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) = 0;

1079

virtual bool prefersVectorizedAddressing() = 0;

1080

virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV,

1081

int64_t BaseOffset, bool HasBaseReg,

1082

int64_t Scale, unsigned AddrSpace) = 0;

1083

virtual bool LSRWithInstrQueries() = 0;

1084

virtual bool isTruncateFree(Type *Ty1, Type *Ty2) = 0;

1085

virtual bool isProfitableToHoist(Instruction *I) = 0;

1086

virtual bool useAA() = 0;

1087

virtual bool isTypeLegal(Type *Ty) = 0;

1088

virtual unsigned getJumpBufAlignment() = 0;

1089

virtual unsigned getJumpBufSize() = 0;

1090

virtual bool shouldBuildLookupTables() = 0;

1091

virtual bool shouldBuildLookupTablesForConstant(Constant *C) = 0;

1092

virtual bool useColdCCForColdCall(Function &F) = 0;

1093

virtual unsigned

1094

getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) = 0;

1095

virtual unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,

1096

unsigned VF) = 0;

1097

virtual bool supportsEfficientVectorElementLoadStore() = 0;

1098

virtual bool enableAggressiveInterleaving(bool LoopHasReductions) = 0;

1099

virtual const MemCmpExpansionOptions *enableMemCmpExpansion(

1100

bool IsZeroCmp) const = 0;

1101

virtual bool enableInterleavedAccessVectorization() = 0;

Andrew Walbran

2019-10-22 13:54:20 +0100

[diff] [blame]

1102

virtual bool enableMaskedInterleavedAccessVectorization() = 0;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1103

virtual bool isFPVectorizationPotentiallyUnsafe() = 0;

1104

virtual bool allowsMisalignedMemoryAccesses(LLVMContext &Context,

1105

unsigned BitWidth,

1106

unsigned AddressSpace,

1107

unsigned Alignment,

1108

bool *Fast) = 0;

1109

virtual PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) = 0;

1110

virtual bool haveFastSqrt(Type *Ty) = 0;

1111

virtual bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) = 0;

1112

virtual int getFPOpCost(Type *Ty) = 0;

1113

virtual int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,

1114

Type *Ty) = 0;

1115

virtual int getIntImmCost(const APInt &Imm, Type *Ty) = 0;

1116

virtual int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,

1117

Type *Ty) = 0;

1118

virtual int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,

1119

Type *Ty) = 0;

1120

virtual unsigned getNumberOfRegisters(bool Vector) = 0;

1121

virtual unsigned getRegisterBitWidth(bool Vector) const = 0;

1122

virtual unsigned getMinVectorRegisterBitWidth() = 0;

1123

virtual bool shouldMaximizeVectorBandwidth(bool OptSize) const = 0;

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

1124

virtual unsigned getMinimumVF(unsigned ElemWidth) const = 0;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1125

virtual bool shouldConsiderAddressTypePromotion(

1126

const Instruction &I, bool &AllowPromotionWithoutCommonHeader) = 0;

1127

virtual unsigned getCacheLineSize() = 0;

1128

virtual llvm::Optional<unsigned> getCacheSize(CacheLevel Level) = 0;

1129

virtual llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) = 0;

1130

virtual unsigned getPrefetchDistance() = 0;

1131

virtual unsigned getMinPrefetchStride() = 0;

1132

virtual unsigned getMaxPrefetchIterationsAhead() = 0;

1133

virtual unsigned getMaxInterleaveFactor(unsigned VF) = 0;

1134

virtual unsigned

1135

getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,

1136

OperandValueKind Opd2Info,

1137

OperandValueProperties Opd1PropInfo,

1138

OperandValueProperties Opd2PropInfo,

1139

ArrayRef<const Value *> Args) = 0;

1140

virtual int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,

1141

Type *SubTp) = 0;

1142

virtual int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,

1143

const Instruction *I) = 0;

1144

virtual int getExtractWithExtendCost(unsigned Opcode, Type *Dst,

1145

VectorType *VecTy, unsigned Index) = 0;

1146

virtual int getCFInstrCost(unsigned Opcode) = 0;

1147

virtual int getCmpSelInstrCost(unsigned Opcode, Type *ValTy,

1148

Type *CondTy, const Instruction *I) = 0;

1149

virtual int getVectorInstrCost(unsigned Opcode, Type *Val,

1150

unsigned Index) = 0;

1151

virtual int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,

1152

unsigned AddressSpace, const Instruction *I) = 0;

1153

virtual int getMaskedMemoryOpCost(unsigned Opcode, Type *Src,

1154

unsigned Alignment,

1155

unsigned AddressSpace) = 0;

1156

virtual int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,

1157

Value *Ptr, bool VariableMask,

1158

unsigned Alignment) = 0;

1159

virtual int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy,

1160

unsigned Factor,

1161

ArrayRef<unsigned> Indices,

1162

unsigned Alignment,

Andrew Walbran

2019-10-22 13:54:20 +0100

[diff] [blame]

1163

unsigned AddressSpace,

1164

bool UseMaskForCond = false,

1165

bool UseMaskForGaps = false) = 0;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1166

virtual int getArithmeticReductionCost(unsigned Opcode, Type *Ty,

1167

bool IsPairwiseForm) = 0;

1168

virtual int getMinMaxReductionCost(Type *Ty, Type *CondTy,

1169

bool IsPairwiseForm, bool IsUnsigned) = 0;

1170

virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,

1171

ArrayRef<Type *> Tys, FastMathFlags FMF,

1172

unsigned ScalarizationCostPassed) = 0;

1173

virtual int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,

1174

ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) = 0;

1175

virtual int getCallInstrCost(Function *F, Type *RetTy,

1176

ArrayRef<Type *> Tys) = 0;

1177

virtual unsigned getNumberOfParts(Type *Tp) = 0;

1178

virtual int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,

1179

const SCEV *Ptr) = 0;

1180

virtual unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) = 0;

1181

virtual bool getTgtMemIntrinsic(IntrinsicInst *Inst,

1182

MemIntrinsicInfo &Info) = 0;

1183

virtual unsigned getAtomicMemIntrinsicMaxElementSize() const = 0;

1184

virtual Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,

1185

Type *ExpectedType) = 0;

1186

virtual Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,

1187

unsigned SrcAlign,

1188

unsigned DestAlign) const = 0;

1189

virtual void getMemcpyLoopResidualLoweringType(

1190

SmallVectorImpl<Type *> &OpsOut, LLVMContext &Context,

1191

unsigned RemainingBytes, unsigned SrcAlign, unsigned DestAlign) const = 0;

1192

virtual bool areInlineCompatible(const Function *Caller,

1193

const Function *Callee) const = 0;

Andrew Walbran

2019-10-22 13:54:20 +0100

[diff] [blame]

1194

virtual bool

1195

areFunctionArgsABICompatible(const Function *Caller, const Function *Callee,

1196

SmallPtrSetImpl<Argument *> &Args) const = 0;

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1197

virtual bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const = 0;

1198

virtual bool isIndexedStoreLegal(MemIndexedMode Mode,Type *Ty) const = 0;

1199

virtual unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const = 0;

1200

virtual bool isLegalToVectorizeLoad(LoadInst *LI) const = 0;

1201

virtual bool isLegalToVectorizeStore(StoreInst *SI) const = 0;

1202

virtual bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,

1203

unsigned Alignment,

1204

unsigned AddrSpace) const = 0;

1205

virtual bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,

1206

unsigned Alignment,

1207

unsigned AddrSpace) const = 0;

1208

virtual unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,

1209

unsigned ChainSizeInBytes,

1210

VectorType *VecTy) const = 0;

1211

virtual unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,

1212

unsigned ChainSizeInBytes,

1213

VectorType *VecTy) const = 0;

1214

virtual bool useReductionIntrinsic(unsigned Opcode, Type *Ty,

1215

ReductionFlags) const = 0;

1216

virtual bool shouldExpandReduction(const IntrinsicInst *II) const = 0;

1217

virtual int getInstructionLatency(const Instruction *I) = 0;

1218

};

1219

1220

template <typename T>

1221

class TargetTransformInfo::Model final : public TargetTransformInfo::Concept {

T Impl;

public:

Model(T Impl) : Impl(std::move(Impl)) {}

1226

~Model() override {}

1227

1228

const DataLayout &getDataLayout() const override {

1229

return Impl.getDataLayout();

1230

}

1231

1232

int getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) override {

1233

return Impl.getOperationCost(Opcode, Ty, OpTy);

1234

}

1235

int getGEPCost(Type *PointeeType, const Value *Ptr,

1236

ArrayRef<const Value *> Operands) override {

1237

return Impl.getGEPCost(PointeeType, Ptr, Operands);

1238

}

1239

int getExtCost(const Instruction *I, const Value *Src) override {

1240

return Impl.getExtCost(I, Src);

1241

}

1242

int getCallCost(FunctionType *FTy, int NumArgs) override {

1243

return Impl.getCallCost(FTy, NumArgs);

1244

}

1245

int getCallCost(const Function *F, int NumArgs) override {

1246

return Impl.getCallCost(F, NumArgs);

1247

}

1248

int getCallCost(const Function *F,

1249

ArrayRef<const Value *> Arguments) override {

1250

return Impl.getCallCost(F, Arguments);

1251

}

1252

unsigned getInliningThresholdMultiplier() override {

1253

return Impl.getInliningThresholdMultiplier();

1254

}

1255

int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,

1256

ArrayRef<Type *> ParamTys) override {

1257

return Impl.getIntrinsicCost(IID, RetTy, ParamTys);

1258

}

1259

int getIntrinsicCost(Intrinsic::ID IID, Type *RetTy,

1260

ArrayRef<const Value *> Arguments) override {

1261

return Impl.getIntrinsicCost(IID, RetTy, Arguments);

1262

}

1263

int getUserCost(const User *U, ArrayRef<const Value *> Operands) override {

1264

return Impl.getUserCost(U, Operands);

1265

}

1266

bool hasBranchDivergence() override { return Impl.hasBranchDivergence(); }

1267

bool isSourceOfDivergence(const Value *V) override {

1268

return Impl.isSourceOfDivergence(V);

1269

}

1270

1271

bool isAlwaysUniform(const Value *V) override {

1272

return Impl.isAlwaysUniform(V);

1273

}

1274

1275

unsigned getFlatAddressSpace() override {

1276

return Impl.getFlatAddressSpace();

1277

}

1278

1279

bool isLoweredToCall(const Function *F) override {

1280

return Impl.isLoweredToCall(F);

1281

}

1282

void getUnrollingPreferences(Loop *L, ScalarEvolution &SE,

1283

UnrollingPreferences &UP) override {

1284

return Impl.getUnrollingPreferences(L, SE, UP);

1285

}

1286

bool isLegalAddImmediate(int64_t Imm) override {

1287

return Impl.isLegalAddImmediate(Imm);

1288

}

1289

bool isLegalICmpImmediate(int64_t Imm) override {

1290

return Impl.isLegalICmpImmediate(Imm);

1291

}

1292

bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,

1293

bool HasBaseReg, int64_t Scale,

1294

unsigned AddrSpace,

1295

Instruction *I) override {

1296

return Impl.isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg,

1297

Scale, AddrSpace, I);

1298

}

1299

bool isLSRCostLess(TargetTransformInfo::LSRCost &C1,

1300

TargetTransformInfo::LSRCost &C2) override {

1301

return Impl.isLSRCostLess(C1, C2);

1302

}

1303

bool canMacroFuseCmp() override {

1304

return Impl.canMacroFuseCmp();

1305

}

1306

bool shouldFavorPostInc() const override {

1307

return Impl.shouldFavorPostInc();

1308

}

Andrew Walbran

2019-10-22 13:54:20 +0100

[diff] [blame]

1309

bool shouldFavorBackedgeIndex(const Loop *L) const override {

1310

return Impl.shouldFavorBackedgeIndex(L);

1311

}

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1312

bool isLegalMaskedStore(Type *DataType) override {

1313

return Impl.isLegalMaskedStore(DataType);

1314

}

1315

bool isLegalMaskedLoad(Type *DataType) override {

1316

return Impl.isLegalMaskedLoad(DataType);

1317

}

1318

bool isLegalMaskedScatter(Type *DataType) override {

1319

return Impl.isLegalMaskedScatter(DataType);

1320

}

1321

bool isLegalMaskedGather(Type *DataType) override {

1322

return Impl.isLegalMaskedGather(DataType);

1323

}

1324

bool hasDivRemOp(Type *DataType, bool IsSigned) override {

1325

return Impl.hasDivRemOp(DataType, IsSigned);

1326

}

1327

bool hasVolatileVariant(Instruction *I, unsigned AddrSpace) override {

1328

return Impl.hasVolatileVariant(I, AddrSpace);

1329

}

1330

bool prefersVectorizedAddressing() override {

1331

return Impl.prefersVectorizedAddressing();

1332

}

1333

int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset,

1334

bool HasBaseReg, int64_t Scale,

1335

unsigned AddrSpace) override {

1336

return Impl.getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg,

1337

Scale, AddrSpace);

1338

}

1339

bool LSRWithInstrQueries() override {

1340

return Impl.LSRWithInstrQueries();

1341

}

1342

bool isTruncateFree(Type *Ty1, Type *Ty2) override {

1343

return Impl.isTruncateFree(Ty1, Ty2);

1344

}

1345

bool isProfitableToHoist(Instruction *I) override {

1346

return Impl.isProfitableToHoist(I);

1347

}

1348

bool useAA() override { return Impl.useAA(); }

1349

bool isTypeLegal(Type *Ty) override { return Impl.isTypeLegal(Ty); }

1350

unsigned getJumpBufAlignment() override { return Impl.getJumpBufAlignment(); }

1351

unsigned getJumpBufSize() override { return Impl.getJumpBufSize(); }

1352

bool shouldBuildLookupTables() override {

1353

return Impl.shouldBuildLookupTables();

1354

}

1355

bool shouldBuildLookupTablesForConstant(Constant *C) override {

1356

return Impl.shouldBuildLookupTablesForConstant(C);

1357

}

1358

bool useColdCCForColdCall(Function &F) override {

1359

return Impl.useColdCCForColdCall(F);

1360

}

1361

1362

unsigned getScalarizationOverhead(Type *Ty, bool Insert,

1363

bool Extract) override {

1364

return Impl.getScalarizationOverhead(Ty, Insert, Extract);

1365

}

1366

unsigned getOperandsScalarizationOverhead(ArrayRef<const Value *> Args,

1367

unsigned VF) override {

1368

return Impl.getOperandsScalarizationOverhead(Args, VF);

1369

}

1370

1371

bool supportsEfficientVectorElementLoadStore() override {

1372

return Impl.supportsEfficientVectorElementLoadStore();

1373

}

1374

1375

bool enableAggressiveInterleaving(bool LoopHasReductions) override {

1376

return Impl.enableAggressiveInterleaving(LoopHasReductions);

1377

}

1378

const MemCmpExpansionOptions *enableMemCmpExpansion(

1379

bool IsZeroCmp) const override {

1380

return Impl.enableMemCmpExpansion(IsZeroCmp);

1381

}

1382

bool enableInterleavedAccessVectorization() override {

1383

return Impl.enableInterleavedAccessVectorization();

1384

}

Andrew Walbran

2019-10-22 13:54:20 +0100

[diff] [blame]

1385

bool enableMaskedInterleavedAccessVectorization() override {

1386

return Impl.enableMaskedInterleavedAccessVectorization();

1387

}

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1388

bool isFPVectorizationPotentiallyUnsafe() override {

1389

return Impl.isFPVectorizationPotentiallyUnsafe();

1390

}

1391

bool allowsMisalignedMemoryAccesses(LLVMContext &Context,

1392

unsigned BitWidth, unsigned AddressSpace,

1393

unsigned Alignment, bool *Fast) override {

1394

return Impl.allowsMisalignedMemoryAccesses(Context, BitWidth, AddressSpace,

1395

Alignment, Fast);

1396

}

1397

PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) override {

1398

return Impl.getPopcntSupport(IntTyWidthInBit);

1399

}

1400

bool haveFastSqrt(Type *Ty) override { return Impl.haveFastSqrt(Ty); }

1401

1402

bool isFCmpOrdCheaperThanFCmpZero(Type *Ty) override {

1403

return Impl.isFCmpOrdCheaperThanFCmpZero(Ty);

1404

}

1405

1406

int getFPOpCost(Type *Ty) override { return Impl.getFPOpCost(Ty); }

1407

1408

int getIntImmCodeSizeCost(unsigned Opc, unsigned Idx, const APInt &Imm,

1409

Type *Ty) override {

1410

return Impl.getIntImmCodeSizeCost(Opc, Idx, Imm, Ty);

1411

}

1412

int getIntImmCost(const APInt &Imm, Type *Ty) override {

1413

return Impl.getIntImmCost(Imm, Ty);

1414

}

1415

int getIntImmCost(unsigned Opc, unsigned Idx, const APInt &Imm,

1416

Type *Ty) override {

1417

return Impl.getIntImmCost(Opc, Idx, Imm, Ty);

1418

}

1419

int getIntImmCost(Intrinsic::ID IID, unsigned Idx, const APInt &Imm,

1420

Type *Ty) override {

1421

return Impl.getIntImmCost(IID, Idx, Imm, Ty);

1422

}

1423

unsigned getNumberOfRegisters(bool Vector) override {

1424

return Impl.getNumberOfRegisters(Vector);

1425

}

1426

unsigned getRegisterBitWidth(bool Vector) const override {

1427

return Impl.getRegisterBitWidth(Vector);

1428

}

1429

unsigned getMinVectorRegisterBitWidth() override {

1430

return Impl.getMinVectorRegisterBitWidth();

1431

}

1432

bool shouldMaximizeVectorBandwidth(bool OptSize) const override {

1433

return Impl.shouldMaximizeVectorBandwidth(OptSize);

1434

}

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

1435

unsigned getMinimumVF(unsigned ElemWidth) const override {

1436

return Impl.getMinimumVF(ElemWidth);

1437

}

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1438

bool shouldConsiderAddressTypePromotion(

1439

const Instruction &I, bool &AllowPromotionWithoutCommonHeader) override {

1440

return Impl.shouldConsiderAddressTypePromotion(

1441

I, AllowPromotionWithoutCommonHeader);

1442

}

1443

unsigned getCacheLineSize() override {

1444

return Impl.getCacheLineSize();

1445

}

1446

llvm::Optional<unsigned> getCacheSize(CacheLevel Level) override {

1447

return Impl.getCacheSize(Level);

1448

}

1449

llvm::Optional<unsigned> getCacheAssociativity(CacheLevel Level) override {

1450

return Impl.getCacheAssociativity(Level);

1451

}

1452

unsigned getPrefetchDistance() override { return Impl.getPrefetchDistance(); }

1453

unsigned getMinPrefetchStride() override {

1454

return Impl.getMinPrefetchStride();

1455

}

1456

unsigned getMaxPrefetchIterationsAhead() override {

1457

return Impl.getMaxPrefetchIterationsAhead();

1458

}

1459

unsigned getMaxInterleaveFactor(unsigned VF) override {

1460

return Impl.getMaxInterleaveFactor(VF);

1461

}

1462

unsigned getEstimatedNumberOfCaseClusters(const SwitchInst &SI,

1463

unsigned &JTSize) override {

1464

return Impl.getEstimatedNumberOfCaseClusters(SI, JTSize);

1465

}

1466

unsigned

1467

getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind Opd1Info,

1468

OperandValueKind Opd2Info,

1469

OperandValueProperties Opd1PropInfo,

1470

OperandValueProperties Opd2PropInfo,

1471

ArrayRef<const Value *> Args) override {

1472

return Impl.getArithmeticInstrCost(Opcode, Ty, Opd1Info, Opd2Info,

1473

Opd1PropInfo, Opd2PropInfo, Args);

1474

}

1475

int getShuffleCost(ShuffleKind Kind, Type *Tp, int Index,

1476

Type *SubTp) override {

1477

return Impl.getShuffleCost(Kind, Tp, Index, SubTp);

1478

}

1479

int getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src,

1480

const Instruction *I) override {

1481

return Impl.getCastInstrCost(Opcode, Dst, Src, I);

1482

}

1483

int getExtractWithExtendCost(unsigned Opcode, Type *Dst, VectorType *VecTy,

1484

unsigned Index) override {

1485

return Impl.getExtractWithExtendCost(Opcode, Dst, VecTy, Index);

1486

}

1487

int getCFInstrCost(unsigned Opcode) override {

1488

return Impl.getCFInstrCost(Opcode);

1489

}

1490

int getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy,

1491

const Instruction *I) override {

1492

return Impl.getCmpSelInstrCost(Opcode, ValTy, CondTy, I);

1493

}

1494

int getVectorInstrCost(unsigned Opcode, Type *Val, unsigned Index) override {

1495

return Impl.getVectorInstrCost(Opcode, Val, Index);

1496

}

1497

int getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,

1498

unsigned AddressSpace, const Instruction *I) override {

1499

return Impl.getMemoryOpCost(Opcode, Src, Alignment, AddressSpace, I);

1500

}

1501

int getMaskedMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment,

1502

unsigned AddressSpace) override {

1503

return Impl.getMaskedMemoryOpCost(Opcode, Src, Alignment, AddressSpace);

1504

}

1505

int getGatherScatterOpCost(unsigned Opcode, Type *DataTy,

1506

Value *Ptr, bool VariableMask,

1507

unsigned Alignment) override {

1508

return Impl.getGatherScatterOpCost(Opcode, DataTy, Ptr, VariableMask,

1509

Alignment);

1510

}

1511

int getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, unsigned Factor,

1512

ArrayRef<unsigned> Indices, unsigned Alignment,

Andrew Walbran

2019-10-22 13:54:20 +0100

[diff] [blame]

1513

unsigned AddressSpace, bool UseMaskForCond,

1514

bool UseMaskForGaps) override {

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1515

return Impl.getInterleavedMemoryOpCost(Opcode, VecTy, Factor, Indices,

Andrew Walbran

2019-10-22 13:54:20 +0100

[diff] [blame]

1516

Alignment, AddressSpace,

1517

UseMaskForCond, UseMaskForGaps);

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1518

}

1519

int getArithmeticReductionCost(unsigned Opcode, Type *Ty,

1520

bool IsPairwiseForm) override {

1521

return Impl.getArithmeticReductionCost(Opcode, Ty, IsPairwiseForm);

1522

}

1523

int getMinMaxReductionCost(Type *Ty, Type *CondTy,

1524

bool IsPairwiseForm, bool IsUnsigned) override {

1525

return Impl.getMinMaxReductionCost(Ty, CondTy, IsPairwiseForm, IsUnsigned);

1526

}

1527

int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy, ArrayRef<Type *> Tys,

1528

FastMathFlags FMF, unsigned ScalarizationCostPassed) override {

1529

return Impl.getIntrinsicInstrCost(ID, RetTy, Tys, FMF,

1530

ScalarizationCostPassed);

1531

}

1532

int getIntrinsicInstrCost(Intrinsic::ID ID, Type *RetTy,

1533

ArrayRef<Value *> Args, FastMathFlags FMF, unsigned VF) override {

1534

return Impl.getIntrinsicInstrCost(ID, RetTy, Args, FMF, VF);

1535

}

1536

int getCallInstrCost(Function *F, Type *RetTy,

1537

ArrayRef<Type *> Tys) override {

1538

return Impl.getCallInstrCost(F, RetTy, Tys);

1539

}

1540

unsigned getNumberOfParts(Type *Tp) override {

1541

return Impl.getNumberOfParts(Tp);

1542

}

1543

int getAddressComputationCost(Type *Ty, ScalarEvolution *SE,

1544

const SCEV *Ptr) override {

1545

return Impl.getAddressComputationCost(Ty, SE, Ptr);

1546

}

1547

unsigned getCostOfKeepingLiveOverCall(ArrayRef<Type *> Tys) override {

1548

return Impl.getCostOfKeepingLiveOverCall(Tys);

1549

}

1550

bool getTgtMemIntrinsic(IntrinsicInst *Inst,

1551

MemIntrinsicInfo &Info) override {

1552

return Impl.getTgtMemIntrinsic(Inst, Info);

1553

}

1554

unsigned getAtomicMemIntrinsicMaxElementSize() const override {

1555

return Impl.getAtomicMemIntrinsicMaxElementSize();

1556

}

1557

Value *getOrCreateResultFromMemIntrinsic(IntrinsicInst *Inst,

1558

Type *ExpectedType) override {

1559

return Impl.getOrCreateResultFromMemIntrinsic(Inst, ExpectedType);

1560

}

1561

Type *getMemcpyLoopLoweringType(LLVMContext &Context, Value *Length,

1562

unsigned SrcAlign,

1563

unsigned DestAlign) const override {

1564

return Impl.getMemcpyLoopLoweringType(Context, Length, SrcAlign, DestAlign);

1565

}

1566

void getMemcpyLoopResidualLoweringType(SmallVectorImpl<Type *> &OpsOut,

1567

LLVMContext &Context,

1568

unsigned RemainingBytes,

1569

unsigned SrcAlign,

1570

unsigned DestAlign) const override {

1571

Impl.getMemcpyLoopResidualLoweringType(OpsOut, Context, RemainingBytes,

1572

SrcAlign, DestAlign);

1573

}

1574

bool areInlineCompatible(const Function *Caller,

1575

const Function *Callee) const override {

1576

return Impl.areInlineCompatible(Caller, Callee);

1577

}

Andrew Walbran

2019-10-22 13:54:20 +0100

[diff] [blame]

1578

bool areFunctionArgsABICompatible(

1579

const Function *Caller, const Function *Callee,

1580

SmallPtrSetImpl<Argument *> &Args) const override {

1581

return Impl.areFunctionArgsABICompatible(Caller, Callee, Args);

1582

}

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1583

bool isIndexedLoadLegal(MemIndexedMode Mode, Type *Ty) const override {

1584

return Impl.isIndexedLoadLegal(Mode, Ty, getDataLayout());

1585

}

1586

bool isIndexedStoreLegal(MemIndexedMode Mode, Type *Ty) const override {

1587

return Impl.isIndexedStoreLegal(Mode, Ty, getDataLayout());

1588

}

1589

unsigned getLoadStoreVecRegBitWidth(unsigned AddrSpace) const override {

1590

return Impl.getLoadStoreVecRegBitWidth(AddrSpace);

1591

}

1592

bool isLegalToVectorizeLoad(LoadInst *LI) const override {

1593

return Impl.isLegalToVectorizeLoad(LI);

1594

}

1595

bool isLegalToVectorizeStore(StoreInst *SI) const override {

1596

return Impl.isLegalToVectorizeStore(SI);

1597

}

1598

bool isLegalToVectorizeLoadChain(unsigned ChainSizeInBytes,

1599

unsigned Alignment,

1600

unsigned AddrSpace) const override {

1601

return Impl.isLegalToVectorizeLoadChain(ChainSizeInBytes, Alignment,

1602

AddrSpace);

1603

}

1604

bool isLegalToVectorizeStoreChain(unsigned ChainSizeInBytes,

1605

unsigned Alignment,

1606

unsigned AddrSpace) const override {

1607

return Impl.isLegalToVectorizeStoreChain(ChainSizeInBytes, Alignment,

1608

AddrSpace);

1609

}

1610

unsigned getLoadVectorFactor(unsigned VF, unsigned LoadSize,

1611

unsigned ChainSizeInBytes,

1612

VectorType *VecTy) const override {

1613

return Impl.getLoadVectorFactor(VF, LoadSize, ChainSizeInBytes, VecTy);

1614

}

1615

unsigned getStoreVectorFactor(unsigned VF, unsigned StoreSize,

1616

unsigned ChainSizeInBytes,

1617

VectorType *VecTy) const override {

1618

return Impl.getStoreVectorFactor(VF, StoreSize, ChainSizeInBytes, VecTy);

1619

}

1620

bool useReductionIntrinsic(unsigned Opcode, Type *Ty,

1621

ReductionFlags Flags) const override {

1622

return Impl.useReductionIntrinsic(Opcode, Ty, Flags);

1623

}

1624

bool shouldExpandReduction(const IntrinsicInst *II) const override {

1625

return Impl.shouldExpandReduction(II);

1626

}

1627

int getInstructionLatency(const Instruction *I) override {

1628

return Impl.getInstructionLatency(I);

}

};

template <typename T>

1633

TargetTransformInfo::TargetTransformInfo(T Impl)

1634

: TTIImpl(new Model<T>(Impl)) {}

1635

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

1636

/// Analysis pass providing the \c TargetTransformInfo.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1637

///

1638

/// The core idea of the TargetIRAnalysis is to expose an interface through

1639

/// which LLVM targets can analyze and provide information about the middle

1640

/// end's target-independent IR. This supports use cases such as target-aware

1641

/// cost modeling of IR constructs.

1642

///

1643

/// This is a function analysis because much of the cost modeling for targets

1644

/// is done in a subtarget specific way and LLVM supports compiling different

1645

/// functions targeting different subtargets in order to support runtime

1646

/// dispatch according to the observed subtarget.

1647

class TargetIRAnalysis : public AnalysisInfoMixin<TargetIRAnalysis> {

1648

public:

1649

typedef TargetTransformInfo Result;

1650

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

1651

/// Default construct a target IR analysis.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1652

///

1653

/// This will use the module's datalayout to construct a baseline

1654

/// conservative TTI result.

1655

TargetIRAnalysis();

1656

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

1657

/// Construct an IR analysis pass around a target-provide callback.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1658

///

1659

/// The callback will be called with a particular function for which the TTI

1660

/// is needed and must return a TTI object for that function.

1661

TargetIRAnalysis(std::function<Result(const Function &)> TTICallback);

1662

1663

// Value semantics. We spell out the constructors for MSVC.

1664

TargetIRAnalysis(const TargetIRAnalysis &Arg)

1665

: TTICallback(Arg.TTICallback) {}

1666

TargetIRAnalysis(TargetIRAnalysis &&Arg)

1667

: TTICallback(std::move(Arg.TTICallback)) {}

1668

TargetIRAnalysis &operator=(const TargetIRAnalysis &RHS) {

1669

TTICallback = RHS.TTICallback;

1670

return *this;

1671

}

1672

TargetIRAnalysis &operator=(TargetIRAnalysis &&RHS) {

1673

TTICallback = std::move(RHS.TTICallback);

return *this;

}

Result run(const Function &F, FunctionAnalysisManager &);

1678

1679

private:

1680

friend AnalysisInfoMixin<TargetIRAnalysis>;

1681

static AnalysisKey Key;

1682

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

1683

/// The callback used to produce a result.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1684

///

1685

/// We use a completely opaque callback so that targets can provide whatever

1686

/// mechanism they desire for constructing the TTI for a given function.

1687

///

1688

/// FIXME: Should we really use std::function? It's relatively inefficient.

1689

/// It might be possible to arrange for even stateful callbacks to outlive

1690

/// the analysis and thus use a function_ref which would be lighter weight.

1691

/// This may also be less error prone as the callback is likely to reference

1692

/// the external TargetMachine, and that reference needs to never dangle.

1693

std::function<Result(const Function &)> TTICallback;

1694

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

1695

/// Helper function used as the callback in the default constructor.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1696

static Result getDefaultTTI(const Function &F);

1697

};

1698

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

1699

/// Wrapper pass for TargetTransformInfo.

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1700

///

1701

/// This pass can be constructed from a TTI object which it stores internally

1702

/// and is queried by passes.

1703

class TargetTransformInfoWrapperPass : public ImmutablePass {

1704

TargetIRAnalysis TIRA;

1705

Optional<TargetTransformInfo> TTI;

1706

1707

virtual void anchor();

public:

static char ID;

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

1712

/// We must provide a default constructor for the pass but it should

Andrew Scull

2018-08-14 10:06:54 +0100

[diff] [blame]

1713

/// never be used.

1714

///

1715

/// Use the constructor below or call one of the creation routines.

1716

TargetTransformInfoWrapperPass();

1717

1718

explicit TargetTransformInfoWrapperPass(TargetIRAnalysis TIRA);

1719

1720

TargetTransformInfo &getTTI(const Function &F);

1721

};

1722

Andrew Scull

2018-10-05 20:58:37 +0100

[diff] [blame]

1723

/// Create an analysis pass wrapper around a TTI object.

Andrew Scull