//===- llvm/CodeGen/TargetLowering.h - Target Lowering Info -----*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
///
/// \file
/// This file describes how to lower LLVM code to machine code. This has three
/// main components:
///
///  1. Which ValueTypes are natively supported by the target.
///  2. Which operations are supported for supported ValueTypes.
///  3. Cost thresholds for alternative implementations of certain operations.
///
/// In addition it has a few other components, like information about FP
/// immediates.
///
//===----------------------------------------------------------------------===//

#ifndef LLVM_CODEGEN_TARGETLOWERING_H
#define LLVM_CODEGEN_TARGETLOWERING_H

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
#include "llvm/Analysis/LegacyDivergenceAnalysis.h"
#include "llvm/CodeGen/DAGCombine.h"
#include "llvm/CodeGen/ISDOpcodes.h"
#include "llvm/CodeGen/RuntimeLibcalls.h"
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGNodes.h"
#include "llvm/CodeGen/TargetCallingConv.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/IR/Attributes.h"
#include "llvm/IR/CallSite.h"
#include "llvm/IR/CallingConv.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/InlineAsm.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Type.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MachineValueType.h"
#include "llvm/Target/TargetMachine.h"
#include <algorithm>
#include <cassert>
#include <climits>
#include <cstdint>
#include <iterator>
#include <map>
#include <string>
#include <utility>
#include <vector>

namespace llvm {

class BranchProbability;
class CCState;
class CCValAssign;
class Constant;
class FastISel;
class FunctionLoweringInfo;
class GlobalValue;
class IntrinsicInst;
struct KnownBits;
class LLVMContext;
class MachineBasicBlock;
class MachineFunction;
class MachineInstr;
class MachineJumpTableInfo;
class MachineLoop;
class MachineRegisterInfo;
class MCContext;
class MCExpr;
class Module;
class TargetRegisterClass;
class TargetLibraryInfo;
class TargetRegisterInfo;
class Value;

namespace Sched {

  enum Preference {
    None,        // No preference
    Source,      // Follow source order.
    RegPressure, // Scheduling for lowest register pressure.
    Hybrid,      // Scheduling for both latency and register pressure.
    ILP,         // Scheduling for ILP in low register pressure mode.
    VLIW         // Scheduling for VLIW targets.
  };

} // end namespace Sched

/// This base class for TargetLowering contains the SelectionDAG-independent
/// parts that can be used from the rest of CodeGen.
class TargetLoweringBase {
public:
  /// This enum indicates whether operations are valid for a target, and if
  /// not, what action should be used to make them valid.
  enum LegalizeAction : uint8_t {
    Legal,   // The target natively supports this operation.
    Promote, // This operation should be executed in a larger type.
    Expand,  // Try to expand this to other ops, otherwise use a libcall.
    LibCall, // Don't try to expand this to other ops, always use a libcall.
    Custom   // Use the LowerOperation hook to implement custom lowering.
  };
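
  /// A minimal illustrative sketch of how these actions are typically
  /// configured: a backend's TargetLowering subclass records one action per
  /// (opcode, type) pair in its constructor via setOperationAction(). The
  /// target name and the specific choices below are hypothetical, not taken
  /// from any in-tree backend.
  /// \code
  ///   MyTargetLowering::MyTargetLowering(const TargetMachine &TM)
  ///       : TargetLowering(TM) {
  ///     // No hardware divider: expand to a shift/mul sequence or a libcall.
  ///     setOperationAction(ISD::SDIV, MVT::i32, Expand);
  ///     // Population count is lowered by LowerOperation().
  ///     setOperationAction(ISD::CTPOP, MVT::i32, Custom);
  ///     // 32-bit addition is natively supported (this is also the default).
  ///     setOperationAction(ISD::ADD, MVT::i32, Legal);
  ///   }
  /// \endcode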

  /// This enum indicates whether types are legal for a target, and if not,
  /// what action should be used to make them legal.
  enum LegalizeTypeAction : uint8_t {
    TypeLegal,           // The target natively supports this type.
    TypePromoteInteger,  // Replace this integer with a larger one.
    TypeExpandInteger,   // Split this integer into two of half the size.
    TypeSoftenFloat,     // Convert this float to a same size integer type,
                         // if an operation is not supported in target HW.
    TypeExpandFloat,     // Split this float into two of half the size.
    TypeScalarizeVector, // Replace this one-element vector with its element.
    TypeSplitVector,     // Split this vector into two of half the size.
    TypeWidenVector,     // This vector should be widened into a larger vector.
    TypePromoteFloat     // Replace this float with a larger one.
  };

  /// LegalizeKind holds the legalization kind that needs to happen to EVT
  /// in order to type-legalize it.
  using LegalizeKind = std::pair<LegalizeTypeAction, EVT>;

  /// Enum that describes how the target represents true/false values.
  enum BooleanContent {
    UndefinedBooleanContent,        // Only bit 0 counts, the rest can hold garbage.
    ZeroOrOneBooleanContent,        // All bits zero except for bit 0.
    ZeroOrNegativeOneBooleanContent // All bits equal to bit 0.
  };
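
  /// Illustrative only: targets describe their boolean representation in the
  /// constructor with the setBooleanContents() / setBooleanVectorContents()
  /// helpers provided by this class. The choices shown here mirror a common
  /// pattern (scalar booleans zero-extended, vector booleans sign-extended)
  /// but are not prescriptive for any particular backend.
  /// \code
  ///   setBooleanContents(ZeroOrOneBooleanContent);
  ///   setBooleanVectorContents(ZeroOrNegativeOneBooleanContent);
  /// \endcode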

  /// Enum that describes what type of support for selects the target has.
  enum SelectSupportKind {
    ScalarValSelect,     // The target supports scalar selects (ex: cmov).
    ScalarCondVectorVal, // The target supports selects with a scalar condition
                         // and vector values (ex: cmov).
    VectorMaskSelect     // The target supports vector selects with a vector
                         // mask (ex: x86 blends).
  };

  /// Enum that specifies what an atomic load/AtomicRMWInst is expanded
  /// to, if at all. Exists because different targets have different levels of
  /// support for these atomic instructions, and also have different options
  /// w.r.t. what they should expand to.
  enum class AtomicExpansionKind {
    None,            // Don't expand the instruction.
    LLSC,            // Expand the instruction into load-linked/store-conditional;
                     // used by ARM/AArch64.
    LLOnly,          // Expand the (load) instruction into just a load-linked,
                     // which has greater atomic guarantees than a normal load.
    CmpXChg,         // Expand the instruction into cmpxchg; used by at least X86.
    MaskedIntrinsic, // Use a target-specific intrinsic for the LL/SC loop.
  };
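
  /// A hedged sketch of how a backend consumes this enum: LL/SC-style targets
  /// typically override the shouldExpandAtomicRMWInIR() hook (declared further
  /// down in this class) to request expansion. The size threshold and the
  /// target are hypothetical.
  /// \code
  ///   TargetLowering::AtomicExpansionKind
  ///   MyTargetLowering::shouldExpandAtomicRMWInIR(AtomicRMWInst *AI) const {
  ///     unsigned Size = AI->getType()->getPrimitiveSizeInBits();
  ///     // Expand small RMW operations to an LL/SC loop; leave others alone.
  ///     return Size <= 64 ? AtomicExpansionKind::LLSC
  ///                       : AtomicExpansionKind::None;
  ///   }
  /// \endcode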

  /// Enum that specifies when a multiplication should be expanded.
  enum class MulExpansionKind {
    Always,            // Always expand the instruction.
    OnlyLegalOrCustom, // Only expand when the resulting instructions are legal
                       // or custom.
  };

  class ArgListEntry {
  public:
    Value *Val = nullptr;
    SDValue Node = SDValue();
    Type *Ty = nullptr;
    bool IsSExt : 1;
    bool IsZExt : 1;
    bool IsInReg : 1;
    bool IsSRet : 1;
    bool IsNest : 1;
    bool IsByVal : 1;
    bool IsInAlloca : 1;
    bool IsReturned : 1;
    bool IsSwiftSelf : 1;
    bool IsSwiftError : 1;
    uint16_t Alignment = 0;
    Type *ByValType = nullptr;

    ArgListEntry()
        : IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false),
          IsNest(false), IsByVal(false), IsInAlloca(false), IsReturned(false),
          IsSwiftSelf(false), IsSwiftError(false) {}

    void setAttributes(const CallBase *Call, unsigned ArgIdx);

    void setAttributes(ImmutableCallSite *CS, unsigned ArgIdx) {
      return setAttributes(cast<CallBase>(CS->getInstruction()), ArgIdx);
    }
  };
  using ArgListTy = std::vector<ArgListEntry>;

  virtual void markLibCallAttributes(MachineFunction *MF, unsigned CC,
                                     ArgListTy &Args) const {};

  static ISD::NodeType getExtendForContent(BooleanContent Content) {
    switch (Content) {
    case UndefinedBooleanContent:
      // Extend by adding rubbish bits.
      return ISD::ANY_EXTEND;
    case ZeroOrOneBooleanContent:
      // Extend by adding zero bits.
      return ISD::ZERO_EXTEND;
    case ZeroOrNegativeOneBooleanContent:
      // Extend by copying the sign bit.
      return ISD::SIGN_EXTEND;
    }
    llvm_unreachable("Invalid content kind");
  }

  /// NOTE: The TargetMachine owns TLOF.
  explicit TargetLoweringBase(const TargetMachine &TM);
  TargetLoweringBase(const TargetLoweringBase &) = delete;
  TargetLoweringBase &operator=(const TargetLoweringBase &) = delete;
  virtual ~TargetLoweringBase() = default;

protected:
  /// Initialize all of the actions to default values.
  void initActions();

public:
  const TargetMachine &getTargetMachine() const { return TM; }

  virtual bool useSoftFloat() const { return false; }

  /// Return the pointer type for the given address space, defaults to
  /// the pointer type from the data layout.
  /// FIXME: The default needs to be removed once all the code is updated.
  virtual MVT getPointerTy(const DataLayout &DL, uint32_t AS = 0) const {
    return MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
  }

  /// Return the in-memory pointer type for the given address space, defaults to
  /// the pointer type from the data layout.
  /// FIXME: The default needs to be removed once all the code is updated.
  MVT getPointerMemTy(const DataLayout &DL, uint32_t AS = 0) const {
    return MVT::getIntegerVT(DL.getPointerSizeInBits(AS));
  }

  /// Return the type for frame index, which is determined by
  /// the alloca address space specified through the data layout.
  MVT getFrameIndexTy(const DataLayout &DL) const {
    return getPointerTy(DL, DL.getAllocaAddrSpace());
  }

  /// Return the type for operands of fence.
  /// TODO: Let fence operands be of i32 type and remove this.
  virtual MVT getFenceOperandTy(const DataLayout &DL) const {
    return getPointerTy(DL);
  }

  /// EVT is not used in-tree, but is used by out-of-tree targets.
  /// Documentation for this function would be nice...
  virtual MVT getScalarShiftAmountTy(const DataLayout &, EVT) const;

  EVT getShiftAmountTy(EVT LHSTy, const DataLayout &DL,
                       bool LegalTypes = true) const;

  /// Returns the type to be used for the index operand of:
  /// ISD::INSERT_VECTOR_ELT, ISD::EXTRACT_VECTOR_ELT,
  /// ISD::INSERT_SUBVECTOR, and ISD::EXTRACT_SUBVECTOR
  virtual MVT getVectorIdxTy(const DataLayout &DL) const {
    return getPointerTy(DL);
  }

  virtual bool isSelectSupported(SelectSupportKind /*kind*/) const {
    return true;
  }

  /// Return true if it is profitable to convert a select of FP constants into
  /// a constant pool load whose address depends on the select condition. The
  /// parameter may be used to differentiate a select with FP compare from
  /// integer compare.
  virtual bool reduceSelectOfFPConstantLoads(bool IsFPSetCC) const {
    return true;
  }

  /// Return true if multiple condition registers are available.
  bool hasMultipleConditionRegisters() const {
    return HasMultipleConditionRegisters;
  }

  /// Return true if the target has BitExtract instructions.
  bool hasExtractBitsInsn() const { return HasExtractBitsInsn; }

  /// Return the preferred vector type legalization action.
  virtual TargetLoweringBase::LegalizeTypeAction
  getPreferredVectorAction(MVT VT) const {
    // The default action for one-element vectors is to scalarize.
    if (VT.getVectorNumElements() == 1)
      return TypeScalarizeVector;
    // The default action for an odd-width vector is to widen.
    if (!VT.isPow2VectorType())
      return TypeWidenVector;
    // The default action for other vectors is to promote.
    return TypePromoteInteger;
  }
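
  /// A hypothetical override, shown only to illustrate the hook: a target with
  /// native 128-bit vector registers might prefer widening short vectors
  /// rather than promoting their elements. The threshold and the target are
  /// invented for the example.
  /// \code
  ///   TargetLoweringBase::LegalizeTypeAction
  ///   MyTargetLowering::getPreferredVectorAction(MVT VT) const {
  ///     // Scalarize single-element vectors, as the default does.
  ///     if (VT.getVectorNumElements() == 1)
  ///       return TypeScalarizeVector;
  ///     // Widen anything narrower than 128 bits toward a full register.
  ///     if (VT.getSizeInBits() < 128)
  ///       return TypeWidenVector;
  ///     return TargetLoweringBase::getPreferredVectorAction(VT);
  ///   }
  /// \endcode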

  // There are two general methods for expanding a BUILD_VECTOR node:
  //  1. Use SCALAR_TO_VECTOR on the defined scalar values and then shuffle
  //     them together.
  //  2. Build the vector on the stack and then load it.
  // If this function returns true, then method (1) will be used, subject to
  // the constraint that all of the necessary shuffles are legal (as determined
  // by isShuffleMaskLegal). If this function returns false, then method (2) is
  // always used. The vector type, and the number of defined values, are
  // provided.
  virtual bool
  shouldExpandBuildVectorWithShuffles(EVT /* VT */,
                                      unsigned DefinedValues) const {
    return DefinedValues < 3;
  }

  /// Return true if integer divide is usually cheaper than a sequence of
  /// several shifts, adds, and multiplies for this target.
  /// The definition of "cheaper" may depend on whether we're optimizing
  /// for speed or for size.
  virtual bool isIntDivCheap(EVT VT, AttributeList Attr) const { return false; }

  /// Return true if the target can handle a standalone remainder operation.
  virtual bool hasStandaloneRem(EVT VT) const {
    return true;
  }

  /// Return true if SQRT(X) shouldn't be replaced with X*RSQRT(X).
  virtual bool isFsqrtCheap(SDValue X, SelectionDAG &DAG) const {
    // Default behavior is to replace SQRT(X) with X*RSQRT(X).
    return false;
  }

  /// Reciprocal estimate status values used by the functions below.
  enum ReciprocalEstimate : int {
    Unspecified = -1,
    Disabled = 0,
    Enabled = 1
  };

  /// Return a ReciprocalEstimate enum value for a square root of the given type
  /// based on the function's attributes. If the operation is not overridden by
  /// the function's attributes, "Unspecified" is returned and target defaults
  /// are expected to be used for instruction selection.
  int getRecipEstimateSqrtEnabled(EVT VT, MachineFunction &MF) const;

  /// Return a ReciprocalEstimate enum value for a division of the given type
  /// based on the function's attributes. If the operation is not overridden by
  /// the function's attributes, "Unspecified" is returned and target defaults
  /// are expected to be used for instruction selection.
  int getRecipEstimateDivEnabled(EVT VT, MachineFunction &MF) const;

  /// Return the refinement step count for a square root of the given type based
  /// on the function's attributes. If the operation is not overridden by
  /// the function's attributes, "Unspecified" is returned and target defaults
  /// are expected to be used for instruction selection.
  int getSqrtRefinementSteps(EVT VT, MachineFunction &MF) const;

  /// Return the refinement step count for a division of the given type based
  /// on the function's attributes. If the operation is not overridden by
  /// the function's attributes, "Unspecified" is returned and target defaults
  /// are expected to be used for instruction selection.
  int getDivRefinementSteps(EVT VT, MachineFunction &MF) const;

  /// Returns true if the target has indicated that at least one type should be
  /// bypassed.
  bool isSlowDivBypassed() const { return !BypassSlowDivWidths.empty(); }

  /// Returns the map of slow types for division or remainder with their
  /// corresponding fast types.
  const DenseMap<unsigned int, unsigned int> &getBypassSlowDivWidths() const {
    return BypassSlowDivWidths;
  }
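
  /// Sketch of how a target populates this map: the protected
  /// addBypassSlowDiv() helper records a (slow width -> fast width) pair in
  /// the constructor. The widths mirror the common "bypass 64-bit division
  /// with a 32-bit one when the operands fit" pattern and are illustrative.
  /// \code
  ///   // In MyTargetLowering's constructor: 64-bit div/rem is slow, so try a
  ///   // 32-bit divide first when the operands are small enough.
  ///   addBypassSlowDiv(64, 32);
  /// \endcode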

  /// Return true if flow control is an expensive operation that should be
  /// avoided.
  bool isJumpExpensive() const { return JumpIsExpensive; }

  /// Return true if selects are only cheaper than branches if the branch is
  /// unlikely to be predicted right.
  bool isPredictableSelectExpensive() const {
    return PredictableSelectIsExpensive;
  }

  /// If a branch or a select condition is skewed in one direction by more than
  /// this factor, it is very likely to be predicted correctly.
  virtual BranchProbability getPredictableBranchThreshold() const;

  /// Return true if the following transform is beneficial:
  ///   fold (conv (load x)) -> (load (conv*)x)
  /// On architectures that don't natively support some vector loads
  /// efficiently, casting the load to a smaller vector of larger types and
  /// loading is more efficient; however, this can be undone by optimizations
  /// in the DAG combiner.
  virtual bool isLoadBitCastBeneficial(EVT LoadVT, EVT BitcastVT,
                                       const SelectionDAG &DAG,
                                       const MachineMemOperand &MMO) const {
    // Don't do if we could do an indexed load on the original type, but not on
    // the new one.
    if (!LoadVT.isSimple() || !BitcastVT.isSimple())
      return true;

    MVT LoadMVT = LoadVT.getSimpleVT();

    // Don't bother doing this if it's just going to be promoted again later, as
    // doing so might interfere with other combines.
    if (getOperationAction(ISD::LOAD, LoadMVT) == Promote &&
        getTypeToPromoteTo(ISD::LOAD, LoadMVT) == BitcastVT.getSimpleVT())
      return false;

    bool Fast = false;
    return allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), BitcastVT,
                              MMO, &Fast) && Fast;
  }

  /// Return true if the following transform is beneficial:
  ///   (store (y (conv x)), y*) -> (store x, (x*))
  virtual bool isStoreBitCastBeneficial(EVT StoreVT, EVT BitcastVT,
                                        const SelectionDAG &DAG,
                                        const MachineMemOperand &MMO) const {
    // Default to the same logic as loads.
    return isLoadBitCastBeneficial(StoreVT, BitcastVT, DAG, MMO);
  }

  /// Return true if it is expected to be cheaper to do a store of a non-zero
  /// vector constant with the given size and type for the address space than to
  /// store the individual scalar element constants.
  virtual bool storeOfVectorConstantIsCheap(EVT MemVT,
                                            unsigned NumElem,
                                            unsigned AddrSpace) const {
    return false;
  }

  /// Allow store merging for the specified type after legalization in addition
  /// to before legalization. This may transform stores that do not exist
  /// earlier (for example, stores created from intrinsics).
  virtual bool mergeStoresAfterLegalization(EVT MemVT) const {
    return true;
  }

  /// Returns true if it's reasonable to merge stores to MemVT size.
  virtual bool canMergeStoresTo(unsigned AS, EVT MemVT,
                                const SelectionDAG &DAG) const {
    return true;
  }

  /// Return true if it is cheap to speculate a call to intrinsic cttz.
  virtual bool isCheapToSpeculateCttz() const {
    return false;
  }

  /// Return true if it is cheap to speculate a call to intrinsic ctlz.
  virtual bool isCheapToSpeculateCtlz() const {
    return false;
  }

  /// Return true if ctlz instruction is fast.
  virtual bool isCtlzFast() const {
    return false;
  }

  /// Return true if it is safe to transform an integer-domain bitwise operation
  /// into the equivalent floating-point operation. This should be set to true
  /// if the target has IEEE-754-compliant fabs/fneg operations for the input
  /// type.
  virtual bool hasBitPreservingFPLogic(EVT VT) const {
    return false;
  }

  /// Return true if it is cheaper to split the store of a merged integer value
  /// from a pair of smaller values into multiple stores.
  virtual bool isMultiStoresCheaperThanBitsMerge(EVT LTy, EVT HTy) const {
    return false;
  }

  /// Return true if the target supports combining a
  /// chain like:
  /// \code
  ///   %andResult = and %val1, #mask
  ///   %icmpResult = icmp %andResult, 0
  /// \endcode
  /// into a single machine instruction of a form like:
  /// \code
  ///   cc = test %register, #mask
  /// \endcode
  virtual bool isMaskAndCmp0FoldingBeneficial(const Instruction &AndI) const {
    return false;
  }

  /// Use bitwise logic to make pairs of compares more efficient. For example:
  ///   and (seteq A, B), (seteq C, D) --> seteq (or (xor A, B), (xor C, D)), 0
  /// This should be true when it takes more than one instruction to lower
  /// setcc (cmp+set on x86 scalar), when bitwise ops are faster than logic on
  /// condition bits (crand on PowerPC), and/or when reducing cmp+br is a win.
  virtual bool convertSetCCLogicToBitwiseLogic(EVT VT) const {
    return false;
  }

  /// Return the preferred operand type if the target has a quick way to compare
  /// integer values of the given size. Assume that any legal integer type can
  /// be compared efficiently. Targets may override this to allow illegal wide
  /// types to return a vector type if there is support to compare that type.
  virtual MVT hasFastEqualityCompare(unsigned NumBits) const {
    MVT VT = MVT::getIntegerVT(NumBits);
    return isTypeLegal(VT) ? VT : MVT::INVALID_SIMPLE_VALUE_TYPE;
  }

  /// Return true if the target should transform:
  ///   (X & Y) == Y ---> (~X & Y) == 0
  ///   (X & Y) != Y ---> (~X & Y) != 0
  ///
  /// This may be profitable if the target has a bitwise and-not operation that
  /// sets comparison flags. A target may want to limit the transformation based
  /// on the type of Y or if Y is a constant.
  ///
  /// Note that the transform will not occur if Y is known to be a power-of-2
  /// because a mask and compare of a single bit can be handled by inverting the
  /// predicate, for example:
  ///   (X & 8) == 8 ---> (X & 8) != 0
  virtual bool hasAndNotCompare(SDValue Y) const {
    return false;
  }

  /// Return true if the target has a bitwise and-not operation:
  ///   X = ~A & B
  /// This can be used to simplify select or other instructions.
  virtual bool hasAndNot(SDValue X) const {
    // If the target has the more complex version of this operation, assume that
    // it has this operation too.
    return hasAndNotCompare(X);
  }

  /// There are two ways to clear extreme bits (either low or high):
  ///   Mask:   x &  (-1 << y)  (the instcombine canonical form)
  ///   Shifts: x >> y << y
  /// Return true if the variant with two variable shifts is preferred.
  /// Return false if there is no preference.
  virtual bool shouldFoldMaskToVariableShiftPair(SDValue X) const {
    // By default, let's assume that no one prefers shifts.
    return false;
  }

  /// Return true if it is profitable to fold a pair of shifts into a mask.
  /// This is usually true on most targets. But some targets, like Thumb1,
  /// have immediate shift instructions, but no immediate "and" instruction;
  /// this makes the fold unprofitable.
  virtual bool shouldFoldConstantShiftPairToMask(const SDNode *N,
                                                 CombineLevel Level) const {
    return true;
  }

  /// Should we transform the IR-optimal check for whether the given truncation
  /// down into KeptBits would be truncating or not:
  ///   (add %x, (1 << (KeptBits-1))) srccond (1 << KeptBits)
  /// into its more traditional form:
  ///   ((%x << C) a>> C) dstcond %x
  /// Return true if we should transform.
  /// Return false if there is no preference.
  virtual bool shouldTransformSignedTruncationCheck(EVT XVT,
                                                    unsigned KeptBits) const {
    // By default, let's assume that no one prefers shifts.
    return false;
  }

  /// These two forms are equivalent:
  ///   sub %y, (xor %x, -1)
  ///   add (add %x, 1), %y
  /// The variant with two adds is IR-canonical.
  /// Some targets may prefer one to the other.
  virtual bool preferIncOfAddToSubOfNot(EVT VT) const {
    // By default, let's assume that everyone prefers the form with two adds.
    return true;
  }

  /// Return true if the target wants to use the optimization that
  /// turns ext(promotableInst1(...(promotableInstN(load)))) into
  /// promotedInst1(...(promotedInstN(ext(load)))).
  bool enableExtLdPromotion() const { return EnableExtLdPromotion; }

  /// Return true if the target can combine store(extractelement VectorTy,
  /// Idx).
  /// \p Cost[out] gives the cost of that transformation when this is true.
  virtual bool canCombineStoreAndExtract(Type *VectorTy, Value *Idx,
                                         unsigned &Cost) const {
    return false;
  }

  /// Return true if inserting a scalar into a variable element of an undef
  /// vector is more efficiently handled by splatting the scalar instead.
  virtual bool shouldSplatInsEltVarIndex(EVT) const {
    return false;
  }

  /// Return true if the target always benefits from combining into FMA for a
  /// given value type. This must typically return false on targets where FMA
  /// takes more cycles to execute than FADD.
  virtual bool enableAggressiveFMAFusion(EVT VT) const {
    return false;
  }

  /// Return the ValueType of the result of SETCC operations.
  virtual EVT getSetCCResultType(const DataLayout &DL, LLVMContext &Context,
                                 EVT VT) const;

  /// Return the ValueType for comparison libcalls. Comparison libcalls include
  /// floating point comparison calls, and Ordered/Unordered check calls on
  /// floating point numbers.
  virtual
  MVT::SimpleValueType getCmpLibcallReturnType() const;

  /// For targets without i1 registers, this gives the nature of the high-bits
  /// of boolean values held in types wider than i1.
  ///
  /// "Boolean values" are special true/false values produced by nodes like
  /// SETCC and consumed (as the condition) by nodes like SELECT and BRCOND.
  /// Not to be confused with general values promoted from i1. Some CPUs
  /// distinguish between vectors of boolean and scalars; the isVec parameter
  /// selects between the two kinds. For example on X86 a scalar boolean should
  /// be zero extended from i1, while the elements of a vector of booleans
  /// should be sign extended from i1.
  ///
  /// Some CPUs also treat floating point types the same way as they treat
  /// vectors instead of the way they treat scalars.
  BooleanContent getBooleanContents(bool isVec, bool isFloat) const {
    if (isVec)
      return BooleanVectorContents;
    return isFloat ? BooleanFloatContents : BooleanContents;
  }

  BooleanContent getBooleanContents(EVT Type) const {
    return getBooleanContents(Type.isVector(), Type.isFloatingPoint());
  }

  /// Return target scheduling preference.
  Sched::Preference getSchedulingPreference() const {
    return SchedPreferenceInfo;
  }

  /// Some schedulers, e.g. hybrid, can switch to different scheduling
  /// heuristics for different nodes. This function returns the preference (or
  /// none) for the given node.
  virtual Sched::Preference getSchedulingPreference(SDNode *) const {
    return Sched::None;
  }

  /// Return the register class that should be used for the specified value
  /// type.
  virtual const TargetRegisterClass *getRegClassFor(MVT VT,
                                                    bool isDivergent = false) const {
    (void)isDivergent;
    const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy];
    assert(RC && "This value type is not natively supported!");
    return RC;
  }

  /// Allows the target to decide on the register class of a specific value
  /// that is live outside the defining block.
  /// Returns true if the value needs a uniform register class.
  virtual bool requiresUniformRegister(MachineFunction &MF,
                                       const Value *) const {
    return false;
  }

  /// Return the 'representative' register class for the specified value
  /// type.
  ///
  /// The 'representative' register class is the largest legal super-reg
  /// register class for the register class of the value type. For example, on
  /// i386 the rep register class for i8, i16, and i32 are GR32; while the rep
  /// register class is GR64 on x86_64.
  virtual const TargetRegisterClass *getRepRegClassFor(MVT VT) const {
    const TargetRegisterClass *RC = RepRegClassForVT[VT.SimpleTy];
    return RC;
  }

  /// Return the cost of the 'representative' register class for the specified
  /// value type.
  virtual uint8_t getRepRegClassCostFor(MVT VT) const {
    return RepRegClassCostForVT[VT.SimpleTy];
  }

  /// Return true if SHIFT instructions should be expanded to SHIFT_PARTS
  /// instructions, and false if a library call is preferred (e.g for code-size
  /// reasons).
  virtual bool shouldExpandShift(SelectionDAG &DAG, SDNode *N) const {
    return true;
  }

  /// Return true if the target has native support for the specified value type.
  /// This means that it has a register that directly holds it without
  /// promotions or expansions.
  bool isTypeLegal(EVT VT) const {
    assert(!VT.isSimple() ||
           (unsigned)VT.getSimpleVT().SimpleTy < array_lengthof(RegClassForVT));
    return VT.isSimple() && RegClassForVT[VT.getSimpleVT().SimpleTy] != nullptr;
  }
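
  /// Types become legal by being associated with a register class. A minimal
  /// sketch of the usual pattern, with hypothetical register class and
  /// subtarget names; real targets call the protected addRegisterClass()
  /// helper (and then computeRegisterProperties()) from their constructor.
  /// \code
  ///   // In MyTargetLowering's constructor:
  ///   addRegisterClass(MVT::i32, &MyTarget::GPR32RegClass);
  ///   addRegisterClass(MVT::f32, &MyTarget::FPR32RegClass);
  ///   computeRegisterProperties(Subtarget.getRegisterInfo());
  /// \endcode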

  class ValueTypeActionImpl {
    /// ValueTypeActions - For each value type, keep a LegalizeTypeAction enum
    /// that indicates how instruction selection should deal with the type.
    LegalizeTypeAction ValueTypeActions[MVT::LAST_VALUETYPE];

  public:
    ValueTypeActionImpl() {
      std::fill(std::begin(ValueTypeActions), std::end(ValueTypeActions),
                TypeLegal);
    }

    LegalizeTypeAction getTypeAction(MVT VT) const {
      return ValueTypeActions[VT.SimpleTy];
    }

    void setTypeAction(MVT VT, LegalizeTypeAction Action) {
      ValueTypeActions[VT.SimpleTy] = Action;
    }
  };

  const ValueTypeActionImpl &getValueTypeActions() const {
    return ValueTypeActions;
  }

  /// Return how we should legalize values of this type: either it is already
  /// legal (return 'Legal'), we need to promote it to a larger type (return
  /// 'Promote'), or we need to expand it into multiple registers of smaller
  /// integer type (return 'Expand'). 'Custom' is not an option.
  LegalizeTypeAction getTypeAction(LLVMContext &Context, EVT VT) const {
    return getTypeConversion(Context, VT).first;
  }
  LegalizeTypeAction getTypeAction(MVT VT) const {
    return ValueTypeActions.getTypeAction(VT);
  }

  /// For types supported by the target, this is an identity function. For
  /// types that must be promoted to larger types, this returns the larger type
  /// to promote to. For integer types that are larger than the largest integer
  /// register, this contains one step in the expansion to get to the smaller
  /// register. For illegal floating point types, this returns the integer type
  /// to transform to.
  EVT getTypeToTransformTo(LLVMContext &Context, EVT VT) const {
    return getTypeConversion(Context, VT).second;
  }

  /// For types supported by the target, this is an identity function. For
  /// types that must be expanded (i.e. integer types that are larger than the
  /// largest integer register or illegal floating point types), this returns
  /// the largest legal type it will be expanded to.
  EVT getTypeToExpandTo(LLVMContext &Context, EVT VT) const {
    assert(!VT.isVector());
    while (true) {
      switch (getTypeAction(Context, VT)) {
      case TypeLegal:
        return VT;
      case TypeExpandInteger:
        VT = getTypeToTransformTo(Context, VT);
        break;
      default:
        llvm_unreachable("Type is not legal nor is it to be expanded!");
      }
    }
  }
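
  /// A small usage sketch of the type-legalization queries above, assuming a
  /// hypothetical 32-bit target whose widest legal integer type is i32; the
  /// results noted in the comments follow from that assumption only.
  /// \code
  ///   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
  ///   EVT I64 = EVT::getIntegerVT(Ctx, 64);
  ///   if (TLI.getTypeAction(Ctx, I64) ==
  ///       TargetLoweringBase::TypeExpandInteger) {
  ///     // One expansion step: i64 -> i32 on this hypothetical target.
  ///     EVT Lowered = TLI.getTypeToTransformTo(Ctx, I64);
  ///     (void)Lowered;
  ///   }
  /// \endcode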

  /// Vector types are broken down into some number of legal first class types.
  /// For example, EVT::v8f32 maps to 2 EVT::v4f32 with Altivec or SSE1, or 8
  /// promoted EVT::f64 values with the X86 FP stack. Similarly, EVT::v2i64
  /// turns into 4 EVT::i32 values with both PPC and X86.
  ///
  /// This method returns the number of registers needed, and the VT for each
  /// register. It also returns the VT and quantity of the intermediate values
  /// before they are promoted/expanded.
  unsigned getVectorTypeBreakdown(LLVMContext &Context, EVT VT,
                                  EVT &IntermediateVT,
                                  unsigned &NumIntermediates,
                                  MVT &RegisterVT) const;

  /// Certain targets such as MIPS require that some types such as vectors are
  /// always broken down into scalars in some contexts. This occurs even if the
  /// vector type is legal.
  virtual unsigned getVectorTypeBreakdownForCallingConv(
      LLVMContext &Context, CallingConv::ID CC, EVT VT, EVT &IntermediateVT,
      unsigned &NumIntermediates, MVT &RegisterVT) const {
    return getVectorTypeBreakdown(Context, VT, IntermediateVT, NumIntermediates,
                                  RegisterVT);
  }
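
  /// Illustrative call to the breakdown query above; the v8f32 -> 2 x v4f32
  /// outcome in the trailing comment is the SSE1/Altivec example from the doc
  /// comment and holds only on such targets. TLI and Ctx are assumed to be in
  /// scope.
  /// \code
  ///   EVT IntermediateVT;
  ///   MVT RegisterVT;
  ///   unsigned NumIntermediates;
  ///   unsigned NumRegs = TLI.getVectorTypeBreakdown(
  ///       Ctx, EVT::getVectorVT(Ctx, MVT::f32, 8), IntermediateVT,
  ///       NumIntermediates, RegisterVT);
  ///   // On an SSE1-class target: NumRegs == 2, IntermediateVT == v4f32.
  /// \endcode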

  struct IntrinsicInfo {
    unsigned opc = 0;   // target opcode
    EVT memVT;          // memory VT

    // value representing memory location
    PointerUnion<const Value *, const PseudoSourceValue *> ptrVal;

    int offset = 0;     // offset off of ptrVal
    unsigned size = 0;  // the size of the memory location
                        // (taken from memVT if zero)
    unsigned align = 1; // alignment

    MachineMemOperand::Flags flags = MachineMemOperand::MONone;
    IntrinsicInfo() = default;
  };

  /// Given an intrinsic, checks if on the target the intrinsic will need to map
  /// to a MemIntrinsicNode (touches memory). If this is the case, it returns
  /// true and stores the intrinsic information into the IntrinsicInfo that was
  /// passed to the function.
  virtual bool getTgtMemIntrinsic(IntrinsicInfo &, const CallInst &,
                                  MachineFunction &,
                                  unsigned /*Intrinsic*/) const {
    return false;
  }

  /// Returns true if the target can instruction select the specified FP
  /// immediate natively. If false, the legalizer will materialize the FP
  /// immediate as a load from a constant pool.
  virtual bool isFPImmLegal(const APFloat & /*Imm*/, EVT /*VT*/,
                            bool ForCodeSize = false) const {
    return false;
  }

  /// Targets can use this to indicate that they only support *some*
  /// VECTOR_SHUFFLE operations, those with specific masks. By default, if a
  /// target supports the VECTOR_SHUFFLE node, all mask values are assumed to be
  /// legal.
  virtual bool isShuffleMaskLegal(ArrayRef<int> /*Mask*/, EVT /*VT*/) const {
    return true;
  }

  /// Returns true if the operation can trap for the value type.
  ///
  /// VT must be a legal type. By default, we optimistically assume most
  /// operations don't trap except for integer divide and remainder.
  virtual bool canOpTrap(unsigned Op, EVT VT) const;

  /// Similar to isShuffleMaskLegal. Targets can use this to indicate if there
  /// is a suitable VECTOR_SHUFFLE that can be used to replace a VAND with a
  /// constant pool entry.
  virtual bool isVectorClearMaskLegal(ArrayRef<int> /*Mask*/,
                                      EVT /*VT*/) const {
    return false;
  }

  /// Return how this operation should be treated: either it is legal, needs to
  /// be promoted to a larger size, needs to be expanded to some other code
  /// sequence, or the target has a custom expander for it.
  LegalizeAction getOperationAction(unsigned Op, EVT VT) const {
    if (VT.isExtended()) return Expand;
    // If a target-specific SDNode requires legalization, require the target
    // to provide custom legalization for it.
    if (Op >= array_lengthof(OpActions[0])) return Custom;
    return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op];
  }
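
  /// Typical consumer-side sketch: DAG combines guard a node they are about to
  /// create with one of the predicates below so they don't introduce
  /// operations the target cannot handle after legalization. Variable names
  /// are illustrative.
  /// \code
  ///   // Only form a funnel shift if the target can select or custom-lower it.
  ///   if (!TLI.isOperationLegalOrCustom(ISD::FSHL, VT))
  ///     return SDValue();
  /// \endcode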

  /// Custom method defined by each target to indicate if an operation which
  /// may require a scale is supported natively by the target.
  /// If not, the operation is illegal.
  virtual bool isSupportedFixedPointOperation(unsigned Op, EVT VT,
                                              unsigned Scale) const {
    return false;
  }

  /// Some fixed point operations may be natively supported by the target but
  /// only for specific scales. This method allows for checking
  /// if the width is supported by the target for a given operation that may
  /// depend on scale.
  LegalizeAction getFixedPointOperationAction(unsigned Op, EVT VT,
                                              unsigned Scale) const {
    auto Action = getOperationAction(Op, VT);
    if (Action != Legal)
      return Action;

    // This operation is supported in this type but may only work on specific
    // scales.
    bool Supported;
    switch (Op) {
    default:
      llvm_unreachable("Unexpected fixed point operation.");
    case ISD::SMULFIX:
    case ISD::SMULFIXSAT:
    case ISD::UMULFIX:
      Supported = isSupportedFixedPointOperation(Op, VT, Scale);
      break;
    }

    return Supported ? Action : Expand;
  }

  LegalizeAction getStrictFPOperationAction(unsigned Op, EVT VT) const {
    unsigned EqOpc;
    switch (Op) {
    default: llvm_unreachable("Unexpected FP pseudo-opcode");
    case ISD::STRICT_FADD: EqOpc = ISD::FADD; break;
    case ISD::STRICT_FSUB: EqOpc = ISD::FSUB; break;
    case ISD::STRICT_FMUL: EqOpc = ISD::FMUL; break;
    case ISD::STRICT_FDIV: EqOpc = ISD::FDIV; break;
    case ISD::STRICT_FREM: EqOpc = ISD::FREM; break;
    case ISD::STRICT_FSQRT: EqOpc = ISD::FSQRT; break;
    case ISD::STRICT_FPOW: EqOpc = ISD::FPOW; break;
    case ISD::STRICT_FPOWI: EqOpc = ISD::FPOWI; break;
    case ISD::STRICT_FMA: EqOpc = ISD::FMA; break;
    case ISD::STRICT_FSIN: EqOpc = ISD::FSIN; break;
    case ISD::STRICT_FCOS: EqOpc = ISD::FCOS; break;
    case ISD::STRICT_FEXP: EqOpc = ISD::FEXP; break;
    case ISD::STRICT_FEXP2: EqOpc = ISD::FEXP2; break;
    case ISD::STRICT_FLOG: EqOpc = ISD::FLOG; break;
    case ISD::STRICT_FLOG10: EqOpc = ISD::FLOG10; break;
    case ISD::STRICT_FLOG2: EqOpc = ISD::FLOG2; break;
    case ISD::STRICT_FRINT: EqOpc = ISD::FRINT; break;
    case ISD::STRICT_FNEARBYINT: EqOpc = ISD::FNEARBYINT; break;
    case ISD::STRICT_FMAXNUM: EqOpc = ISD::FMAXNUM; break;
    case ISD::STRICT_FMINNUM: EqOpc = ISD::FMINNUM; break;
    case ISD::STRICT_FCEIL: EqOpc = ISD::FCEIL; break;
    case ISD::STRICT_FFLOOR: EqOpc = ISD::FFLOOR; break;
    case ISD::STRICT_FROUND: EqOpc = ISD::FROUND; break;
    case ISD::STRICT_FTRUNC: EqOpc = ISD::FTRUNC; break;
    case ISD::STRICT_FP_ROUND: EqOpc = ISD::FP_ROUND; break;
    case ISD::STRICT_FP_EXTEND: EqOpc = ISD::FP_EXTEND; break;
    }

    auto Action = getOperationAction(EqOpc, VT);

    // We don't currently handle Custom or Promote for strict FP pseudo-ops.
    // For now, we just expand for those cases.
    if (Action != Legal)
      Action = Expand;

    return Action;
  }

  /// Return true if the specified operation is legal on this target or can be
  /// made legal with custom lowering. This is used to help guide high-level
  /// lowering decisions.
  bool isOperationLegalOrCustom(unsigned Op, EVT VT) const {
    return (VT == MVT::Other || isTypeLegal(VT)) &&
           (getOperationAction(Op, VT) == Legal ||
            getOperationAction(Op, VT) == Custom);
  }

  /// Return true if the specified operation is legal on this target or can be
  /// made legal using promotion. This is used to help guide high-level lowering
  /// decisions.
  bool isOperationLegalOrPromote(unsigned Op, EVT VT) const {
    return (VT == MVT::Other || isTypeLegal(VT)) &&
           (getOperationAction(Op, VT) == Legal ||
            getOperationAction(Op, VT) == Promote);
  }

  /// Return true if the specified operation is legal on this target or can be
  /// made legal with custom lowering or using promotion. This is used to help
  /// guide high-level lowering decisions.
  bool isOperationLegalOrCustomOrPromote(unsigned Op, EVT VT) const {
    return (VT == MVT::Other || isTypeLegal(VT)) &&
           (getOperationAction(Op, VT) == Legal ||
            getOperationAction(Op, VT) == Custom ||
            getOperationAction(Op, VT) == Promote);
  }

  /// Return true if the operation uses custom lowering, regardless of whether
  /// the type is legal or not.
  bool isOperationCustom(unsigned Op, EVT VT) const {
    return getOperationAction(Op, VT) == Custom;
  }

  /// Return true if lowering to a jump table is allowed.
  virtual bool areJTsAllowed(const Function *Fn) const {
    if (Fn->getFnAttribute("no-jump-tables").getValueAsString() == "true")
      return false;

    return isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) ||
           isOperationLegalOrCustom(ISD::BRIND, MVT::Other);
  }

  /// Check whether the range [Low,High] fits in a machine word.
  bool rangeFitsInWord(const APInt &Low, const APInt &High,
                       const DataLayout &DL) const {
    // FIXME: Using the pointer type doesn't seem ideal.
    uint64_t BW = DL.getIndexSizeInBits(0u);
    uint64_t Range = (High - Low).getLimitedValue(UINT64_MAX - 1) + 1;
    return Range <= BW;
  }

  /// Return true if lowering to a jump table is suitable for a set of case
  /// clusters which may contain \p NumCases cases and span a value range of
  /// \p Range.
  virtual bool isSuitableForJumpTable(const SwitchInst *SI, uint64_t NumCases,
                                      uint64_t Range) const {
    // FIXME: This function checks the maximum table size and density, but the
    // minimum size is not checked. It would be nice if the minimum size is
    // also combined within this function. Currently, the minimum size check is
    // performed in findJumpTable() in SelectionDAGBuilder and
    // getEstimatedNumberOfCaseClusters() in BasicTTIImpl.
    const bool OptForSize = SI->getParent()->getParent()->hasOptSize();
    const unsigned MinDensity = getMinimumJumpTableDensity(OptForSize);
    const unsigned MaxJumpTableSize = getMaximumJumpTableSize();

    // Check whether the number of cases is small enough and
    // the range is dense enough for a jump table.
    if ((OptForSize || Range <= MaxJumpTableSize) &&
        (NumCases * 100 >= Range * MinDensity)) {
      return true;
    }
    return false;
  }
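
  /// Worked example of the density check above, with an assumed minimum
  /// density of 40% (the actual value comes from getMinimumJumpTableDensity()
  /// and depends on the target and optimization level): a switch with 50 cases
  /// spread over a range of 100 values gives 50 * 100 = 5000 >= 100 * 40 =
  /// 4000, so a jump table is considered suitable; 20 cases over the same
  /// range (2000 < 4000) would not be.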

  /// Return true if lowering to a bit test is suitable for a set of case
  /// clusters which contains \p NumDests unique destinations, \p Low and
  /// \p High as its lowest and highest case values, and expects \p NumCmps
  /// case value comparisons. Check if the number of destinations, comparison
  /// metric, and range are all suitable.
  bool isSuitableForBitTests(unsigned NumDests, unsigned NumCmps,
                             const APInt &Low, const APInt &High,
                             const DataLayout &DL) const {
    // FIXME: I don't think NumCmps is the correct metric: a single case and a
    // range of cases both require only one branch to lower. Just looking at the
    // number of clusters and destinations should be enough to decide whether to
    // build bit tests.

    // To lower a range with bit tests, the range must fit the bitwidth of a
    // machine word.
    if (!rangeFitsInWord(Low, High, DL))
      return false;

    // Decide whether it's profitable to lower this range with bit tests. Each
    // destination requires a bit test and branch, and there is an overall range
    // check branch. For a small number of clusters, separate comparisons might
    // be cheaper, and for many destinations, splitting the range might be
    // better.
    return (NumDests == 1 && NumCmps >= 3) || (NumDests == 2 && NumCmps >= 5) ||
           (NumDests == 3 && NumCmps >= 6);
  }

  /// Return true if the specified operation is illegal on this target or
  /// unlikely to be made legal with custom lowering. This is used to help guide
  /// high-level lowering decisions.
  bool isOperationExpand(unsigned Op, EVT VT) const {
    return (!isTypeLegal(VT) || getOperationAction(Op, VT) == Expand);
  }

  /// Return true if the specified operation is legal on this target.
  bool isOperationLegal(unsigned Op, EVT VT) const {
    return (VT == MVT::Other || isTypeLegal(VT)) &&
           getOperationAction(Op, VT) == Legal;
  }

  /// Return how this load with extension should be treated: either it is legal,
  /// needs to be promoted to a larger size, needs to be expanded to some other
  /// code sequence, or the target has a custom expander for it.
  LegalizeAction getLoadExtAction(unsigned ExtType, EVT ValVT,
                                  EVT MemVT) const {
    if (ValVT.isExtended() || MemVT.isExtended()) return Expand;
    unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy;
    unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy;
    assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::LAST_VALUETYPE &&
           MemI < MVT::LAST_VALUETYPE && "Table isn't big enough!");
    unsigned Shift = 4 * ExtType;
    return (LegalizeAction)((LoadExtActions[ValI][MemI] >> Shift) & 0xf);
  }
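
  /// Sketch of the producer side of these tables: targets populate the
  /// extending-load and truncating-store actions in their constructor with the
  /// protected setLoadExtAction() / setTruncStoreAction() helpers. The value
  /// types chosen below are illustrative, not a recommendation.
  /// \code
  ///   // In MyTargetLowering's constructor:
  ///   // No native sign-extending load from i8 to i64; legalize it away.
  ///   setLoadExtAction(ISD::SEXTLOAD, MVT::i64, MVT::i8, Expand);
  ///   // Truncating f64 stores to f32 need a custom sequence.
  ///   setTruncStoreAction(MVT::f64, MVT::f32, Custom);
  /// \endcode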
| 1063 | |
| 1064 | /// Return true if the specified load with extension is legal on this target. |
| 1065 | bool isLoadExtLegal(unsigned ExtType, EVT ValVT, EVT MemVT) const { |
| 1066 | return getLoadExtAction(ExtType, ValVT, MemVT) == Legal; |
| 1067 | } |
| 1068 | |
| 1069 | /// Return true if the specified load with extension is legal or custom |
| 1070 | /// on this target. |
| 1071 | bool isLoadExtLegalOrCustom(unsigned ExtType, EVT ValVT, EVT MemVT) const { |
| 1072 | return getLoadExtAction(ExtType, ValVT, MemVT) == Legal || |
| 1073 | getLoadExtAction(ExtType, ValVT, MemVT) == Custom; |
| 1074 | } |
| 1075 | |
| 1076 | /// Return how this store with truncation should be treated: either it is |
| 1077 | /// legal, needs to be promoted to a larger size, needs to be expanded to some |
| 1078 | /// other code sequence, or the target has a custom expander for it. |
| 1079 | LegalizeAction getTruncStoreAction(EVT ValVT, EVT MemVT) const { |
| 1080 | if (ValVT.isExtended() || MemVT.isExtended()) return Expand; |
| 1081 | unsigned ValI = (unsigned) ValVT.getSimpleVT().SimpleTy; |
| 1082 | unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy; |
| 1083 | assert(ValI < MVT::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE && |
| 1084 | "Table isn't big enough!"); |
| 1085 | return TruncStoreActions[ValI][MemI]; |
| 1086 | } |
| 1087 | |
| 1088 | /// Return true if the specified store with truncation is legal on this |
| 1089 | /// target. |
| 1090 | bool isTruncStoreLegal(EVT ValVT, EVT MemVT) const { |
| 1091 | return isTypeLegal(ValVT) && getTruncStoreAction(ValVT, MemVT) == Legal; |
| 1092 | } |
| 1093 | |
| 1094 | /// Return true if the specified store with truncation is legal or has a |
| 1095 | /// custom lowering on this target. |
| 1096 | bool isTruncStoreLegalOrCustom(EVT ValVT, EVT MemVT) const { |
| 1097 | return isTypeLegal(ValVT) && |
| 1098 | (getTruncStoreAction(ValVT, MemVT) == Legal || |
| 1099 | getTruncStoreAction(ValVT, MemVT) == Custom); |
| 1100 | } |
| 1101 | |
| 1102 | /// Return how the indexed load should be treated: either it is legal, needs |
| 1103 | /// to be promoted to a larger size, needs to be expanded to some other code |
| 1104 | /// sequence, or the target has a custom expander for it. |
| 1105 | LegalizeAction |
| 1106 | getIndexedLoadAction(unsigned IdxMode, MVT VT) const { |
| 1107 | assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() && |
| 1108 | "Table isn't big enough!"); |
| 1109 | unsigned Ty = (unsigned)VT.SimpleTy; |
| 1110 | return (LegalizeAction)((IndexedModeActions[Ty][IdxMode] & 0xf0) >> 4); |
| 1111 | } |
| 1112 | |
| 1113 | /// Return true if the specified indexed load is legal on this target. |
| 1114 | bool isIndexedLoadLegal(unsigned IdxMode, EVT VT) const { |
| 1115 | return VT.isSimple() && |
| 1116 | (getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Legal || |
| 1117 | getIndexedLoadAction(IdxMode, VT.getSimpleVT()) == Custom); |
| 1118 | } |
| 1119 | |
| 1120 | /// Return how the indexed store should be treated: either it is legal, needs |
| 1121 | /// to be promoted to a larger size, needs to be expanded to some other code |
| 1122 | /// sequence, or the target has a custom expander for it. |
| 1123 | LegalizeAction |
| 1124 | getIndexedStoreAction(unsigned IdxMode, MVT VT) const { |
| 1125 | assert(IdxMode < ISD::LAST_INDEXED_MODE && VT.isValid() && |
| 1126 | "Table isn't big enough!"); |
| 1127 | unsigned Ty = (unsigned)VT.SimpleTy; |
| 1128 | return (LegalizeAction)(IndexedModeActions[Ty][IdxMode] & 0x0f); |
| 1129 | } |
| 1130 | |
| 1131 | /// Return true if the specified indexed store is legal on this target. |
| 1132 | bool isIndexedStoreLegal(unsigned IdxMode, EVT VT) const { |
| 1133 | return VT.isSimple() && |
| 1134 | (getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Legal || |
| 1135 | getIndexedStoreAction(IdxMode, VT.getSimpleVT()) == Custom); |
| 1136 | } |
| 1137 | |
| 1138 | /// Return how the condition code should be treated: either it is legal, needs |
| 1139 | /// to be expanded to some other code sequence, or the target has a custom |
| 1140 | /// expander for it. |
| 1141 | LegalizeAction |
| 1142 | getCondCodeAction(ISD::CondCode CC, MVT VT) const { |
| 1143 | assert((unsigned)CC < array_lengthof(CondCodeActions) && |
| 1144 | ((unsigned)VT.SimpleTy >> 3) < array_lengthof(CondCodeActions[0]) && |
| 1145 | "Table isn't big enough!"); |
| 1146 | // See setCondCodeAction for how this is encoded. |
| 1147 | uint32_t Shift = 4 * (VT.SimpleTy & 0x7); |
| 1148 | uint32_t Value = CondCodeActions[CC][VT.SimpleTy >> 3]; |
| 1149 | LegalizeAction Action = (LegalizeAction) ((Value >> Shift) & 0xF); |
| 1150 | assert(Action != Promote && "Can't promote condition code!"); |
| 1151 | return Action; |
| 1152 | } |
| 1153 | |
| 1154 | /// Return true if the specified condition code is legal on this target. |
| 1155 | bool isCondCodeLegal(ISD::CondCode CC, MVT VT) const { |
| 1156 | return getCondCodeAction(CC, VT) == Legal; |
| 1157 | } |
| 1158 | |
| 1159 | /// Return true if the specified condition code is legal or custom on this |
| 1160 | /// target. |
| 1161 | bool isCondCodeLegalOrCustom(ISD::CondCode CC, MVT VT) const { |
| 1162 | return getCondCodeAction(CC, VT) == Legal || |
| 1163 | getCondCodeAction(CC, VT) == Custom; |
| 1164 | } |
| 1165 | |
| 1166 | /// If the action for this operation is to promote, this method returns the |
| 1167 | /// ValueType to promote to. |
| 1168 | MVT getTypeToPromoteTo(unsigned Op, MVT VT) const { |
| 1169 | assert(getOperationAction(Op, VT) == Promote && |
| 1170 | "This operation isn't promoted!"); |
| 1171 | |
| 1172 | // See if this has an explicit type specified. |
| 1173 | std::map<std::pair<unsigned, MVT::SimpleValueType>, |
| 1174 | MVT::SimpleValueType>::const_iterator PTTI = |
| 1175 | PromoteToType.find(std::make_pair(Op, VT.SimpleTy)); |
| 1176 | if (PTTI != PromoteToType.end()) return PTTI->second; |
| 1177 | |
| 1178 | assert((VT.isInteger() || VT.isFloatingPoint()) && |
| 1179 | "Cannot autopromote this type, add it with AddPromotedToType."); |
| 1180 | |
| 1181 | MVT NVT = VT; |
| 1182 | do { |
| 1183 | NVT = (MVT::SimpleValueType)(NVT.SimpleTy+1); |
| 1184 | assert(NVT.isInteger() == VT.isInteger() && NVT != MVT::isVoid && |
| 1185 | "Didn't find type to promote to!"); |
| 1186 | } while (!isTypeLegal(NVT) || |
| 1187 | getOperationAction(Op, NVT) == Promote); |
| 1188 | return NVT; |
| 1189 | } |
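| | // Example (sketch): if a target marks ISD::CTLZ on MVT::i8 as Promote without |
| | // registering an explicit type via AddPromotedToType, the loop above walks to |
| | // the next larger legal integer type, so getTypeToPromoteTo(ISD::CTLZ, MVT::i8) |
| | // might return MVT::i32 on a target whose smallest legal integer type is i32. |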
| 1190 | |
| 1191 | /// Return the EVT corresponding to this LLVM type. This is fixed by the LLVM |
| 1192 | /// operations except for the pointer size. If AllowUnknown is true, this |
| 1193 | /// will return MVT::Other for types with no EVT counterpart (e.g. structs), |
| 1194 | /// otherwise it will assert. |
| 1195 | EVT getValueType(const DataLayout &DL, Type *Ty, |
| 1196 | bool AllowUnknown = false) const { |
| 1197 | // Lower scalar pointers to native pointer types. |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame^] | 1198 | if (auto *PTy = dyn_cast<PointerType>(Ty)) |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 1199 | return getPointerTy(DL, PTy->getAddressSpace()); |
| 1200 | |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame^] | 1201 | if (auto *VTy = dyn_cast<VectorType>(Ty)) { |
| 1202 | Type *EltTy = VTy->getElementType(); |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 1203 | // Lower vectors of pointers to native pointer types. |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame^] | 1204 | if (auto *PTy = dyn_cast<PointerType>(EltTy)) { |
| 1205 | EVT PointerTy(getPointerTy(DL, PTy->getAddressSpace())); |
| 1206 | EltTy = PointerTy.getTypeForEVT(Ty->getContext()); |
| 1207 | } |
| 1208 | return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(EltTy, false), |
| 1209 | VTy->getNumElements()); |
| 1210 | } |
| 1211 | |
| 1212 | return EVT::getEVT(Ty, AllowUnknown); |
| 1213 | } |
| 1214 | |
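| | /// Return the EVT corresponding to this LLVM type, but with pointers lowered |
| | /// to the target's in-memory pointer type (getPointerMemTy) rather than |
| | /// getPointerTy. Otherwise this behaves like getValueType above. |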
| 1215 | EVT getMemValueType(const DataLayout &DL, Type *Ty, |
| 1216 | bool AllowUnknown = false) const { |
| 1217 | // Lower scalar pointers to native pointer types. |
| 1218 | if (PointerType *PTy = dyn_cast<PointerType>(Ty)) |
| 1219 | return getPointerMemTy(DL, PTy->getAddressSpace()); |
| 1220 | else if (VectorType *VTy = dyn_cast<VectorType>(Ty)) { |
| 1221 | Type *Elm = VTy->getElementType(); |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 1222 | if (PointerType *PT = dyn_cast<PointerType>(Elm)) { |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame^] | 1223 | EVT PointerTy(getPointerMemTy(DL, PT->getAddressSpace())); |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 1224 | Elm = PointerTy.getTypeForEVT(Ty->getContext()); |
| 1225 | } |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 1226 | return EVT::getVectorVT(Ty->getContext(), EVT::getEVT(Elm, false), |
| 1227 | VTy->getNumElements()); |
| 1228 | } |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame^] | 1229 | |
| 1230 | return getValueType(DL, Ty, AllowUnknown); |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 1231 | } |
| 1232 | |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame^] | 1233 | |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 1234 | /// Return the MVT corresponding to this LLVM type. See getValueType. |
| 1235 | MVT getSimpleValueType(const DataLayout &DL, Type *Ty, |
| 1236 | bool AllowUnknown = false) const { |
| 1237 | return getValueType(DL, Ty, AllowUnknown).getSimpleVT(); |
| 1238 | } |
| 1239 | |
| 1240 | /// Return the desired alignment for ByVal or InAlloca aggregate function |
| 1241 | /// arguments in the caller parameter area. This is the actual alignment, not |
| 1242 | /// its logarithm. |
| 1243 | virtual unsigned getByValTypeAlignment(Type *Ty, const DataLayout &DL) const; |
| 1244 | |
| 1245 | /// Return the type of registers that this ValueType will eventually require. |
| 1246 | MVT getRegisterType(MVT VT) const { |
| 1247 | assert((unsigned)VT.SimpleTy < array_lengthof(RegisterTypeForVT)); |
| 1248 | return RegisterTypeForVT[VT.SimpleTy]; |
| 1249 | } |
| 1250 | |
| 1251 | /// Return the type of registers that this ValueType will eventually require. |
| 1252 | MVT getRegisterType(LLVMContext &Context, EVT VT) const { |
| 1253 | if (VT.isSimple()) { |
| 1254 | assert((unsigned)VT.getSimpleVT().SimpleTy < |
| 1255 | array_lengthof(RegisterTypeForVT)); |
| 1256 | return RegisterTypeForVT[VT.getSimpleVT().SimpleTy]; |
| 1257 | } |
| 1258 | if (VT.isVector()) { |
| 1259 | EVT VT1; |
| 1260 | MVT RegisterVT; |
| 1261 | unsigned NumIntermediates; |
| 1262 | (void)getVectorTypeBreakdown(Context, VT, VT1, |
| 1263 | NumIntermediates, RegisterVT); |
| 1264 | return RegisterVT; |
| 1265 | } |
| 1266 | if (VT.isInteger()) { |
| 1267 | return getRegisterType(Context, getTypeToTransformTo(Context, VT)); |
| 1268 | } |
| 1269 | llvm_unreachable("Unsupported extended type!"); |
| 1270 | } |
| 1271 | |
| 1272 | /// Return the number of registers that this ValueType will eventually |
| 1273 | /// require. |
| 1274 | /// |
| 1275 | /// This is one for any types promoted to live in larger registers, but may be |
| 1276 | /// more than one for types (like i64) that are split into pieces. For types |
| 1277 | /// like i140, which are first promoted then expanded, it is the number of |
| 1278 | /// registers needed to hold all the bits of the original type. For an i140 |
| 1279 | /// on a 32 bit machine this means 5 registers. |
| 1280 | unsigned getNumRegisters(LLVMContext &Context, EVT VT) const { |
| 1281 | if (VT.isSimple()) { |
| 1282 | assert((unsigned)VT.getSimpleVT().SimpleTy < |
| 1283 | array_lengthof(NumRegistersForVT)); |
| 1284 | return NumRegistersForVT[VT.getSimpleVT().SimpleTy]; |
| 1285 | } |
| 1286 | if (VT.isVector()) { |
| 1287 | EVT VT1; |
| 1288 | MVT VT2; |
| 1289 | unsigned NumIntermediates; |
| 1290 | return getVectorTypeBreakdown(Context, VT, VT1, NumIntermediates, VT2); |
| 1291 | } |
| 1292 | if (VT.isInteger()) { |
| 1293 | unsigned BitWidth = VT.getSizeInBits(); |
| 1294 | unsigned RegWidth = getRegisterType(Context, VT).getSizeInBits(); |
| 1295 | return (BitWidth + RegWidth - 1) / RegWidth; |
| 1296 | } |
| 1297 | llvm_unreachable("Unsupported extended type!"); |
| 1298 | } |
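| | // Worked example (sketch, assuming a 32-bit target and an LLVMContext Ctx): |
| | //   TLI.getNumRegisters(Ctx, EVT::getIntegerVT(Ctx, 140)) |
| | // takes the integer path above and returns (140 + 31) / 32 == 5, matching the |
| | // i140 example in the comment before this function. |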
| 1299 | |
| 1300 | /// Certain combinations of ABIs, targets and features require that types |
| 1301 | /// are legal for some operations and not for others. |
| 1302 | /// For MIPS, all vector types must be passed through the integer register set. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 1303 | virtual MVT getRegisterTypeForCallingConv(LLVMContext &Context, |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 1304 | CallingConv::ID CC, EVT VT) const { |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 1305 | return getRegisterType(Context, VT); |
| 1306 | } |
| 1307 | |
| 1308 | /// Certain targets require unusual breakdowns of certain types. For MIPS, |
| 1309 | /// this occurs when a vector type is used, as vectors are passed through the |
| 1310 | /// integer register set. |
| 1311 | virtual unsigned getNumRegistersForCallingConv(LLVMContext &Context, |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 1312 | CallingConv::ID CC, |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 1313 | EVT VT) const { |
| 1314 | return getNumRegisters(Context, VT); |
| 1315 | } |
| 1316 | |
| 1317 | /// Certain targets have context-sensitive alignment requirements, where one |
| 1318 | /// type has the alignment requirement of another type. |
| 1319 | virtual unsigned getABIAlignmentForCallingConv(Type *ArgTy, |
| 1320 | DataLayout DL) const { |
| 1321 | return DL.getABITypeAlignment(ArgTy); |
| 1322 | } |
| 1323 | |
| 1324 | /// If true, then instruction selection should seek to shrink the FP constant |
| 1325 | /// of the specified type to a smaller type in order to save space and / or |
| 1326 | /// reduce runtime. |
| 1327 | virtual bool ShouldShrinkFPConstant(EVT) const { return true; } |
| 1328 | |
Andrew Walbran | 16937d0 | 2019-10-22 13:54:20 +0100 | [diff] [blame] | 1329 | /// Return true if it is profitable to reduce a load to a smaller type. |
| 1330 | /// Example: (i16 (trunc (i32 (load x)))) -> i16 load x |
| 1331 | virtual bool shouldReduceLoadWidth(SDNode *Load, ISD::LoadExtType ExtTy, |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 1332 | EVT NewVT) const { |
Andrew Walbran | 16937d0 | 2019-10-22 13:54:20 +0100 | [diff] [blame] | 1333 | // By default, assume that it is cheaper to extract a subvector from a wide |
| 1334 | // vector load rather than creating multiple narrow vector loads. |
| 1335 | if (NewVT.isVector() && !Load->hasOneUse()) |
| 1336 | return false; |
| 1337 | |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 1338 | return true; |
| 1339 | } |
| 1340 | |
| 1341 | /// When splitting a value of the specified type into parts, does the Lo |
| 1342 | /// or Hi part come first? This usually follows the endianness, except |
| 1343 | /// for ppcf128, where the Hi part always comes first. |
| 1344 | bool hasBigEndianPartOrdering(EVT VT, const DataLayout &DL) const { |
| 1345 | return DL.isBigEndian() || VT == MVT::ppcf128; |
| 1346 | } |
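| | // Example: when an i64 is split into two i32 parts on a little-endian target, |
| | //   hasBigEndianPartOrdering(MVT::i64, DL) |
| | // returns false and the Lo part comes first; it returns true (Hi part first) |
| | // on big-endian targets and always for MVT::ppcf128. |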
| 1347 | |
| 1348 | /// If true, the target has custom DAG combine transformations that it can |
| 1349 | /// perform for the specified node. |
| 1350 | bool hasTargetDAGCombine(ISD::NodeType NT) const { |
| 1351 | assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray)); |
| 1352 | return TargetDAGCombineArray[NT >> 3] & (1 << (NT&7)); |
| 1353 | } |
| 1354 | |
| 1355 | unsigned getGatherAllAliasesMaxDepth() const { |
| 1356 | return GatherAllAliasesMaxDepth; |
| 1357 | } |
| 1358 | |
| 1359 | /// Returns the size of the platform's va_list object. |
| 1360 | virtual unsigned getVaListSizeInBits(const DataLayout &DL) const { |
| 1361 | return getPointerTy(DL).getSizeInBits(); |
| 1362 | } |
| 1363 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 1364 | /// Get maximum # of store operations permitted for llvm.memset |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 1365 | /// |
| 1366 | /// This function returns the maximum number of store operations permitted |
| 1367 | /// to replace a call to llvm.memset. The value is set by the target at the |
| 1368 | /// performance threshold for such a replacement. If OptSize is true, |
| 1369 | /// return the limit for functions that have OptSize attribute. |
| 1370 | unsigned getMaxStoresPerMemset(bool OptSize) const { |
| 1371 | return OptSize ? MaxStoresPerMemsetOptSize : MaxStoresPerMemset; |
| 1372 | } |
| 1373 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 1374 | /// Get maximum # of store operations permitted for llvm.memcpy |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 1375 | /// |
| 1376 | /// This function returns the maximum number of store operations permitted |
| 1377 | /// to replace a call to llvm.memcpy. The value is set by the target at the |
| 1378 | /// performance threshold for such a replacement. If OptSize is true, |
| 1379 | /// return the limit for functions that have OptSize attribute. |
| 1380 | unsigned getMaxStoresPerMemcpy(bool OptSize) const { |
| 1381 | return OptSize ? MaxStoresPerMemcpyOptSize : MaxStoresPerMemcpy; |
| 1382 | } |
| 1383 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 1384 | /// \brief Get maximum # of store operations to be glued together |
| 1385 | /// |
| 1386 | /// This function returns the maximum number of store operations permitted |
| 1387 | /// to glue together during lowering of llvm.memcpy. The value is set by |
| 1388 | /// the target at the performance threshold for such a replacement. |
| 1389 | virtual unsigned getMaxGluedStoresPerMemcpy() const { |
| 1390 | return MaxGluedStoresPerMemcpy; |
| 1391 | } |
| 1392 | |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 1393 | /// Get maximum # of load operations permitted for memcmp |
| 1394 | /// |
| 1395 | /// This function returns the maximum number of load operations permitted |
| 1396 | /// to replace a call to memcmp. The value is set by the target at the |
| 1397 | /// performance threshold for such a replacement. If OptSize is true, |
| 1398 | /// return the limit for functions that have OptSize attribute. |
| 1399 | unsigned getMaxExpandSizeMemcmp(bool OptSize) const { |
| 1400 | return OptSize ? MaxLoadsPerMemcmpOptSize : MaxLoadsPerMemcmp; |
| 1401 | } |
| 1402 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 1403 | /// Get maximum # of store operations permitted for llvm.memmove |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 1404 | /// |
| 1405 | /// This function returns the maximum number of store operations permitted |
| 1406 | /// to replace a call to llvm.memmove. The value is set by the target at the |
| 1407 | /// performance threshold for such a replacement. If OptSize is true, |
| 1408 | /// return the limit for functions that have OptSize attribute. |
| 1409 | unsigned getMaxStoresPerMemmove(bool OptSize) const { |
| 1410 | return OptSize ? MaxStoresPerMemmoveOptSize : MaxStoresPerMemmove; |
| 1411 | } |
| 1412 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 1413 | /// Determine if the target supports unaligned memory accesses. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 1414 | /// |
| 1415 | /// This function returns true if the target allows unaligned memory accesses |
| 1416 | /// of the specified type in the given address space. If true, it also returns |
| 1417 | /// whether the unaligned memory access is "fast" in the last argument by |
| 1418 | /// reference. This is used, for example, in situations where an array |
| 1419 | /// copy/move/set is converted to a sequence of store operations. Its use |
| 1420 | /// helps to ensure that such replacements don't generate code that causes an |
| 1421 | /// alignment error (trap) on the target machine. |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame^] | 1422 | virtual bool allowsMisalignedMemoryAccesses( |
| 1423 | EVT, unsigned AddrSpace = 0, unsigned Align = 1, |
| 1424 | MachineMemOperand::Flags Flags = MachineMemOperand::MONone, |
| 1425 | bool * /*Fast*/ = nullptr) const { |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 1426 | return false; |
| 1427 | } |
| 1428 | |
| 1429 | /// Return true if the target supports a memory access of this type for the |
| 1430 | /// given address space and alignment. If the access is allowed, the optional |
| 1431 | /// final parameter returns if the access is also fast (as defined by the |
| 1432 | /// target). |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame^] | 1433 | bool |
| 1434 | allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, |
| 1435 | unsigned AddrSpace = 0, unsigned Alignment = 1, |
| 1436 | MachineMemOperand::Flags Flags = MachineMemOperand::MONone, |
| 1437 | bool *Fast = nullptr) const; |
| 1438 | |
| 1439 | /// Return true if the target supports a memory access of this type for the |
| 1440 | /// given MachineMemOperand. If the access is allowed, the optional |
| 1441 | /// final parameter returns if the access is also fast (as defined by the |
| 1442 | /// target). |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 1443 | bool allowsMemoryAccess(LLVMContext &Context, const DataLayout &DL, EVT VT, |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame^] | 1444 | const MachineMemOperand &MMO, |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 1445 | bool *Fast = nullptr) const; |
| 1446 | |
| 1447 | /// Returns the target specific optimal type for load and store operations as |
| 1448 | /// a result of memset, memcpy, and memmove lowering. |
| 1449 | /// |
| 1450 | /// If DstAlign is zero, the destination alignment can satisfy any |
| 1451 | /// constraint. Similarly, if SrcAlign is zero it means there isn't |
| 1452 | /// a need to check it against the alignment requirement, probably because the |
| 1453 | /// source does not need to be loaded. If 'IsMemset' is true, that means it's |
| 1454 | /// expanding a memset. If 'ZeroMemset' is true, that means it's a memset of |
| 1455 | /// zero. 'MemcpyStrSrc' indicates whether the memcpy source is constant so it |
| 1456 | /// does not need to be loaded. It returns EVT::Other if the type should be |
| 1457 | /// determined using generic target-independent logic. |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame^] | 1458 | virtual EVT |
| 1459 | getOptimalMemOpType(uint64_t /*Size*/, unsigned /*DstAlign*/, |
| 1460 | unsigned /*SrcAlign*/, bool /*IsMemset*/, |
| 1461 | bool /*ZeroMemset*/, bool /*MemcpyStrSrc*/, |
| 1462 | const AttributeList & /*FuncAttributes*/) const { |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 1463 | return MVT::Other; |
| 1464 | } |
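| | // Sketch of a hypothetical override (MyTargetLowering and the 16-byte |
| | // threshold are illustrative, not taken from a real target): a target with |
| | // fast 128-bit vector stores might return a vector type for large, suitably |
| | // aligned memsets: |
| | //   EVT MyTargetLowering::getOptimalMemOpType( |
| | //       uint64_t Size, unsigned DstAlign, unsigned SrcAlign, bool IsMemset, |
| | //       bool ZeroMemset, bool MemcpyStrSrc, |
| | //       const AttributeList &FuncAttributes) const { |
| | //     if (IsMemset && Size >= 16 && DstAlign >= 16) |
| | //       return MVT::v4i32; |
| | //     return MVT::Other; |
| | //   } |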
| 1465 | |
| 1466 | /// Returns true if it's safe to use load / store of the specified type to |
| 1467 | /// expand memcpy / memset inline. |
| 1468 | /// |
| 1469 | /// This is mostly true for all types except for some special cases. For |
| 1470 | /// example, on X86 targets without SSE2 f64 load / store are done with fldl / |
| 1471 | /// fstpl which also does type conversion. Note the specified type doesn't |
| 1472 | /// have to be legal as the hook is used before type legalization. |
| 1473 | virtual bool isSafeMemOpType(MVT /*VT*/) const { return true; } |
| 1474 | |
| 1475 | /// Determine if we should use _setjmp or setjmp to implement llvm.setjmp. |
| 1476 | bool usesUnderscoreSetJmp() const { |
| 1477 | return UseUnderscoreSetJmp; |
| 1478 | } |
| 1479 | |
| 1480 | /// Determine if we should use _longjmp or longjmp to implement llvm.longjmp. |
| 1481 | bool usesUnderscoreLongJmp() const { |
| 1482 | return UseUnderscoreLongJmp; |
| 1483 | } |
| 1484 | |
| 1485 | /// Return lower limit for number of blocks in a jump table. |
| 1486 | virtual unsigned getMinimumJumpTableEntries() const; |
| 1487 | |
| 1488 | /// Return lower limit of the density in a jump table. |
| 1489 | unsigned getMinimumJumpTableDensity(bool OptForSize) const; |
| 1490 | |
| 1491 | /// Return upper limit for number of entries in a jump table. |
| 1492 | /// Zero if no limit. |
| 1493 | unsigned getMaximumJumpTableSize() const; |
| 1494 | |
| 1495 | virtual bool isJumpTableRelative() const { |
| 1496 | return TM.isPositionIndependent(); |
| 1497 | } |
| 1498 | |
| 1499 | /// If a physical register, this specifies the register that |
| 1500 | /// llvm.stacksave/llvm.stackrestore should save and restore. |
| 1501 | unsigned getStackPointerRegisterToSaveRestore() const { |
| 1502 | return StackPointerRegisterToSaveRestore; |
| 1503 | } |
| 1504 | |
| 1505 | /// If a physical register, this returns the register that receives the |
| 1506 | /// exception address on entry to an EH pad. |
| 1507 | virtual unsigned |
| 1508 | getExceptionPointerRegister(const Constant *PersonalityFn) const { |
| 1509 | // 0 is guaranteed to be the NoRegister value on all targets |
| 1510 | return 0; |
| 1511 | } |
| 1512 | |
| 1513 | /// If a physical register, this returns the register that receives the |
| 1514 | /// exception typeid on entry to a landing pad. |
| 1515 | virtual unsigned |
| 1516 | getExceptionSelectorRegister(const Constant *PersonalityFn) const { |
| 1517 | // 0 is guaranteed to be the NoRegister value on all targets |
| 1518 | return 0; |
| 1519 | } |
| 1520 | |
| 1521 | virtual bool needsFixedCatchObjects() const { |
| 1522 | report_fatal_error("Funclet EH is not implemented for this target"); |
| 1523 | } |
| 1524 | |
| 1525 | /// Returns the target's jmp_buf size in bytes (if never set, the default is |
| 1526 | /// 200) |
| 1527 | unsigned getJumpBufSize() const { |
| 1528 | return JumpBufSize; |
| 1529 | } |
| 1530 | |
| 1531 | /// Returns the target's jmp_buf alignment in bytes (if never set, the default |
| 1532 | /// is 0) |
| 1533 | unsigned getJumpBufAlignment() const { |
| 1534 | return JumpBufAlignment; |
| 1535 | } |
| 1536 | |
| 1537 | /// Return the minimum stack alignment of an argument. |
| 1538 | unsigned getMinStackArgumentAlignment() const { |
| 1539 | return MinStackArgumentAlignment; |
| 1540 | } |
| 1541 | |
| 1542 | /// Return the minimum function alignment. |
| 1543 | unsigned getMinFunctionAlignment() const { |
| 1544 | return MinFunctionAlignment; |
| 1545 | } |
| 1546 | |
| 1547 | /// Return the preferred function alignment. |
| 1548 | unsigned getPrefFunctionAlignment() const { |
| 1549 | return PrefFunctionAlignment; |
| 1550 | } |
| 1551 | |
| 1552 | /// Return the preferred loop alignment. |
| 1553 | virtual unsigned getPrefLoopAlignment(MachineLoop *ML = nullptr) const { |
| 1554 | return PrefLoopAlignment; |
| 1555 | } |
| 1556 | |
Andrew Scull | 0372a57 | 2018-11-16 15:47:06 +0000 | [diff] [blame] | 1557 | /// Should loops be aligned even when the function is marked OptSize (but not |
| 1558 | /// MinSize)? |
| 1559 | virtual bool alignLoopsWithOptSize() const { |
| 1560 | return false; |
| 1561 | } |
| 1562 | |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 1563 | /// If the target has a standard location for the stack protector guard, |
| 1564 | /// returns the address of that location. Otherwise, returns nullptr. |
| 1565 | /// DEPRECATED: please override useLoadStackGuardNode and customize |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 1566 | /// LOAD_STACK_GUARD, or customize \@llvm.stackguard(). |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 1567 | virtual Value *getIRStackGuard(IRBuilder<> &IRB) const; |
| 1568 | |
| 1569 | /// Inserts necessary declarations for SSP (stack protection) purpose. |
| 1570 | /// Should be used only when getIRStackGuard returns nullptr. |
| 1571 | virtual void insertSSPDeclarations(Module &M) const; |
| 1572 | |
| 1573 | /// Return the variable that's previously inserted by insertSSPDeclarations, |
| 1574 | /// if any, otherwise return nullptr. Should be used only when |
| 1575 | /// getIRStackGuard returns nullptr. |
| 1576 | virtual Value *getSDagStackGuard(const Module &M) const; |
| 1577 | |
| 1578 | /// If this function returns true, stack protection checks should XOR the |
| 1579 | /// frame pointer (or whichever pointer is used to address locals) into the |
| 1580 | /// stack guard value before checking it. getIRStackGuard must return nullptr |
| 1581 | /// if this returns true. |
| 1582 | virtual bool useStackGuardXorFP() const { return false; } |
| 1583 | |
| 1584 | /// If the target has a standard stack protection check function that |
| 1585 | /// performs validation and error handling, returns the function. Otherwise, |
| 1586 | /// returns nullptr. Must be previously inserted by insertSSPDeclarations. |
| 1587 | /// Should be used only when getIRStackGuard returns nullptr. |
Andrew Walbran | 16937d0 | 2019-10-22 13:54:20 +0100 | [diff] [blame] | 1588 | virtual Function *getSSPStackGuardCheck(const Module &M) const; |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 1589 | |
| 1590 | protected: |
| 1591 | Value *getDefaultSafeStackPointerLocation(IRBuilder<> &IRB, |
| 1592 | bool UseTLS) const; |
| 1593 | |
| 1594 | public: |
| 1595 | /// Returns the target-specific address of the unsafe stack pointer. |
| 1596 | virtual Value *getSafeStackPointerLocation(IRBuilder<> &IRB) const; |
| 1597 | |
| 1598 | /// Returns the name of the symbol used to emit stack probes or the empty |
| 1599 | /// string if not applicable. |
| 1600 | virtual StringRef getStackProbeSymbolName(MachineFunction &MF) const { |
| 1601 | return ""; |
| 1602 | } |
| 1603 | |
| 1604 | /// Returns true if a cast between SrcAS and DestAS is a noop. |
| 1605 | virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { |
| 1606 | return false; |
| 1607 | } |
| 1608 | |
| 1609 | /// Returns true if a cast from SrcAS to DestAS is "cheap", such that e.g. we |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame^] | 1610 | /// are happy to sink it into basic blocks. A cast may be free, but not |
| 1611 | /// necessarily a no-op, e.g. a free truncate from a 64-bit to a 32-bit pointer. |
| 1612 | virtual bool isFreeAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 1613 | return isNoopAddrSpaceCast(SrcAS, DestAS); |
| 1614 | } |
| 1615 | |
| 1616 | /// Return true if the pointer arguments to CI should be aligned by aligning |
| 1617 | /// the object whose address is being passed. If so then MinSize is set to the |
| 1618 | /// minimum size the object must be to be aligned and PrefAlign is set to the |
| 1619 | /// preferred alignment. |
| 1620 | virtual bool shouldAlignPointerArgs(CallInst * /*CI*/, unsigned & /*MinSize*/, |
| 1621 | unsigned & /*PrefAlign*/) const { |
| 1622 | return false; |
| 1623 | } |
| 1624 | |
| 1625 | //===--------------------------------------------------------------------===// |
| 1626 | /// \name Helpers for TargetTransformInfo implementations |
| 1627 | /// @{ |
| 1628 | |
| 1629 | /// Get the ISD node that corresponds to the Instruction class opcode. |
| 1630 | int InstructionOpcodeToISD(unsigned Opcode) const; |
| 1631 | |
| 1632 | /// Estimate the cost of type-legalization and the legalized type. |
| 1633 | std::pair<int, MVT> getTypeLegalizationCost(const DataLayout &DL, |
| 1634 | Type *Ty) const; |
| 1635 | |
| 1636 | /// @} |
| 1637 | |
| 1638 | //===--------------------------------------------------------------------===// |
| 1639 | /// \name Helpers for atomic expansion. |
| 1640 | /// @{ |
| 1641 | |
| 1642 | /// Returns the maximum atomic operation size (in bits) supported by |
| 1643 | /// the backend. Atomic operations greater than this size (as well |
| 1644 | /// as ones that are not naturally aligned), will be expanded by |
| 1645 | /// AtomicExpandPass into an __atomic_* library call. |
| 1646 | unsigned getMaxAtomicSizeInBitsSupported() const { |
| 1647 | return MaxAtomicSizeInBitsSupported; |
| 1648 | } |
| 1649 | |
| 1650 | /// Returns the size of the smallest cmpxchg or ll/sc instruction |
| 1651 | /// the backend supports. Any smaller operations are widened in |
| 1652 | /// AtomicExpandPass. |
| 1653 | /// |
| 1654 | /// Note that *unlike* operations above the maximum size, atomic ops |
| 1655 | /// are still natively supported below the minimum; they just |
| 1656 | /// require a more complex expansion. |
| 1657 | unsigned getMinCmpXchgSizeInBits() const { return MinCmpXchgSizeInBits; } |
| 1658 | |
| 1659 | /// Whether the target supports unaligned atomic operations. |
| 1660 | bool supportsUnalignedAtomics() const { return SupportsUnalignedAtomics; } |
| 1661 | |
| 1662 | /// Whether AtomicExpandPass should automatically insert fences and reduce |
| 1663 | /// ordering for this atomic. This should be true for most architectures with |
| 1664 | /// weak memory ordering. Defaults to false. |
| 1665 | virtual bool shouldInsertFencesForAtomic(const Instruction *I) const { |
| 1666 | return false; |
| 1667 | } |
| 1668 | |
| 1669 | /// Perform a load-linked operation on Addr, returning a "Value *" with the |
| 1670 | /// corresponding pointee type. This may entail some non-trivial operations to |
| 1671 | /// truncate or reconstruct types that will be illegal in the backend. See |
| 1672 | /// ARMISelLowering for an example implementation. |
| 1673 | virtual Value *emitLoadLinked(IRBuilder<> &Builder, Value *Addr, |
| 1674 | AtomicOrdering Ord) const { |
| 1675 | llvm_unreachable("Load linked unimplemented on this target"); |
| 1676 | } |
| 1677 | |
| 1678 | /// Perform a store-conditional operation to Addr. Return the status of the |
| 1679 | /// store. This should be 0 if the store succeeded, non-zero otherwise. |
| 1680 | virtual Value *emitStoreConditional(IRBuilder<> &Builder, Value *Val, |
| 1681 | Value *Addr, AtomicOrdering Ord) const { |
| 1682 | llvm_unreachable("Store conditional unimplemented on this target"); |
| 1683 | } |
| 1684 | |
Andrew Scull | 0372a57 | 2018-11-16 15:47:06 +0000 | [diff] [blame] | 1685 | /// Perform a masked atomicrmw using a target-specific intrinsic. This |
| 1686 | /// represents the core LL/SC loop which will be lowered at a late stage by |
| 1687 | /// the backend. |
| 1688 | virtual Value *emitMaskedAtomicRMWIntrinsic(IRBuilder<> &Builder, |
| 1689 | AtomicRMWInst *AI, |
| 1690 | Value *AlignedAddr, Value *Incr, |
| 1691 | Value *Mask, Value *ShiftAmt, |
| 1692 | AtomicOrdering Ord) const { |
| 1693 | llvm_unreachable("Masked atomicrmw expansion unimplemented on this target"); |
| 1694 | } |
| 1695 | |
| 1696 | /// Perform a masked cmpxchg using a target-specific intrinsic. This |
| 1697 | /// represents the core LL/SC loop which will be lowered at a late stage by |
| 1698 | /// the backend. |
| 1699 | virtual Value *emitMaskedAtomicCmpXchgIntrinsic( |
| 1700 | IRBuilder<> &Builder, AtomicCmpXchgInst *CI, Value *AlignedAddr, |
| 1701 | Value *CmpVal, Value *NewVal, Value *Mask, AtomicOrdering Ord) const { |
| 1702 | llvm_unreachable("Masked cmpxchg expansion unimplemented on this target"); |
| 1703 | } |
| 1704 | |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 1705 | /// Inserts in the IR a target-specific intrinsic specifying a fence. |
| 1706 | /// It is called by AtomicExpandPass before expanding an |
| 1707 | /// AtomicRMW/AtomicCmpXchg/AtomicStore/AtomicLoad |
| 1708 | /// if shouldInsertFencesForAtomic returns true. |
| 1709 | /// |
| 1710 | /// Inst is the original atomic instruction, prior to other expansions that |
| 1711 | /// may be performed. |
| 1712 | /// |
| 1713 | /// This function should either return a nullptr, or a pointer to an IR-level |
| 1714 | /// Instruction*. Even complex fence sequences can be represented by a |
| 1715 | /// single Instruction* through an intrinsic to be lowered later. |
| 1716 | /// Backends should override this method to produce target-specific intrinsic |
| 1717 | /// for their fences. |
| 1718 | /// FIXME: Please note that the default implementation here in terms of |
| 1719 | /// IR-level fences exists for historical/compatibility reasons and is |
| 1720 | /// *unsound* ! Fences cannot, in general, be used to restore sequential |
| 1721 | /// consistency. For example, consider the following example: |
| 1722 | /// atomic<int> x = y = 0; |
| 1723 | /// int r1, r2, r3, r4; |
| 1724 | /// Thread 0: |
| 1725 | /// x.store(1); |
| 1726 | /// Thread 1: |
| 1727 | /// y.store(1); |
| 1728 | /// Thread 2: |
| 1729 | /// r1 = x.load(); |
| 1730 | /// r2 = y.load(); |
| 1731 | /// Thread 3: |
| 1732 | /// r3 = y.load(); |
| 1733 | /// r4 = x.load(); |
| 1734 | /// r1 = r3 = 1 and r2 = r4 = 0 is impossible as long as the accesses are all |
| 1735 | /// seq_cst. But if they are lowered to monotonic accesses, no amount of |
| 1736 | /// IR-level fences can prevent it. |
| 1737 | /// @{ |
| 1738 | virtual Instruction *emitLeadingFence(IRBuilder<> &Builder, Instruction *Inst, |
| 1739 | AtomicOrdering Ord) const { |
| 1740 | if (isReleaseOrStronger(Ord) && Inst->hasAtomicStore()) |
| 1741 | return Builder.CreateFence(Ord); |
| 1742 | else |
| 1743 | return nullptr; |
| 1744 | } |
| 1745 | |
| 1746 | virtual Instruction *emitTrailingFence(IRBuilder<> &Builder, |
| 1747 | Instruction *Inst, |
| 1748 | AtomicOrdering Ord) const { |
| 1749 | if (isAcquireOrStronger(Ord)) |
| 1750 | return Builder.CreateFence(Ord); |
| 1751 | else |
| 1752 | return nullptr; |
| 1753 | } |
| 1754 | /// @} |
| 1755 | |
| 1756 | // Emits code that executes when the comparison result in the ll/sc |
| 1757 | // expansion of a cmpxchg instruction is such that the store-conditional will |
| 1758 | // not execute. This makes it possible to balance out the load-linked with |
| 1759 | // a dedicated instruction, if desired. |
| 1760 | // E.g., on ARM, if ldrex isn't followed by strex, the exclusive monitor would |
| 1761 | // be unnecessarily held, except if clrex, inserted by this hook, is executed. |
| 1762 | virtual void emitAtomicCmpXchgNoStoreLLBalance(IRBuilder<> &Builder) const {} |
| 1763 | |
| 1764 | /// Returns true if the given (atomic) store should be expanded by the |
| 1765 | /// IR-level AtomicExpand pass into an "atomic xchg" which ignores its input. |
| 1766 | virtual bool shouldExpandAtomicStoreInIR(StoreInst *SI) const { |
| 1767 | return false; |
| 1768 | } |
| 1769 | |
| 1770 | /// Returns true if arguments should be sign-extended in lib calls. |
| 1771 | virtual bool shouldSignExtendTypeInLibCall(EVT Type, bool IsSigned) const { |
| 1772 | return IsSigned; |
| 1773 | } |
| 1774 | |
| 1775 | /// Returns how the given (atomic) load should be expanded by the |
| 1776 | /// IR-level AtomicExpand pass. |
| 1777 | virtual AtomicExpansionKind shouldExpandAtomicLoadInIR(LoadInst *LI) const { |
| 1778 | return AtomicExpansionKind::None; |
| 1779 | } |
| 1780 | |
Andrew Scull | 0372a57 | 2018-11-16 15:47:06 +0000 | [diff] [blame] | 1781 | /// Returns how the given atomic cmpxchg should be expanded by the IR-level |
| 1782 | /// AtomicExpand pass. |
| 1783 | virtual AtomicExpansionKind |
| 1784 | shouldExpandAtomicCmpXchgInIR(AtomicCmpXchgInst *AI) const { |
| 1785 | return AtomicExpansionKind::None; |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 1786 | } |
| 1787 | |
| 1788 | /// Returns how the IR-level AtomicExpand pass should expand the given |
| 1789 | /// AtomicRMW, if at all. Default is to never expand. |
Andrew Walbran | 16937d0 | 2019-10-22 13:54:20 +0100 | [diff] [blame] | 1790 | virtual AtomicExpansionKind shouldExpandAtomicRMWInIR(AtomicRMWInst *RMW) const { |
| 1791 | return RMW->isFloatingPointOperation() ? |
| 1792 | AtomicExpansionKind::CmpXChg : AtomicExpansionKind::None; |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 1793 | } |
| 1794 | |
| 1795 | /// On some platforms, an AtomicRMW that never actually modifies the value |
| 1796 | /// (such as fetch_add of 0) can be turned into a fence followed by an |
| 1797 | /// atomic load. This may sound useless, but it makes it possible for the |
| 1798 | /// processor to keep the cacheline shared, dramatically improving |
| 1799 | /// performance. And such idempotent RMWs are useful for implementing some |
| 1800 | /// kinds of locks, see for example (justification + benchmarks): |
| 1801 | /// http://www.hpl.hp.com/techreports/2012/HPL-2012-68.pdf |
| 1802 | /// This method tries doing that transformation, returning the atomic load if |
| 1803 | /// it succeeds, and nullptr otherwise. |
| 1804 | /// If shouldExpandAtomicLoadInIR returns true on that load, it will undergo |
| 1805 | /// another round of expansion. |
| 1806 | virtual LoadInst * |
| 1807 | lowerIdempotentRMWIntoFencedLoad(AtomicRMWInst *RMWI) const { |
| 1808 | return nullptr; |
| 1809 | } |
| 1810 | |
| 1811 | /// Returns how the platform's atomic operations are extended (ZERO_EXTEND, |
| 1812 | /// SIGN_EXTEND, or ANY_EXTEND). |
| 1813 | virtual ISD::NodeType getExtendForAtomicOps() const { |
| 1814 | return ISD::ZERO_EXTEND; |
| 1815 | } |
| 1816 | |
| 1817 | /// @} |
| 1818 | |
| 1819 | /// Returns true if we should normalize |
| 1820 | /// select(N0&N1, X, Y) => select(N0, select(N1, X, Y), Y) and |
| 1821 | /// select(N0|N1, X, Y) => select(N0, X, select(N1, X, Y)) if it is likely |
| 1822 | /// that it saves us from materializing N0 and N1 in an integer register. |
| 1823 | /// Targets that are able to perform and/or on flags should return false here. |
| 1824 | virtual bool shouldNormalizeToSelectSequence(LLVMContext &Context, |
| 1825 | EVT VT) const { |
| 1826 | // If a target has multiple condition registers, then it likely has logical |
| 1827 | // operations on those registers. |
| 1828 | if (hasMultipleConditionRegisters()) |
| 1829 | return false; |
| 1830 | // Only do the transform if the value won't be split into multiple |
| 1831 | // registers. |
| 1832 | LegalizeTypeAction Action = getTypeAction(Context, VT); |
| 1833 | return Action != TypeExpandInteger && Action != TypeExpandFloat && |
| 1834 | Action != TypeSplitVector; |
| 1835 | } |
| 1836 | |
Andrew Walbran | 16937d0 | 2019-10-22 13:54:20 +0100 | [diff] [blame] | 1837 | virtual bool isProfitableToCombineMinNumMaxNum(EVT VT) const { return true; } |
| 1838 | |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 1839 | /// Return true if a select of constants (select Cond, C1, C2) should be |
| 1840 | /// transformed into simple math ops with the condition value. For example: |
| 1841 | /// select Cond, C1, C1-1 --> add (zext Cond), C1-1 |
| 1842 | virtual bool convertSelectOfConstantsToMath(EVT VT) const { |
| 1843 | return false; |
| 1844 | } |
| 1845 | |
Andrew Scull | 0372a57 | 2018-11-16 15:47:06 +0000 | [diff] [blame] | 1846 | /// Return true if it is profitable to transform an integer |
| 1847 | /// multiplication-by-constant into simpler operations like shifts and adds. |
| 1848 | /// This may be true if the target does not directly support the |
| 1849 | /// multiplication operation for the specified type or the sequence of simpler |
| 1850 | /// ops is faster than the multiply. |
| 1851 | virtual bool decomposeMulByConstant(EVT VT, SDValue C) const { |
| 1852 | return false; |
| 1853 | } |
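| | // Example: with this hook returning true for VT = i32 and C = 9, DAGCombiner |
| | // may rewrite (mul X, 9) as (add (shl X, 3), X), trading the multiply for a |
| | // shift and an add. |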
| 1854 | |
Andrew Walbran | 16937d0 | 2019-10-22 13:54:20 +0100 | [diff] [blame] | 1855 | /// Return true if it is more correct/profitable to use strict FP_TO_INT |
| 1856 | /// conversion operations - canonicalizing the FP source value instead of |
| 1857 | /// converting all cases and then selecting based on value. |
| 1858 | /// This may be true if the target throws exceptions for out of bounds |
| 1859 | /// conversions or has fast FP CMOV. |
| 1860 | virtual bool shouldUseStrictFP_TO_INT(EVT FpVT, EVT IntVT, |
| 1861 | bool IsSigned) const { |
| 1862 | return false; |
| 1863 | } |
| 1864 | |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 1865 | //===--------------------------------------------------------------------===// |
| 1866 | // TargetLowering Configuration Methods - These methods should be invoked by |
| 1867 | // the derived class constructor to configure this object for the target. |
| 1868 | // |
| 1869 | protected: |
| 1870 | /// Specify how the target extends the result of integer and floating point |
| 1871 | /// boolean values from i1 to a wider type. See getBooleanContents. |
| 1872 | void setBooleanContents(BooleanContent Ty) { |
| 1873 | BooleanContents = Ty; |
| 1874 | BooleanFloatContents = Ty; |
| 1875 | } |
| 1876 | |
| 1877 | /// Specify how the target extends the result of integer and floating point |
| 1878 | /// boolean values from i1 to a wider type. See getBooleanContents. |
| 1879 | void setBooleanContents(BooleanContent IntTy, BooleanContent FloatTy) { |
| 1880 | BooleanContents = IntTy; |
| 1881 | BooleanFloatContents = FloatTy; |
| 1882 | } |
| 1883 | |
| 1884 | /// Specify how the target extends the result of a vector boolean value from a |
| 1885 | /// vector of i1 to a wider type. See getBooleanContents. |
| 1886 | void setBooleanVectorContents(BooleanContent Ty) { |
| 1887 | BooleanVectorContents = Ty; |
| 1888 | } |
| 1889 | |
| 1890 | /// Specify the target scheduling preference. |
| 1891 | void setSchedulingPreference(Sched::Preference Pref) { |
| 1892 | SchedPreferenceInfo = Pref; |
| 1893 | } |
| 1894 | |
| 1895 | /// Indicate whether this target prefers to use _setjmp to implement |
| 1896 | /// llvm.setjmp or the version without _. Defaults to false. |
| 1897 | void setUseUnderscoreSetJmp(bool Val) { |
| 1898 | UseUnderscoreSetJmp = Val; |
| 1899 | } |
| 1900 | |
| 1901 | /// Indicate whether this target prefers to use _longjmp to implement |
| 1902 | /// llvm.longjmp or the version without _. Defaults to false. |
| 1903 | void setUseUnderscoreLongJmp(bool Val) { |
| 1904 | UseUnderscoreLongJmp = Val; |
| 1905 | } |
| 1906 | |
| 1907 | /// Indicate the minimum number of blocks to generate jump tables. |
| 1908 | void setMinimumJumpTableEntries(unsigned Val); |
| 1909 | |
| 1910 | /// Indicate the maximum number of entries in jump tables. |
| 1911 | /// Set to zero to generate unlimited jump tables. |
| 1912 | void setMaximumJumpTableSize(unsigned); |
| 1913 | |
| 1914 | /// If set to a physical register, this specifies the register that |
| 1915 | /// llvm.stacksave/llvm.stackrestore should save and restore. |
| 1916 | void setStackPointerRegisterToSaveRestore(unsigned R) { |
| 1917 | StackPointerRegisterToSaveRestore = R; |
| 1918 | } |
| 1919 | |
| 1920 | /// Tells the code generator that the target has multiple (allocatable) |
| 1921 | /// condition registers that can be used to store the results of comparisons |
| 1922 | /// for use by selects and conditional branches. With multiple condition |
| 1923 | /// registers, the code generator will not aggressively sink comparisons into |
| 1924 | /// the blocks of their users. |
| 1925 | void setHasMultipleConditionRegisters(bool hasManyRegs = true) { |
| 1926 | HasMultipleConditionRegisters = hasManyRegs; |
| 1927 | } |
| 1928 | |
| 1929 | /// Tells the code generator that the target has BitExtract instructions. |
| 1930 | /// The code generator will aggressively sink "shift"s into the blocks of |
| 1931 | /// their users if the users will generate "and" instructions which can be |
| 1932 | /// combined with "shift" to BitExtract instructions. |
| 1933 | void setHasExtractBitsInsn(bool hasExtractInsn = true) { |
| 1934 | HasExtractBitsInsn = hasExtractInsn; |
| 1935 | } |
| 1936 | |
| 1937 | /// Tells the code generator not to expand logic operations on comparison |
| 1938 | /// predicates into separate sequences that increase the amount of flow |
| 1939 | /// control. |
| 1940 | void setJumpIsExpensive(bool isExpensive = true); |
| 1941 | |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 1942 | /// Tells the code generator which bitwidths to bypass. |
| 1943 | void addBypassSlowDiv(unsigned int SlowBitWidth, unsigned int FastBitWidth) { |
| 1944 | BypassSlowDivWidths[SlowBitWidth] = FastBitWidth; |
| 1945 | } |
| 1946 | |
| 1947 | /// Add the specified register class as an available regclass for the |
| 1948 | /// specified value type. This indicates the selector can handle values of |
| 1949 | /// that class natively. |
| 1950 | void addRegisterClass(MVT VT, const TargetRegisterClass *RC) { |
| 1951 | assert((unsigned)VT.SimpleTy < array_lengthof(RegClassForVT)); |
| 1952 | RegClassForVT[VT.SimpleTy] = RC; |
| 1953 | } |
| 1954 | |
| 1955 | /// Return the largest legal super-reg register class of the register class |
| 1956 | /// for the specified type and its associated "cost". |
| 1957 | virtual std::pair<const TargetRegisterClass *, uint8_t> |
| 1958 | findRepresentativeClass(const TargetRegisterInfo *TRI, MVT VT) const; |
| 1959 | |
| 1960 | /// Once all of the register classes are added, this allows us to compute |
| 1961 | /// derived properties we expose. |
| 1962 | void computeRegisterProperties(const TargetRegisterInfo *TRI); |
| 1963 | |
| 1964 | /// Indicate that the specified operation does not work with the specified |
| 1965 | /// type and indicate what to do about it. Note that VT may refer to either |
| 1966 | /// the type of a result or that of an operand of Op. |
| 1967 | void setOperationAction(unsigned Op, MVT VT, |
| 1968 | LegalizeAction Action) { |
| 1969 | assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!"); |
| 1970 | OpActions[(unsigned)VT.SimpleTy][Op] = Action; |
| 1971 | } |
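| | // Typical use (sketch) in a derived TargetLowering constructor: a target |
| | // without a hardware divider would mark integer division for expansion, e.g. |
| | //   setOperationAction(ISD::SDIV, MVT::i32, Expand); |
| | //   setOperationAction(ISD::UDIV, MVT::i32, Expand); |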
| 1972 | |
| 1973 | /// Indicate that the specified load with extension does not work with the |
| 1974 | /// specified type and indicate what to do about it. |
| 1975 | void setLoadExtAction(unsigned ExtType, MVT ValVT, MVT MemVT, |
| 1976 | LegalizeAction Action) { |
| 1977 | assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() && |
| 1978 | MemVT.isValid() && "Table isn't big enough!"); |
| 1979 | assert((unsigned)Action < 0x10 && "too many bits for bitfield array"); |
| 1980 | unsigned Shift = 4 * ExtType; |
| 1981 | LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] &= ~((uint16_t)0xF << Shift); |
| 1982 | LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy] |= (uint16_t)Action << Shift; |
| 1983 | } |
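| | // Typical use (sketch): a target with no sign-extending byte loads producing |
| | // i32 results could request expansion, e.g. |
| | //   setLoadExtAction(ISD::SEXTLOAD, MVT::i32, MVT::i8, Expand); |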
| 1984 | |
| 1985 | /// Indicate that the specified truncating store does not work with the |
| 1986 | /// specified type and indicate what to do about it. |
| 1987 | void setTruncStoreAction(MVT ValVT, MVT MemVT, |
| 1988 | LegalizeAction Action) { |
| 1989 | assert(ValVT.isValid() && MemVT.isValid() && "Table isn't big enough!"); |
| 1990 | TruncStoreActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy] = Action; |
| 1991 | } |
| 1992 | |
| 1993 | /// Indicate that the specified indexed load does or does not work with the |
| 1994 | /// specified type and indicate what to do about it. |
| 1995 | /// |
| 1996 | /// NOTE: All indexed mode loads are initialized to Expand in |
| 1997 | /// TargetLowering.cpp |
| 1998 | void setIndexedLoadAction(unsigned IdxMode, MVT VT, |
| 1999 | LegalizeAction Action) { |
| 2000 | assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE && |
| 2001 | (unsigned)Action < 0xf && "Table isn't big enough!"); |
| 2002 | // Load actions are kept in the upper half. |
| 2003 | IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0xf0; |
| 2004 | IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] |= ((uint8_t)Action) <<4; |
| 2005 | } |
| 2006 | |
| 2007 | /// Indicate that the specified indexed store does or does not work with the |
| 2008 | /// specified type and indicate what to do about it. |
| 2009 | /// |
| 2010 | /// NOTE: All indexed mode stores are initialized to Expand in |
| 2011 | /// TargetLowering.cpp |
| 2012 | void setIndexedStoreAction(unsigned IdxMode, MVT VT, |
| 2013 | LegalizeAction Action) { |
| 2014 | assert(VT.isValid() && IdxMode < ISD::LAST_INDEXED_MODE && |
| 2015 | (unsigned)Action < 0xf && "Table isn't big enough!"); |
| 2016 | // Store actions are kept in the lower half. |
| 2017 | IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] &= ~0x0f; |
| 2018 | IndexedModeActions[(unsigned)VT.SimpleTy][IdxMode] |= ((uint8_t)Action); |
| 2019 | } |
| 2020 | |
| 2021 | /// Indicate that the specified condition code is or isn't supported on the |
| 2022 | /// target and indicate what to do about it. |
| 2023 | void setCondCodeAction(ISD::CondCode CC, MVT VT, |
| 2024 | LegalizeAction Action) { |
| 2025 | assert(VT.isValid() && (unsigned)CC < array_lengthof(CondCodeActions) && |
| 2026 | "Table isn't big enough!"); |
| 2027 | assert((unsigned)Action < 0x10 && "too many bits for bitfield array"); |
| 2028 | /// The lower 3 bits of the SimpleTy select which 4-bit group within the |
| 2029 | /// 32-bit value holds the action, and the upper bits index into the second |
| 2030 | /// dimension of the array to select which 32-bit value to use. |
| 2031 | uint32_t Shift = 4 * (VT.SimpleTy & 0x7); |
| 2032 | CondCodeActions[CC][VT.SimpleTy >> 3] &= ~((uint32_t)0xF << Shift); |
| 2033 | CondCodeActions[CC][VT.SimpleTy >> 3] |= (uint32_t)Action << Shift; |
| 2034 | } |
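| | // Typical use (sketch): a target whose FP compares cannot produce the ordered |
| | // not-equal predicate directly might mark it for expansion, e.g. |
| | //   setCondCodeAction(ISD::SETONE, MVT::f32, Expand); |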
| 2035 | |
| 2036 | /// If Opc/OrigVT is specified as being promoted, the promotion code defaults |
| 2037 | /// to trying a larger integer/fp until it can find one that works. If that |
| 2038 | /// default is insufficient, this method can be used by the target to override |
| 2039 | /// the default. |
| 2040 | void AddPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) { |
| 2041 | PromoteToType[std::make_pair(Opc, OrigVT.SimpleTy)] = DestVT.SimpleTy; |
| 2042 | } |
| 2043 | |
| 2044 | /// Convenience method to set an operation to Promote and specify the type |
| 2045 | /// in a single call. |
| 2046 | void setOperationPromotedToType(unsigned Opc, MVT OrigVT, MVT DestVT) { |
| 2047 | setOperationAction(Opc, OrigVT, Promote); |
| 2048 | AddPromotedToType(Opc, OrigVT, DestVT); |
| 2049 | } |
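| | // Typical use (sketch): a target whose vector bitwise operations exist for |
| | // only one element width might promote the others via bitcast, e.g. |
| | //   setOperationPromotedToType(ISD::AND, MVT::v16i8, MVT::v2i64); |
| | // which is shorthand for setOperationAction(ISD::AND, MVT::v16i8, Promote) |
| | // plus AddPromotedToType(ISD::AND, MVT::v16i8, MVT::v2i64). |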
| 2050 | |
| 2051 | /// Targets should invoke this method for each target independent node that |
| 2052 | /// they want to provide a custom DAG combiner for by implementing the |
| 2053 | /// PerformDAGCombine virtual method. |
| 2054 | void setTargetDAGCombine(ISD::NodeType NT) { |
| 2055 | assert(unsigned(NT >> 3) < array_lengthof(TargetDAGCombineArray)); |
| 2056 | TargetDAGCombineArray[NT >> 3] |= 1 << (NT&7); |
| 2057 | } |
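| | // Typical use (sketch): a target interested in combining sign extensions |
| | // registers the opcode in its constructor, |
| | //   setTargetDAGCombine(ISD::SIGN_EXTEND); |
| | // and then handles ISD::SIGN_EXTEND nodes in its PerformDAGCombine override. |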
| 2058 | |
| 2059 | /// Set the target's required jmp_buf buffer size (in bytes); default is 200 |
| 2060 | void setJumpBufSize(unsigned Size) { |
| 2061 | JumpBufSize = Size; |
| 2062 | } |
| 2063 | |
| 2064 | /// Set the target's required jmp_buf buffer alignment (in bytes); default is |
| 2065 | /// 0 |
| 2066 | void setJumpBufAlignment(unsigned Align) { |
| 2067 | JumpBufAlignment = Align; |
| 2068 | } |
| 2069 | |
| 2070 | /// Set the target's minimum function alignment (in log2(bytes)) |
| 2071 | void setMinFunctionAlignment(unsigned Align) { |
| 2072 | MinFunctionAlignment = Align; |
| 2073 | } |
| 2074 | |
| 2075 | /// Set the target's preferred function alignment. This should be set if |
| 2076 | /// there is a performance benefit to higher-than-minimum alignment (in |
| 2077 | /// log2(bytes)) |
| 2078 | void setPrefFunctionAlignment(unsigned Align) { |
| 2079 | PrefFunctionAlignment = Align; |
| 2080 | } |
| 2081 | |
| 2082 | /// Set the target's preferred loop alignment. Default alignment is zero, it |
| 2083 | /// means the target does not care about loop alignment. The alignment is |
| 2084 | /// specified in log2(bytes). The target may also override |
| 2085 | /// getPrefLoopAlignment to provide per-loop values. |
| 2086 | void setPrefLoopAlignment(unsigned Align) { |
| 2087 | PrefLoopAlignment = Align; |
| 2088 | } |
| 2089 | |
| 2090 | /// Set the minimum stack alignment of an argument (in log2(bytes)). |
| 2091 | void setMinStackArgumentAlignment(unsigned Align) { |
| 2092 | MinStackArgumentAlignment = Align; |
| 2093 | } |
| 2094 | |
| 2095 | /// Set the maximum atomic operation size supported by the |
| 2096 | /// backend. Atomic operations greater than this size (as well as |
| 2097 | /// ones that are not naturally aligned), will be expanded by |
| 2098 | /// AtomicExpandPass into an __atomic_* library call. |
| 2099 | void setMaxAtomicSizeInBitsSupported(unsigned SizeInBits) { |
| 2100 | MaxAtomicSizeInBitsSupported = SizeInBits; |
| 2101 | } |
| 2102 | |
| 2103 | /// Sets the minimum cmpxchg or ll/sc size supported by the backend. |
| 2104 | void setMinCmpXchgSizeInBits(unsigned SizeInBits) { |
| 2105 | MinCmpXchgSizeInBits = SizeInBits; |
| 2106 | } |
| 2107 | |
| 2108 | /// Sets whether unaligned atomic operations are supported. |
| 2109 | void setSupportsUnalignedAtomics(bool UnalignedSupported) { |
| 2110 | SupportsUnalignedAtomics = UnalignedSupported; |
| 2111 | } |
| 2112 | |
| 2113 | public: |
| 2114 | //===--------------------------------------------------------------------===// |
| 2115 | // Addressing mode description hooks (used by LSR etc). |
| 2116 | // |
| 2117 | |
| 2118 | /// CodeGenPrepare sinks address calculations into the same BB as Load/Store |
| 2119 | /// instructions reading the address. This allows as much computation as |
| 2120 | /// possible to be done in the address mode for that operand. This hook lets
| 2121 | /// targets also indicate when this should be done on intrinsics that
| 2122 | /// load/store, returning the address operands and access type by reference.
| 2123 | virtual bool getAddrModeArguments(IntrinsicInst * /*I*/, |
| 2124 | SmallVectorImpl<Value*> &/*Ops*/, |
| 2125 | Type *&/*AccessTy*/) const { |
| 2126 | return false; |
| 2127 | } |
| 2128 | |
| 2129 | /// This represents an addressing mode of: |
| 2130 | /// BaseGV + BaseOffs + BaseReg + Scale*ScaleReg |
| 2131 | /// If BaseGV is null, there is no BaseGV. |
| 2132 | /// If BaseOffs is zero, there is no base offset. |
| 2133 | /// If HasBaseReg is false, there is no base register. |
| 2134 | /// If Scale is zero, there is no ScaleReg. Scale of 1 indicates a reg with |
| 2135 | /// no scale. |
| 2136 | struct AddrMode { |
| 2137 | GlobalValue *BaseGV = nullptr; |
| 2138 | int64_t BaseOffs = 0; |
| 2139 | bool HasBaseReg = false; |
| 2140 | int64_t Scale = 0; |
| 2141 | AddrMode() = default; |
| 2142 | }; |
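// Illustrative sketch: an x86-style address of the form
// "GV + 24 + BaseReg + 4*ScaleReg" corresponds to
//   AddrMode AM;
//   AM.BaseGV = GV; AM.BaseOffs = 24; AM.HasBaseReg = true; AM.Scale = 4;
// (GV and the register names are hypothetical; only the field meanings come
// from the struct above).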
| 2143 | |
| 2144 | /// Return true if the addressing mode represented by AM is legal for this |
| 2145 | /// target, for a load/store of the specified type. |
| 2146 | /// |
| 2147 | /// The type may be VoidTy, in which case only return true if the addressing |
| 2148 | /// mode is legal for a load/store of any legal type. TODO: Handle |
| 2149 | /// pre/postinc as well. |
| 2150 | /// |
| 2151 | /// If the address space cannot be determined, it will be -1. |
| 2152 | /// |
| 2153 | /// TODO: Remove default argument |
| 2154 | virtual bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM, |
| 2155 | Type *Ty, unsigned AddrSpace, |
| 2156 | Instruction *I = nullptr) const; |
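// Illustrative sketch (hypothetical RISC-like target where only
// "BaseReg + signed 12-bit immediate" addressing is legal; isInt<> comes from
// llvm/Support/MathExtras.h):
//   bool isLegalAddressingMode(const DataLayout &DL, const AddrMode &AM,
//                              Type *Ty, unsigned AS,
//                              Instruction *I) const override {
//     return !AM.BaseGV && AM.Scale == 0 && isInt<12>(AM.BaseOffs);
//   }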
| 2157 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 2158 | /// Return the cost of the scaling factor used in the addressing mode |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 2159 | /// represented by AM for this target, for a load/store of the specified type. |
| 2160 | /// |
| 2161 | /// If the AM is supported, the return value must be >= 0. |
| 2162 | /// If the AM is not supported, it returns a negative value. |
| 2163 | /// TODO: Handle pre/postinc as well. |
| 2164 | /// TODO: Remove default argument |
| 2165 | virtual int getScalingFactorCost(const DataLayout &DL, const AddrMode &AM, |
| 2166 | Type *Ty, unsigned AS = 0) const { |
| 2167 | // Default: assume that any scaling factor used in a legal AM is free. |
| 2168 | if (isLegalAddressingMode(DL, AM, Ty, AS)) |
| 2169 | return 0; |
| 2170 | return -1; |
| 2171 | } |
| 2172 | |
| 2173 | /// Return true if the specified immediate is a legal icmp immediate, that is,
| 2174 | /// the target has icmp instructions which can compare a register against the |
| 2175 | /// immediate without having to materialize the immediate into a register. |
| 2176 | virtual bool isLegalICmpImmediate(int64_t) const { |
| 2177 | return true; |
| 2178 | } |
| 2179 | |
| 2180 | /// Return true if the specified immediate is a legal add immediate, that is, the
| 2181 | /// target has add instructions which can add a register with the immediate |
| 2182 | /// without having to materialize the immediate into a register. |
| 2183 | virtual bool isLegalAddImmediate(int64_t) const { |
| 2184 | return true; |
| 2185 | } |
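// Illustrative sketch (hypothetical target whose compare and add instructions
// take unsigned 12-bit immediates; isUInt<> comes from
// llvm/Support/MathExtras.h):
//   bool isLegalICmpImmediate(int64_t Imm) const override {
//     return isUInt<12>(Imm);
//   }
//   bool isLegalAddImmediate(int64_t Imm) const override {
//     return isUInt<12>(Imm);
//   }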
| 2186 | |
Andrew Walbran | 16937d0 | 2019-10-22 13:54:20 +0100 | [diff] [blame] | 2187 | /// Return true if the specified immediate is legal for the value input of a |
| 2188 | /// store instruction. |
| 2189 | virtual bool isLegalStoreImmediate(int64_t Value) const { |
| 2190 | // Default implementation assumes that at least 0 works since it is likely |
| 2191 | // that a zero register exists or a zero immediate is allowed. |
| 2192 | return Value == 0; |
| 2193 | } |
| 2194 | |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 2195 | /// Return true if it's significantly cheaper to shift a vector by a uniform |
| 2196 | /// scalar than by an amount which will vary across each lane. On x86, for |
| 2197 | /// example, there is a "psllw" instruction for the former case, but no simple |
| 2198 | /// instruction for a general "a << b" operation on vectors. |
| 2199 | virtual bool isVectorShiftByScalarCheap(Type *Ty) const { |
| 2200 | return false; |
| 2201 | } |
| 2202 | |
| 2203 | /// Returns true if the opcode is a commutative binary operation. |
| 2204 | virtual bool isCommutativeBinOp(unsigned Opcode) const { |
| 2205 | // FIXME: This should get its info from the td file. |
| 2206 | switch (Opcode) { |
| 2207 | case ISD::ADD: |
| 2208 | case ISD::SMIN: |
| 2209 | case ISD::SMAX: |
| 2210 | case ISD::UMIN: |
| 2211 | case ISD::UMAX: |
| 2212 | case ISD::MUL: |
| 2213 | case ISD::MULHU: |
| 2214 | case ISD::MULHS: |
| 2215 | case ISD::SMUL_LOHI: |
| 2216 | case ISD::UMUL_LOHI: |
| 2217 | case ISD::FADD: |
| 2218 | case ISD::FMUL: |
| 2219 | case ISD::AND: |
| 2220 | case ISD::OR: |
| 2221 | case ISD::XOR: |
| 2222 | case ISD::SADDO: |
| 2223 | case ISD::UADDO: |
| 2224 | case ISD::ADDC: |
| 2225 | case ISD::ADDE: |
Andrew Walbran | 16937d0 | 2019-10-22 13:54:20 +0100 | [diff] [blame] | 2226 | case ISD::SADDSAT: |
| 2227 | case ISD::UADDSAT: |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 2228 | case ISD::FMINNUM: |
| 2229 | case ISD::FMAXNUM: |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame^] | 2230 | case ISD::FMINNUM_IEEE: |
| 2231 | case ISD::FMAXNUM_IEEE: |
Andrew Walbran | 16937d0 | 2019-10-22 13:54:20 +0100 | [diff] [blame] | 2232 | case ISD::FMINIMUM: |
| 2233 | case ISD::FMAXIMUM: |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 2234 | return true; |
| 2235 | default: return false; |
| 2236 | } |
| 2237 | } |
| 2238 | |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame^] | 2239 | /// Return true if the node is a math/logic binary operator. |
| 2240 | virtual bool isBinOp(unsigned Opcode) const { |
| 2241 | // A commutative binop must be a binop. |
| 2242 | if (isCommutativeBinOp(Opcode)) |
| 2243 | return true; |
| 2244 | // These are non-commutative binops. |
| 2245 | switch (Opcode) { |
| 2246 | case ISD::SUB: |
| 2247 | case ISD::SHL: |
| 2248 | case ISD::SRL: |
| 2249 | case ISD::SRA: |
| 2250 | case ISD::SDIV: |
| 2251 | case ISD::UDIV: |
| 2252 | case ISD::SREM: |
| 2253 | case ISD::UREM: |
| 2254 | case ISD::FSUB: |
| 2255 | case ISD::FDIV: |
| 2256 | case ISD::FREM: |
| 2257 | return true; |
| 2258 | default: |
| 2259 | return false; |
| 2260 | } |
| 2261 | } |
| 2262 | |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 2263 | /// Return true if it's free to truncate a value of type FromTy to type |
| 2264 | /// ToTy. e.g. On x86 it's free to truncate an i32 value in register EAX to i16
| 2265 | /// by referencing its sub-register AX. |
| 2266 | /// Targets must return false when FromTy <= ToTy. |
| 2267 | virtual bool isTruncateFree(Type *FromTy, Type *ToTy) const { |
| 2268 | return false; |
| 2269 | } |
| 2270 | |
| 2271 | /// Return true if a truncation from FromTy to ToTy is permitted when deciding |
| 2272 | /// whether a call is in tail position. Typically this means that both results |
| 2273 | /// would be assigned to the same register or stack slot, but it could mean |
| 2274 | /// the target performs adequate checks of its own before proceeding with the |
| 2275 | /// tail call. Targets must return false when FromTy <= ToTy. |
| 2276 | virtual bool allowTruncateForTailCall(Type *FromTy, Type *ToTy) const { |
| 2277 | return false; |
| 2278 | } |
| 2279 | |
| 2280 | virtual bool isTruncateFree(EVT FromVT, EVT ToVT) const { |
| 2281 | return false; |
| 2282 | } |
| 2283 | |
| 2284 | virtual bool isProfitableToHoist(Instruction *I) const { return true; } |
| 2285 | |
| 2286 | /// Return true if the extension represented by \p I is free. |
| 2287 | /// Unlike the is[Z|FP]ExtFree family, which is based on types,
| 2288 | /// this method can use the context provided by \p I to decide |
| 2289 | /// whether or not \p I is free. |
| 2290 | /// This method extends the behavior of the is[Z|FP]ExtFree family. |
| 2291 | /// In other words, if is[Z|FP]ExtFree returns true, then this method
| 2292 | /// returns true as well. The converse is not true. |
| 2293 | /// The target can perform the adequate checks by overriding isExtFreeImpl. |
| 2294 | /// \pre \p I must be a sign, zero, or fp extension. |
| 2295 | bool isExtFree(const Instruction *I) const { |
| 2296 | switch (I->getOpcode()) { |
| 2297 | case Instruction::FPExt: |
| 2298 | if (isFPExtFree(EVT::getEVT(I->getType()), |
| 2299 | EVT::getEVT(I->getOperand(0)->getType()))) |
| 2300 | return true; |
| 2301 | break; |
| 2302 | case Instruction::ZExt: |
| 2303 | if (isZExtFree(I->getOperand(0)->getType(), I->getType())) |
| 2304 | return true; |
| 2305 | break; |
| 2306 | case Instruction::SExt: |
| 2307 | break; |
| 2308 | default: |
| 2309 | llvm_unreachable("Instruction is not an extension"); |
| 2310 | } |
| 2311 | return isExtFreeImpl(I); |
| 2312 | } |
| 2313 | |
| 2314 | /// Return true if \p Load and \p Ext can form an ExtLoad. |
| 2315 | /// For example, in AArch64 |
| 2316 | /// %L = load i8, i8* %ptr |
| 2317 | /// %E = zext i8 %L to i32 |
| 2318 | /// can be lowered into one load instruction |
| 2319 | /// ldrb w0, [x0] |
| 2320 | bool isExtLoad(const LoadInst *Load, const Instruction *Ext, |
| 2321 | const DataLayout &DL) const { |
| 2322 | EVT VT = getValueType(DL, Ext->getType()); |
| 2323 | EVT LoadVT = getValueType(DL, Load->getType()); |
| 2324 | |
| 2325 | // If the load has other users and the truncate is not free, the ext |
| 2326 | // probably isn't free. |
| 2327 | if (!Load->hasOneUse() && (isTypeLegal(LoadVT) || !isTypeLegal(VT)) && |
| 2328 | !isTruncateFree(Ext->getType(), Load->getType())) |
| 2329 | return false; |
| 2330 | |
| 2331 | // Check whether the target supports casts folded into loads. |
| 2332 | unsigned LType; |
| 2333 | if (isa<ZExtInst>(Ext)) |
| 2334 | LType = ISD::ZEXTLOAD; |
| 2335 | else { |
| 2336 | assert(isa<SExtInst>(Ext) && "Unexpected ext type!"); |
| 2337 | LType = ISD::SEXTLOAD; |
| 2338 | } |
| 2339 | |
| 2340 | return isLoadExtLegal(LType, VT, LoadVT); |
| 2341 | } |
| 2342 | |
| 2343 | /// Return true if any actual instruction that defines a value of type FromTy |
| 2344 | /// implicitly zero-extends the value to ToTy in the result register. |
| 2345 | /// |
| 2346 | /// The function should return true when it is likely that the truncate can |
| 2347 | /// be freely folded with an instruction defining a value of FromTy. If |
| 2348 | /// the defining instruction is unknown (because you're looking at a |
| 2349 | /// function argument, PHI, etc.) then the target may require an |
| 2350 | /// explicit truncate, which is not necessarily free, but this function |
| 2351 | /// does not deal with those cases. |
| 2352 | /// Targets must return false when FromTy >= ToTy. |
| 2353 | virtual bool isZExtFree(Type *FromTy, Type *ToTy) const { |
| 2354 | return false; |
| 2355 | } |
| 2356 | |
| 2357 | virtual bool isZExtFree(EVT FromTy, EVT ToTy) const { |
| 2358 | return false; |
| 2359 | } |
| 2360 | |
Andrew Walbran | 16937d0 | 2019-10-22 13:54:20 +0100 | [diff] [blame] | 2361 | /// Return true if sign-extension from FromTy to ToTy is cheaper than |
| 2362 | /// zero-extension. |
| 2363 | virtual bool isSExtCheaperThanZExt(EVT FromTy, EVT ToTy) const { |
| 2364 | return false; |
| 2365 | } |
| 2366 | |
| 2367 | /// Return true if sinking I's operands to the same basic block as I is |
| 2368 | /// profitable, e.g. because the operands can be folded into a target |
| 2369 | /// instruction during instruction selection. After calling the function |
| 2370 | /// \p Ops contains the Uses to sink ordered by dominance (dominating users |
| 2371 | /// come first). |
| 2372 | virtual bool shouldSinkOperands(Instruction *I, |
| 2373 | SmallVectorImpl<Use *> &Ops) const { |
| 2374 | return false; |
| 2375 | } |
| 2376 | |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 2377 | /// Return true if the target supplies and combines to a paired load |
| 2378 | /// two loaded values of type LoadedType next to each other in memory. |
| 2379 | /// RequiredAlignment gives the minimal alignment constraints that must be met |
| 2380 | /// to be able to select this paired load. |
| 2381 | /// |
| 2382 | /// This information is *not* used to generate actual paired loads, but it is |
| 2383 | /// used to generate a sequence of loads that is easier to combine into a |
| 2384 | /// paired load. |
| 2385 | /// For instance, something like this: |
| 2386 | /// a = load i64* addr |
| 2387 | /// b = trunc i64 a to i32 |
| 2388 | /// c = lshr i64 a, 32 |
| 2389 | /// d = trunc i64 c to i32 |
| 2390 | /// will be optimized into: |
| 2391 | /// b = load i32* addr1 |
| 2392 | /// d = load i32* addr2 |
| 2393 | /// Where addr1 = addr2 +/- sizeof(i32). |
| 2394 | /// |
| 2395 | /// In other words, unless the target performs a post-isel load combining, |
| 2396 | /// this information should not be provided because it will generate more |
| 2397 | /// loads. |
| 2398 | virtual bool hasPairedLoad(EVT /*LoadedType*/, |
| 2399 | unsigned & /*RequiredAlignment*/) const { |
| 2400 | return false; |
| 2401 | } |
| 2402 | |
| 2403 | /// Return true if the target has a vector blend instruction. |
| 2404 | virtual bool hasVectorBlend() const { return false; } |
| 2405 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 2406 | /// Get the maximum supported factor for interleaved memory accesses. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 2407 | /// Default to be the minimum interleave factor: 2. |
| 2408 | virtual unsigned getMaxSupportedInterleaveFactor() const { return 2; } |
| 2409 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 2410 | /// Lower an interleaved load to target specific intrinsics. Return |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 2411 | /// true on success. |
| 2412 | /// |
| 2413 | /// \p LI is the vector load instruction. |
| 2414 | /// \p Shuffles is the shufflevector list to DE-interleave the loaded vector. |
| 2415 | /// \p Indices is the corresponding indices for each shufflevector. |
| 2416 | /// \p Factor is the interleave factor. |
| 2417 | virtual bool lowerInterleavedLoad(LoadInst *LI, |
| 2418 | ArrayRef<ShuffleVectorInst *> Shuffles, |
| 2419 | ArrayRef<unsigned> Indices, |
| 2420 | unsigned Factor) const { |
| 2421 | return false; |
| 2422 | } |
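// Illustrative sketch of what this hook sees for factor 2 (AArch64-flavoured,
// names hypothetical): the InterleavedAccess pass hands over the wide load and
// one shufflevector per de-interleaved sequence, e.g.
//   %wide = load <8 x i32>, <8 x i32>* %ptr
//   %even = shufflevector <8 x i32> %wide, <8 x i32> undef,
//                         <4 x i32> <i32 0, i32 2, i32 4, i32 6>
//   %odd  = shufflevector <8 x i32> %wide, <8 x i32> undef,
//                         <4 x i32> <i32 1, i32 3, i32 5, i32 7>
// and a successful implementation replaces them with a target intrinsic such
// as llvm.aarch64.neon.ld2, returning true.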
| 2423 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 2424 | /// Lower an interleaved store to target specific intrinsics. Return |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 2425 | /// true on success. |
| 2426 | /// |
| 2427 | /// \p SI is the vector store instruction. |
| 2428 | /// \p SVI is the shufflevector to RE-interleave the stored vector. |
| 2429 | /// \p Factor is the interleave factor. |
| 2430 | virtual bool lowerInterleavedStore(StoreInst *SI, ShuffleVectorInst *SVI, |
| 2431 | unsigned Factor) const { |
| 2432 | return false; |
| 2433 | } |
| 2434 | |
| 2435 | /// Return true if zero-extending the specific node Val to type VT2 is free |
| 2436 | /// (either because it's implicitly zero-extended such as ARM ldrb / ldrh or |
| 2437 | /// because it's folded such as X86 zero-extending loads). |
| 2438 | virtual bool isZExtFree(SDValue Val, EVT VT2) const { |
| 2439 | return isZExtFree(Val.getValueType(), VT2); |
| 2440 | } |
| 2441 | |
| 2442 | /// Return true if an fpext operation is free (for instance, because |
| 2443 | /// single-precision floating-point numbers are implicitly extended to |
| 2444 | /// double-precision). |
| 2445 | virtual bool isFPExtFree(EVT DestVT, EVT SrcVT) const { |
| 2446 | assert(SrcVT.isFloatingPoint() && DestVT.isFloatingPoint() && |
| 2447 | "invalid fpext types"); |
| 2448 | return false; |
| 2449 | } |
| 2450 | |
| 2451 | /// Return true if an fpext operation input to an \p Opcode operation is free |
| 2452 | /// (for instance, because half-precision floating-point numbers are |
| 2453 | /// implicitly extended to single precision) for an FMA instruction.
| 2454 | virtual bool isFPExtFoldable(unsigned Opcode, EVT DestVT, EVT SrcVT) const { |
| 2455 | assert(DestVT.isFloatingPoint() && SrcVT.isFloatingPoint() && |
| 2456 | "invalid fpext types"); |
| 2457 | return isFPExtFree(DestVT, SrcVT); |
| 2458 | } |
| 2459 | |
| 2460 | /// Return true if folding a vector load into ExtVal (a sign, zero, or any |
| 2461 | /// extend node) is profitable. |
| 2462 | virtual bool isVectorLoadExtDesirable(SDValue ExtVal) const { return false; } |
| 2463 | |
| 2464 | /// Return true if an fneg operation is free to the point where it is never |
| 2465 | /// worthwhile to replace it with a bitwise operation. |
| 2466 | virtual bool isFNegFree(EVT VT) const { |
| 2467 | assert(VT.isFloatingPoint()); |
| 2468 | return false; |
| 2469 | } |
| 2470 | |
| 2471 | /// Return true if an fabs operation is free to the point where it is never |
| 2472 | /// worthwhile to replace it with a bitwise operation. |
| 2473 | virtual bool isFAbsFree(EVT VT) const { |
| 2474 | assert(VT.isFloatingPoint()); |
| 2475 | return false; |
| 2476 | } |
| 2477 | |
| 2478 | /// Return true if an FMA operation is faster than a pair of fmul and fadd |
| 2479 | /// instructions. fmuladd intrinsics will be expanded to FMAs when this method |
| 2480 | /// returns true; otherwise fmuladd is expanded to fmul + fadd.
| 2481 | /// |
| 2482 | /// NOTE: This may be called before legalization on types for which FMAs are |
| 2483 | /// not legal, but should return true if those types will eventually legalize |
| 2484 | /// to types that support FMAs. After legalization, it will only be called on |
| 2485 | /// types that support FMAs (via Legal or Custom actions) |
| 2486 | virtual bool isFMAFasterThanFMulAndFAdd(EVT) const { |
| 2487 | return false; |
| 2488 | } |
| 2489 | |
| 2490 | /// Return true if it's profitable to narrow operations of type VT1 to |
| 2491 | /// VT2. e.g. on x86, it's profitable to narrow from i32 to i8 but not from |
| 2492 | /// i32 to i16. |
| 2493 | virtual bool isNarrowingProfitable(EVT /*VT1*/, EVT /*VT2*/) const { |
| 2494 | return false; |
| 2495 | } |
| 2496 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 2497 | /// Return true if it is beneficial to convert a load of a constant to |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 2498 | /// just the constant itself. |
| 2499 | /// On some targets it might be more efficient to use a combination of |
| 2500 | /// arithmetic instructions to materialize the constant instead of loading it |
| 2501 | /// from a constant pool. |
| 2502 | virtual bool shouldConvertConstantLoadToIntImm(const APInt &Imm, |
| 2503 | Type *Ty) const { |
| 2504 | return false; |
| 2505 | } |
| 2506 | |
| 2507 | /// Return true if EXTRACT_SUBVECTOR is cheap for extracting this result type |
| 2508 | /// from this source type with this index. This is needed because |
| 2509 | /// EXTRACT_SUBVECTOR usually has custom lowering that depends on the index of |
| 2510 | /// the first element, and only the target knows which lowering is cheap. |
| 2511 | virtual bool isExtractSubvectorCheap(EVT ResVT, EVT SrcVT, |
| 2512 | unsigned Index) const { |
| 2513 | return false; |
| 2514 | } |
| 2515 | |
Andrew Walbran | 16937d0 | 2019-10-22 13:54:20 +0100 | [diff] [blame] | 2516 | /// Try to convert an extract element of a vector binary operation into an |
| 2517 | /// extract element followed by a scalar operation. |
| 2518 | virtual bool shouldScalarizeBinop(SDValue VecOp) const { |
| 2519 | return false; |
| 2520 | } |
| 2521 | |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame^] | 2522 | /// Return true if extraction of a scalar element from the given vector type |
| 2523 | /// at the given index is cheap. For example, if scalar operations occur on |
| 2524 | /// the same register file as vector operations, then an extract element may |
| 2525 | /// be a sub-register rename rather than an actual instruction. |
| 2526 | virtual bool isExtractVecEltCheap(EVT VT, unsigned Index) const { |
| 2527 | return false; |
| 2528 | } |
| 2529 | |
| 2530 | /// Try to convert math with an overflow comparison into the corresponding DAG |
| 2531 | /// node operation. Targets may want to override this independently of whether |
| 2532 | /// the operation is legal/custom for the given type because it may obscure |
| 2533 | /// matching of other patterns. |
| 2534 | virtual bool shouldFormOverflowOp(unsigned Opcode, EVT VT) const { |
| 2535 | // TODO: The default logic is inherited from code in CodeGenPrepare. |
| 2536 | // The opcode should not make a difference by default? |
| 2537 | if (Opcode != ISD::UADDO) |
| 2538 | return false; |
| 2539 | |
| 2540 | // Allow the transform as long as we have an integer type that is not |
| 2541 | // obviously illegal and unsupported. |
| 2542 | if (VT.isVector()) |
| 2543 | return false; |
| 2544 | return VT.isSimple() || !isOperationExpand(Opcode, VT); |
| 2545 | } |
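// Illustrative sketch of the transform this hook gates: for ISD::UADDO,
// CodeGenPrepare can rewrite the IR pair
//   %add = add i32 %x, %y
//   %ov  = icmp ult i32 %add, %x
// into a single call to @llvm.uadd.with.overflow.i32, which then lowers to a
// UADDO node.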
| 2546 | |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 2547 | // Return true if it is profitable to use a scalar input to a BUILD_VECTOR |
| 2548 | // even if the vector itself has multiple uses. |
| 2549 | virtual bool aggressivelyPreferBuildVectorSources(EVT VecVT) const { |
| 2550 | return false; |
| 2551 | } |
| 2552 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 2553 | // Return true if CodeGenPrepare should consider splitting a large offset of a
| 2554 | // GEP to make the GEP fit into the addressing mode and to allow it to be sunk
| 2555 | // into the same blocks as its users.
| 2556 | virtual bool shouldConsiderGEPOffsetSplit() const { return false; } |
| 2557 | |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 2558 | //===--------------------------------------------------------------------===// |
| 2559 | // Runtime Library hooks |
| 2560 | // |
| 2561 | |
| 2562 | /// Rename the default libcall routine name for the specified libcall. |
| 2563 | void setLibcallName(RTLIB::Libcall Call, const char *Name) { |
| 2564 | LibcallRoutineNames[Call] = Name; |
| 2565 | } |
| 2566 | |
| 2567 | /// Get the libcall routine name for the specified libcall. |
| 2568 | const char *getLibcallName(RTLIB::Libcall Call) const { |
| 2569 | return LibcallRoutineNames[Call]; |
| 2570 | } |
| 2571 | |
| 2572 | /// Override the default CondCode to be used to test the result of the |
| 2573 | /// comparison libcall against zero. |
| 2574 | void setCmpLibcallCC(RTLIB::Libcall Call, ISD::CondCode CC) { |
| 2575 | CmpLibcallCCs[Call] = CC; |
| 2576 | } |
| 2577 | |
| 2578 | /// Get the CondCode that's to be used to test the result of the comparison |
| 2579 | /// libcall against zero. |
| 2580 | ISD::CondCode getCmpLibcallCC(RTLIB::Libcall Call) const { |
| 2581 | return CmpLibcallCCs[Call]; |
| 2582 | } |
| 2583 | |
| 2584 | /// Set the CallingConv that should be used for the specified libcall. |
| 2585 | void setLibcallCallingConv(RTLIB::Libcall Call, CallingConv::ID CC) { |
| 2586 | LibcallCallingConvs[Call] = CC; |
| 2587 | } |
| 2588 | |
| 2589 | /// Get the CallingConv that should be used for the specified libcall. |
| 2590 | CallingConv::ID getLibcallCallingConv(RTLIB::Libcall Call) const { |
| 2591 | return LibcallCallingConvs[Call]; |
| 2592 | } |
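// Illustrative sketch (hypothetical target constructor; "__my_memcpy" is a
// made-up routine name):
//   setLibcallName(RTLIB::MEMCPY, "__my_memcpy");
//   setCmpLibcallCC(RTLIB::OEQ_F32, ISD::SETEQ);
//   setLibcallCallingConv(RTLIB::SDIV_I32, CallingConv::Fast);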
| 2593 | |
| 2594 | /// Execute target specific actions to finalize target lowering. |
| 2595 | /// This is used to set extra flags in MachineFrameInformation and to freeze
| 2596 | /// the set of reserved registers. |
| 2597 | /// The default implementation just freezes the set of reserved registers. |
| 2598 | virtual void finalizeLowering(MachineFunction &MF) const; |
| 2599 | |
| 2600 | private: |
| 2601 | const TargetMachine &TM; |
| 2602 | |
| 2603 | /// Tells the code generator that the target has multiple (allocatable) |
| 2604 | /// condition registers that can be used to store the results of comparisons |
| 2605 | /// for use by selects and conditional branches. With multiple condition |
| 2606 | /// registers, the code generator will not aggressively sink comparisons into |
| 2607 | /// the blocks of their users. |
| 2608 | bool HasMultipleConditionRegisters; |
| 2609 | |
| 2610 | /// Tells the code generator that the target has BitExtract instructions. |
| 2611 | /// The code generator will aggressively sink "shift"s into the blocks of |
| 2612 | /// their users if the users will generate "and" instructions which can be |
| 2613 | /// combined with "shift" to BitExtract instructions. |
| 2614 | bool HasExtractBitsInsn; |
| 2615 | |
| 2616 | /// Tells the code generator to bypass slow divide or remainder |
| 2617 | /// instructions. For example, BypassSlowDivWidths[32,8] tells the code |
| 2618 | /// generator to bypass 32-bit integer div/rem with an 8-bit unsigned integer |
| 2619 | /// div/rem when the operands are positive and less than 256. |
| 2620 | DenseMap <unsigned int, unsigned int> BypassSlowDivWidths; |
| 2621 | |
| 2622 | /// Tells the code generator that it shouldn't generate extra flow control |
| 2623 | /// instructions and should attempt to combine flow control instructions via |
| 2624 | /// predication. |
| 2625 | bool JumpIsExpensive; |
| 2626 | |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 2627 | /// This target prefers to use _setjmp to implement llvm.setjmp. |
| 2628 | /// |
| 2629 | /// Defaults to false. |
| 2630 | bool UseUnderscoreSetJmp; |
| 2631 | |
| 2632 | /// This target prefers to use _longjmp to implement llvm.longjmp. |
| 2633 | /// |
| 2634 | /// Defaults to false. |
| 2635 | bool UseUnderscoreLongJmp; |
| 2636 | |
| 2637 | /// Information about the contents of the high-bits in boolean values held in |
| 2638 | /// a type wider than i1. See getBooleanContents. |
| 2639 | BooleanContent BooleanContents; |
| 2640 | |
| 2641 | /// Information about the contents of the high-bits in boolean values held in |
| 2642 | /// a type wider than i1. See getBooleanContents. |
| 2643 | BooleanContent BooleanFloatContents; |
| 2644 | |
| 2645 | /// Information about the contents of the high-bits in boolean vector values |
| 2646 | /// when the element type is wider than i1. See getBooleanContents. |
| 2647 | BooleanContent BooleanVectorContents; |
| 2648 | |
| 2649 | /// The target scheduling preference: shortest possible total cycles or lowest |
| 2650 | /// register usage. |
| 2651 | Sched::Preference SchedPreferenceInfo; |
| 2652 | |
| 2653 | /// The size, in bytes, of the target's jmp_buf buffers |
| 2654 | unsigned JumpBufSize; |
| 2655 | |
| 2656 | /// The alignment, in bytes, of the target's jmp_buf buffers |
| 2657 | unsigned JumpBufAlignment; |
| 2658 | |
| 2659 | /// The minimum alignment that any argument on the stack needs to have. |
| 2660 | unsigned MinStackArgumentAlignment; |
| 2661 | |
| 2662 | /// The minimum function alignment (used when optimizing for size, and to |
| 2663 | /// prevent explicitly provided alignment from leading to incorrect code). |
| 2664 | unsigned MinFunctionAlignment; |
| 2665 | |
| 2666 | /// The preferred function alignment (used when alignment unspecified and |
| 2667 | /// optimizing for speed). |
| 2668 | unsigned PrefFunctionAlignment; |
| 2669 | |
| 2670 | /// The preferred loop alignment. |
| 2671 | unsigned PrefLoopAlignment; |
| 2672 | |
| 2673 | /// Size in bits of the maximum atomics size the backend supports. |
| 2674 | /// Accesses larger than this will be expanded by AtomicExpandPass. |
| 2675 | unsigned MaxAtomicSizeInBitsSupported; |
| 2676 | |
| 2677 | /// Size in bits of the minimum cmpxchg or ll/sc operation the |
| 2678 | /// backend supports. |
| 2679 | unsigned MinCmpXchgSizeInBits; |
| 2680 | |
| 2681 | /// This indicates if the target supports unaligned atomic operations. |
| 2682 | bool SupportsUnalignedAtomics; |
| 2683 | |
| 2684 | /// If set to a physical register, this specifies the register that |
| 2685 | /// llvm.stacksave/llvm.stackrestore should save and restore.
| 2686 | unsigned StackPointerRegisterToSaveRestore; |
| 2687 | |
| 2688 | /// This indicates the default register class to use for each ValueType the |
| 2689 | /// target supports natively. |
| 2690 | const TargetRegisterClass *RegClassForVT[MVT::LAST_VALUETYPE]; |
| 2691 | unsigned char NumRegistersForVT[MVT::LAST_VALUETYPE]; |
| 2692 | MVT RegisterTypeForVT[MVT::LAST_VALUETYPE]; |
| 2693 | |
| 2694 | /// This indicates the "representative" register class to use for each |
| 2695 | /// ValueType the target supports natively. This information is used by the |
| 2696 | /// scheduler to track register pressure. By default, the representative |
| 2697 | /// register class is the largest legal super-reg register class of the |
| 2698 | /// register class of the specified type. e.g. On x86, i8, i16, and i32's |
| 2699 | /// representative class would be GR32. |
| 2700 | const TargetRegisterClass *RepRegClassForVT[MVT::LAST_VALUETYPE]; |
| 2701 | |
| 2702 | /// This indicates the "cost" of the "representative" register class for each |
| 2703 | /// ValueType. The cost is used by the scheduler to approximate register |
| 2704 | /// pressure. |
| 2705 | uint8_t RepRegClassCostForVT[MVT::LAST_VALUETYPE]; |
| 2706 | |
| 2707 | /// For any value types we are promoting or expanding, this contains the value |
| 2708 | /// type that we are changing to. For Expanded types, this contains one step |
| 2709 | /// of the expand (e.g. i64 -> i32), even if there are multiple steps required |
| 2710 | /// (e.g. i64 -> i16). For types natively supported by the system, this holds |
| 2711 | /// the same type (e.g. i32 -> i32). |
| 2712 | MVT TransformToType[MVT::LAST_VALUETYPE]; |
| 2713 | |
| 2714 | /// For each operation and each value type, keep a LegalizeAction that |
| 2715 | /// indicates how instruction selection should deal with the operation. Most |
| 2716 | /// operations are Legal (aka, supported natively by the target), but |
| 2717 | /// operations that are not should be described. Note that operations on |
| 2718 | /// non-legal value types are not described here. |
| 2719 | LegalizeAction OpActions[MVT::LAST_VALUETYPE][ISD::BUILTIN_OP_END]; |
| 2720 | |
| 2721 | /// For each load extension type and each value type, keep a LegalizeAction |
| 2722 | /// that indicates how instruction selection should deal with a load of a |
| 2723 | /// specific value type and extension type. Uses 4-bits to store the action |
| 2724 | /// for each of the 4 load ext types. |
| 2725 | uint16_t LoadExtActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE]; |
| 2726 | |
| 2727 | /// For each value type pair keep a LegalizeAction that indicates whether a |
| 2728 | /// truncating store of a specific value type and truncating type is legal. |
| 2729 | LegalizeAction TruncStoreActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE]; |
| 2730 | |
| 2731 | /// For each indexed mode and each value type, keep a pair of LegalizeAction |
| 2732 | /// that indicates how instruction selection should deal with the load / |
| 2733 | /// store. |
| 2734 | /// |
| 2735 | /// The first dimension is the value_type for the reference. The second |
| 2736 | /// dimension represents the various modes for load store. |
| 2737 | uint8_t IndexedModeActions[MVT::LAST_VALUETYPE][ISD::LAST_INDEXED_MODE]; |
| 2738 | |
| 2739 | /// For each condition code (ISD::CondCode) keep a LegalizeAction that |
| 2740 | /// indicates how instruction selection should deal with the condition code. |
| 2741 | /// |
| 2742 | /// Because each CC action takes up 4 bits, we need to have the array size be |
| 2743 | /// large enough to fit all of the value types. This can be done by rounding |
| 2744 | /// up the MVT::LAST_VALUETYPE value to the next multiple of 8. |
| 2745 | uint32_t CondCodeActions[ISD::SETCC_INVALID][(MVT::LAST_VALUETYPE + 7) / 8]; |
| 2746 | |
| 2747 | protected: |
| 2748 | ValueTypeActionImpl ValueTypeActions; |
| 2749 | |
| 2750 | private: |
| 2751 | LegalizeKind getTypeConversion(LLVMContext &Context, EVT VT) const; |
| 2752 | |
| 2753 | /// Targets can specify ISD nodes that they would like PerformDAGCombine |
| 2754 | /// callbacks for by calling setTargetDAGCombine(), which sets a bit in this |
| 2755 | /// array. |
| 2756 | unsigned char |
| 2757 | TargetDAGCombineArray[(ISD::BUILTIN_OP_END+CHAR_BIT-1)/CHAR_BIT]; |
| 2758 | |
| 2759 | /// For operations that must be promoted to a specific type, this holds the |
| 2760 | /// destination type. This map should be sparse, so don't hold it as an |
| 2761 | /// array. |
| 2762 | /// |
| 2763 | /// Targets add entries to this map with AddPromotedToType(..), clients access |
| 2764 | /// this with getTypeToPromoteTo(..). |
| 2765 | std::map<std::pair<unsigned, MVT::SimpleValueType>, MVT::SimpleValueType> |
| 2766 | PromoteToType; |
| 2767 | |
| 2768 | /// Stores the name of each libcall.
| 2769 | const char *LibcallRoutineNames[RTLIB::UNKNOWN_LIBCALL + 1]; |
| 2770 | |
| 2771 | /// The ISD::CondCode that should be used to test the result of each of the |
| 2772 | /// comparison libcall against zero. |
| 2773 | ISD::CondCode CmpLibcallCCs[RTLIB::UNKNOWN_LIBCALL]; |
| 2774 | |
| 2775 | /// Stores the CallingConv that should be used for each libcall. |
| 2776 | CallingConv::ID LibcallCallingConvs[RTLIB::UNKNOWN_LIBCALL]; |
| 2777 | |
| 2778 | /// Set default libcall names and calling conventions. |
| 2779 | void InitLibcalls(const Triple &TT); |
| 2780 | |
| 2781 | protected: |
| 2782 | /// Return true if the extension represented by \p I is free. |
| 2783 | /// \pre \p I is a sign, zero, or fp extension and |
| 2784 | /// is[Z|FP]ExtFree of the related types is not true. |
| 2785 | virtual bool isExtFreeImpl(const Instruction *I) const { return false; } |
| 2786 | |
| 2787 | /// Depth that GatherAllAliases should continue looking for chain
| 2788 | /// dependencies when trying to find a more preferable chain. As an |
| 2789 | /// approximation, this should be more than the number of consecutive stores |
| 2790 | /// expected to be merged. |
| 2791 | unsigned GatherAllAliasesMaxDepth; |
| 2792 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 2793 | /// Specify maximum number of store instructions per memset call. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 2794 | /// |
| 2795 | /// When lowering \@llvm.memset this field specifies the maximum number of |
| 2796 | /// store operations that may be substituted for the call to memset. Targets |
| 2797 | /// must set this value based on the cost threshold for that target. Targets |
| 2798 | /// should assume that the memset will be done using as many of the largest |
| 2799 | /// store operations first, followed by smaller ones, if necessary, per |
| 2800 | /// alignment restrictions. For example, storing 9 bytes on a 32-bit machine |
| 2801 | /// with 16-bit alignment would result in four 2-byte stores and one 1-byte |
| 2802 | /// store. This only applies to setting a constant array of a constant size. |
| 2803 | unsigned MaxStoresPerMemset; |
| 2804 | |
| 2805 | /// Maximum number of stores operations that may be substituted for the call |
| 2806 | /// to memset, used for functions with OptSize attribute. |
| 2807 | unsigned MaxStoresPerMemsetOptSize; |
| 2808 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 2809 | /// Specify maximum number of store instructions per memcpy call.
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 2810 | /// |
| 2811 | /// When lowering \@llvm.memcpy this field specifies the maximum number of |
| 2812 | /// store operations that may be substituted for a call to memcpy. Targets |
| 2813 | /// must set this value based on the cost threshold for that target. Targets |
| 2814 | /// should assume that the memcpy will be done using as many of the largest |
| 2815 | /// store operations first, followed by smaller ones, if necessary, per |
| 2816 | /// alignment restrictions. For example, storing 7 bytes on a 32-bit machine |
| 2817 | /// with 32-bit alignment would result in one 4-byte store, one 2-byte store,
| 2818 | /// and one 1-byte store. This only applies to copying a constant array of |
| 2819 | /// constant size. |
| 2820 | unsigned MaxStoresPerMemcpy; |
| 2821 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 2822 | |
| 2823 | /// \brief Specify max number of store instructions to glue in inlined memcpy. |
| 2824 | /// |
| 2825 | /// When memcpy is inlined based on MaxStoresPerMemcpy, specify maximum number |
| 2826 | /// of store instructions to keep together. This helps in pairing and |
| 2827 | /// vectorization later on.
| 2828 | unsigned MaxGluedStoresPerMemcpy = 0; |
| 2829 | |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 2830 | /// Maximum number of store operations that may be substituted for a call to |
| 2831 | /// memcpy, used for functions with OptSize attribute. |
| 2832 | unsigned MaxStoresPerMemcpyOptSize; |
| 2833 | unsigned MaxLoadsPerMemcmp; |
| 2834 | unsigned MaxLoadsPerMemcmpOptSize; |
| 2835 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 2836 | /// Specify maximum number of store instructions per memmove call.
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 2837 | /// |
| 2838 | /// When lowering \@llvm.memmove this field specifies the maximum number of |
| 2839 | /// store instructions that may be substituted for a call to memmove. Targets |
| 2840 | /// must set this value based on the cost threshold for that target. Targets |
| 2841 | /// should assume that the memmove will be done using as many of the largest |
| 2842 | /// store operations first, followed by smaller ones, if necessary, per |
| 2843 | /// alignment restrictions. For example, moving 9 bytes on a 32-bit machine |
| 2844 | /// with 8-bit alignment would result in nine 1-byte stores. This only |
| 2845 | /// applies to copying a constant array of constant size. |
| 2846 | unsigned MaxStoresPerMemmove; |
| 2847 | |
| 2848 | /// Maximum number of store instructions that may be substituted for a call to |
| 2849 | /// memmove, used for functions with OptSize attribute. |
| 2850 | unsigned MaxStoresPerMemmoveOptSize; |
| 2851 | |
| 2852 | /// Tells the code generator that select is more expensive than a branch if |
| 2853 | /// the branch is usually predicted right. |
| 2854 | bool PredictableSelectIsExpensive; |
| 2855 | |
| 2856 | /// \see enableExtLdPromotion. |
| 2857 | bool EnableExtLdPromotion; |
| 2858 | |
| 2859 | /// Return true if the value types that can be represented by the specified |
| 2860 | /// register class are all legal. |
| 2861 | bool isLegalRC(const TargetRegisterInfo &TRI, |
| 2862 | const TargetRegisterClass &RC) const; |
| 2863 | |
| 2864 | /// Replace/modify any TargetFrameIndex operands with a target-dependent
| 2865 | /// sequence of memory operands that is recognized by PrologEpilogInserter. |
| 2866 | MachineBasicBlock *emitPatchPoint(MachineInstr &MI, |
| 2867 | MachineBasicBlock *MBB) const; |
| 2868 | |
| 2869 | /// Replace/modify the XRay custom event operands with target-dependent |
| 2870 | /// details. |
| 2871 | MachineBasicBlock *emitXRayCustomEvent(MachineInstr &MI, |
| 2872 | MachineBasicBlock *MBB) const; |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 2873 | |
| 2874 | /// Replace/modify the XRay typed event operands with target-dependent |
| 2875 | /// details. |
| 2876 | MachineBasicBlock *emitXRayTypedEvent(MachineInstr &MI, |
| 2877 | MachineBasicBlock *MBB) const; |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 2878 | }; |
| 2879 | |
| 2880 | /// This class defines information used to lower LLVM code to legal SelectionDAG |
| 2881 | /// operators that the target instruction selector can accept natively. |
| 2882 | /// |
| 2883 | /// This class also defines callbacks that targets must implement to lower |
| 2884 | /// target-specific constructs to SelectionDAG operators. |
| 2885 | class TargetLowering : public TargetLoweringBase { |
| 2886 | public: |
| 2887 | struct DAGCombinerInfo; |
| 2888 | |
| 2889 | TargetLowering(const TargetLowering &) = delete; |
| 2890 | TargetLowering &operator=(const TargetLowering &) = delete; |
| 2891 | |
| 2892 | /// NOTE: The TargetMachine owns TLOF. |
| 2893 | explicit TargetLowering(const TargetMachine &TM); |
| 2894 | |
| 2895 | bool isPositionIndependent() const; |
| 2896 | |
| 2897 | virtual bool isSDNodeSourceOfDivergence(const SDNode *N, |
| 2898 | FunctionLoweringInfo *FLI, |
Andrew Scull | 0372a57 | 2018-11-16 15:47:06 +0000 | [diff] [blame] | 2899 | LegacyDivergenceAnalysis *DA) const { |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 2900 | return false; |
| 2901 | } |
| 2902 | |
| 2903 | virtual bool isSDNodeAlwaysUniform(const SDNode * N) const { |
| 2904 | return false; |
| 2905 | } |
| 2906 | |
| 2907 | /// Returns true by value, base pointer and offset pointer and addressing mode |
| 2908 | /// by reference if the node's address can be legally represented as |
| 2909 | /// pre-indexed load / store address. |
| 2910 | virtual bool getPreIndexedAddressParts(SDNode * /*N*/, SDValue &/*Base*/, |
| 2911 | SDValue &/*Offset*/, |
| 2912 | ISD::MemIndexedMode &/*AM*/, |
| 2913 | SelectionDAG &/*DAG*/) const { |
| 2914 | return false; |
| 2915 | } |
| 2916 | |
| 2917 | /// Returns true by value, base pointer and offset pointer and addressing mode |
| 2918 | /// by reference if this node can be combined with a load / store to form a |
| 2919 | /// post-indexed load / store. |
| 2920 | virtual bool getPostIndexedAddressParts(SDNode * /*N*/, SDNode * /*Op*/, |
| 2921 | SDValue &/*Base*/, |
| 2922 | SDValue &/*Offset*/, |
| 2923 | ISD::MemIndexedMode &/*AM*/, |
| 2924 | SelectionDAG &/*DAG*/) const { |
| 2925 | return false; |
| 2926 | } |
| 2927 | |
| 2928 | /// Return the entry encoding for a jump table in the current function. The |
| 2929 | /// returned value is a member of the MachineJumpTableInfo::JTEntryKind enum. |
| 2930 | virtual unsigned getJumpTableEncoding() const; |
| 2931 | |
| 2932 | virtual const MCExpr * |
| 2933 | LowerCustomJumpTableEntry(const MachineJumpTableInfo * /*MJTI*/, |
| 2934 | const MachineBasicBlock * /*MBB*/, unsigned /*uid*/, |
| 2935 | MCContext &/*Ctx*/) const { |
| 2936 | llvm_unreachable("Need to implement this hook if target has custom JTIs"); |
| 2937 | } |
| 2938 | |
| 2939 | /// Returns relocation base for the given PIC jumptable. |
| 2940 | virtual SDValue getPICJumpTableRelocBase(SDValue Table, |
| 2941 | SelectionDAG &DAG) const; |
| 2942 | |
| 2943 | /// This returns the relocation base for the given PIC jumptable, the same as |
| 2944 | /// getPICJumpTableRelocBase, but as an MCExpr. |
| 2945 | virtual const MCExpr * |
| 2946 | getPICJumpTableRelocBaseExpr(const MachineFunction *MF, |
| 2947 | unsigned JTI, MCContext &Ctx) const; |
| 2948 | |
| 2949 | /// Return true if folding a constant offset with the given GlobalAddress is |
| 2950 | /// legal. It is frequently not legal in PIC relocation models. |
| 2951 | virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const; |
| 2952 | |
| 2953 | bool isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, |
| 2954 | SDValue &Chain) const; |
| 2955 | |
| 2956 | void softenSetCCOperands(SelectionDAG &DAG, EVT VT, SDValue &NewLHS, |
| 2957 | SDValue &NewRHS, ISD::CondCode &CCCode, |
| 2958 | const SDLoc &DL) const; |
| 2959 | |
| 2960 | /// Returns a pair of (return value, chain). |
| 2961 | /// It is an error to pass RTLIB::UNKNOWN_LIBCALL as \p LC. |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame^] | 2962 | std::pair<SDValue, SDValue> makeLibCall( |
| 2963 | SelectionDAG &DAG, RTLIB::Libcall LC, EVT RetVT, ArrayRef<SDValue> Ops, |
| 2964 | bool isSigned, const SDLoc &dl, bool doesNotReturn = false, |
| 2965 | bool isReturnValueUsed = true, bool isPostTypeLegalization = false) const; |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 2966 | |
| 2967 | /// Check whether parameters to a call that are passed in callee saved |
| 2968 | /// registers are the same as from the calling function. This needs to be |
| 2969 | /// checked for tail call eligibility. |
| 2970 | bool parametersInCSRMatch(const MachineRegisterInfo &MRI, |
| 2971 | const uint32_t *CallerPreservedMask, |
| 2972 | const SmallVectorImpl<CCValAssign> &ArgLocs, |
| 2973 | const SmallVectorImpl<SDValue> &OutVals) const; |
| 2974 | |
| 2975 | //===--------------------------------------------------------------------===// |
| 2976 | // TargetLowering Optimization Methods |
| 2977 | // |
| 2978 | |
| 2979 | /// A convenience struct that encapsulates a DAG, and two SDValues for |
| 2980 | /// returning information from TargetLowering to its clients that want to |
| 2981 | /// combine. |
| 2982 | struct TargetLoweringOpt { |
| 2983 | SelectionDAG &DAG; |
| 2984 | bool LegalTys; |
| 2985 | bool LegalOps; |
| 2986 | SDValue Old; |
| 2987 | SDValue New; |
| 2988 | |
| 2989 | explicit TargetLoweringOpt(SelectionDAG &InDAG, |
| 2990 | bool LT, bool LO) : |
| 2991 | DAG(InDAG), LegalTys(LT), LegalOps(LO) {} |
| 2992 | |
| 2993 | bool LegalTypes() const { return LegalTys; } |
| 2994 | bool LegalOperations() const { return LegalOps; } |
| 2995 | |
| 2996 | bool CombineTo(SDValue O, SDValue N) { |
| 2997 | Old = O; |
| 2998 | New = N; |
| 2999 | return true; |
| 3000 | } |
| 3001 | }; |
| 3002 | |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame^] | 3003 | /// Determines the optimal series of memory ops to replace the memset / memcpy. |
| 3004 | /// Return true if the number of memory ops is below the threshold (Limit). |
| 3005 | /// It returns the types of the sequence of memory ops to perform |
| 3006 | /// memset / memcpy by reference. |
| 3007 | bool findOptimalMemOpLowering(std::vector<EVT> &MemOps, |
| 3008 | unsigned Limit, uint64_t Size, |
| 3009 | unsigned DstAlign, unsigned SrcAlign, |
| 3010 | bool IsMemset, |
| 3011 | bool ZeroMemset, |
| 3012 | bool MemcpyStrSrc, |
| 3013 | bool AllowOverlap, |
| 3014 | unsigned DstAS, unsigned SrcAS, |
| 3015 | const AttributeList &FuncAttributes) const; |
| 3016 | |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 3017 | /// Check to see if the specified operand of the specified instruction is a |
| 3018 | /// constant integer. If so, check to see if there are any bits set in the |
| 3019 | /// constant that are not demanded. If so, shrink the constant and return |
| 3020 | /// true. |
| 3021 | bool ShrinkDemandedConstant(SDValue Op, const APInt &Demanded, |
| 3022 | TargetLoweringOpt &TLO) const; |
| 3023 | |
| 3024 | // Target hook to do target-specific const optimization, which is called by |
| 3025 | // ShrinkDemandedConstant. This function should return true if the target |
| 3026 | // doesn't want ShrinkDemandedConstant to further optimize the constant. |
| 3027 | virtual bool targetShrinkDemandedConstant(SDValue Op, const APInt &Demanded, |
| 3028 | TargetLoweringOpt &TLO) const { |
| 3029 | return false; |
| 3030 | } |
| 3031 | |
| 3032 | /// Convert x+y to (VT)((SmallVT)x+(SmallVT)y) if the casts are free. This |
| 3033 | /// uses isZExtFree and ZERO_EXTEND for the widening cast, but it could be |
| 3034 | /// generalized for targets with other types of implicit widening casts. |
| 3035 | bool ShrinkDemandedOp(SDValue Op, unsigned BitWidth, const APInt &Demanded, |
| 3036 | TargetLoweringOpt &TLO) const; |
| 3037 | |
Andrew Walbran | 16937d0 | 2019-10-22 13:54:20 +0100 | [diff] [blame] | 3038 | /// Look at Op. At this point, we know that only the DemandedBits bits of the |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 3039 | /// result of Op are ever used downstream. If we can use this information to |
| 3040 | /// simplify Op, create a new simplified DAG node and return true, returning |
| 3041 | /// the original and new nodes in Old and New. Otherwise, analyze the |
| 3042 | /// expression and return a mask of KnownOne and KnownZero bits for the |
| 3043 | /// expression (used to simplify the caller). The KnownZero/One bits may only |
Andrew Walbran | 16937d0 | 2019-10-22 13:54:20 +0100 | [diff] [blame] | 3044 | /// be accurate for those bits in the Demanded masks. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 3045 | /// \p AssumeSingleUse When this parameter is true, this function will |
| 3046 | /// attempt to simplify \p Op even if there are multiple uses. |
| 3047 | /// Callers are responsible for correctly updating the DAG based on the |
| 3048 | /// results of this function, because simply replacing TLO.Old
| 3049 | /// with TLO.New will be incorrect when this parameter is true and TLO.Old |
| 3050 | /// has multiple uses. |
Andrew Walbran | 16937d0 | 2019-10-22 13:54:20 +0100 | [diff] [blame] | 3051 | bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, |
| 3052 | const APInt &DemandedElts, KnownBits &Known, |
| 3053 | TargetLoweringOpt &TLO, unsigned Depth = 0, |
| 3054 | bool AssumeSingleUse = false) const; |
| 3055 | |
| 3056 | /// Helper wrapper around SimplifyDemandedBits, demanding all elements. |
| 3057 | /// Adds Op back to the worklist upon success. |
| 3058 | bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedBits, |
| 3059 | KnownBits &Known, TargetLoweringOpt &TLO, |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 3060 | unsigned Depth = 0, |
| 3061 | bool AssumeSingleUse = false) const; |
| 3062 | |
Andrew Scull | 0372a57 | 2018-11-16 15:47:06 +0000 | [diff] [blame] | 3063 | /// Helper wrapper around SimplifyDemandedBits. |
| 3064 | /// Adds Op back to the worklist upon success. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 3065 | bool SimplifyDemandedBits(SDValue Op, const APInt &DemandedMask, |
| 3066 | DAGCombinerInfo &DCI) const; |
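// Illustrative sketch of how a target combine typically uses this wrapper
// (N and Op are hypothetical locals in a PerformDAGCombine implementation):
//   APInt DemandedMask = APInt::getLowBitsSet(32, 16);
//   if (SimplifyDemandedBits(Op, DemandedMask, DCI))
//     return SDValue(N, 0);   // N was updated in place; report it as handled.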
| 3067 | |
| 3068 | /// Look at Vector Op. At this point, we know that only the DemandedElts |
| 3069 | /// elements of the result of Op are ever used downstream. If we can use |
| 3070 | /// this information to simplify Op, create a new simplified DAG node and |
| 3071 | /// return true, storing the original and new nodes in TLO. |
| 3072 | /// Otherwise, analyze the expression and return a mask of KnownUndef and |
| 3073 | /// KnownZero elements for the expression (used to simplify the caller). |
| 3074 | /// The KnownUndef/Zero elements may only be accurate for those bits |
| 3075 | /// in the DemandedMask. |
| 3076 | /// \p AssumeSingleUse When this parameter is true, this function will |
| 3077 | /// attempt to simplify \p Op even if there are multiple uses. |
| 3078 | /// Callers are responsible for correctly updating the DAG based on the |
| 3079 | /// results of this function, because simply replacing TLO.Old
| 3080 | /// with TLO.New will be incorrect when this parameter is true and TLO.Old |
| 3081 | /// has multiple uses. |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 3082 | bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedEltMask, |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 3083 | APInt &KnownUndef, APInt &KnownZero, |
| 3084 | TargetLoweringOpt &TLO, unsigned Depth = 0, |
| 3085 | bool AssumeSingleUse = false) const; |
| 3086 | |
Andrew Scull | 0372a57 | 2018-11-16 15:47:06 +0000 | [diff] [blame] | 3087 | /// Helper wrapper around SimplifyDemandedVectorElts. |
| 3088 | /// Adds Op back to the worklist upon success. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 3089 | bool SimplifyDemandedVectorElts(SDValue Op, const APInt &DemandedElts, |
| 3090 | APInt &KnownUndef, APInt &KnownZero, |
| 3091 | DAGCombinerInfo &DCI) const; |
| 3092 | |
| 3093 | /// Determine which of the bits specified in Mask are known to be either zero |
| 3094 | /// or one and return them in the KnownZero/KnownOne bitsets. The DemandedElts |
| 3095 | /// argument allows us to only collect the known bits that are shared by the |
| 3096 | /// requested vector elements. |
| 3097 | virtual void computeKnownBitsForTargetNode(const SDValue Op, |
| 3098 | KnownBits &Known, |
| 3099 | const APInt &DemandedElts, |
| 3100 | const SelectionDAG &DAG, |
| 3101 | unsigned Depth = 0) const; |
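// Illustrative sketch (MyISD::WIDGET is a made-up target node whose top 16
// result bits are always zero):
//   void computeKnownBitsForTargetNode(const SDValue Op, KnownBits &Known,
//                                      const APInt &DemandedElts,
//                                      const SelectionDAG &DAG,
//                                      unsigned Depth) const override {
//     if (Op.getOpcode() == MyISD::WIDGET)
//       Known.Zero.setHighBits(16);
//   }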
| 3102 | |
| 3103 | /// Determine which of the bits of FrameIndex \p FIOp are known to be 0. |
| 3104 | /// Default implementation computes low bits based on alignment |
| 3105 | /// information. This should preserve known bits passed into it. |
| 3106 | virtual void computeKnownBitsForFrameIndex(const SDValue FIOp, |
| 3107 | KnownBits &Known, |
| 3108 | const APInt &DemandedElts, |
| 3109 | const SelectionDAG &DAG, |
| 3110 | unsigned Depth = 0) const; |
| 3111 | |
| 3112 | /// This method can be implemented by targets that want to expose additional |
| 3113 | /// information about sign bits to the DAG Combiner. The DemandedElts |
| 3114 | /// argument allows us to only collect the minimum sign bits that are shared |
| 3115 | /// by the requested vector elements. |
| 3116 | virtual unsigned ComputeNumSignBitsForTargetNode(SDValue Op, |
| 3117 | const APInt &DemandedElts, |
| 3118 | const SelectionDAG &DAG, |
| 3119 | unsigned Depth = 0) const; |
| 3120 | |
| 3121 | /// Attempt to simplify any target nodes based on the demanded vector |
| 3122 | /// elements, returning true on success. Otherwise, analyze the expression and |
| 3123 | /// return a mask of KnownUndef and KnownZero elements for the expression |
| 3124 | /// (used to simplify the caller). The KnownUndef/Zero elements may only be |
Andrew Walbran | 16937d0 | 2019-10-22 13:54:20 +0100 | [diff] [blame] | 3125 | /// accurate for those bits in the DemandedMask. |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 3126 | virtual bool SimplifyDemandedVectorEltsForTargetNode( |
| 3127 | SDValue Op, const APInt &DemandedElts, APInt &KnownUndef, |
| 3128 | APInt &KnownZero, TargetLoweringOpt &TLO, unsigned Depth = 0) const; |
| 3129 | |
Andrew Walbran | 16937d0 | 2019-10-22 13:54:20 +0100 | [diff] [blame] | 3130 | /// Attempt to simplify any target nodes based on the demanded bits/elts, |
| 3131 | /// returning true on success. Otherwise, analyze the |
| 3132 | /// expression and return a mask of KnownOne and KnownZero bits for the |
| 3133 | /// expression (used to simplify the caller). The KnownZero/One bits may only |
| 3134 | /// be accurate for those bits in the Demanded masks. |
| 3135 | virtual bool SimplifyDemandedBitsForTargetNode(SDValue Op, |
| 3136 | const APInt &DemandedBits, |
| 3137 | const APInt &DemandedElts, |
| 3138 | KnownBits &Known, |
| 3139 | TargetLoweringOpt &TLO, |
| 3140 | unsigned Depth = 0) const; |
| 3141 | |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame^] | 3142 | /// This method returns the constant pool value that will be loaded by LD. |
| 3143 | /// NOTE: You must check for implicit extensions of the constant by LD. |
| 3144 | virtual const Constant *getTargetConstantFromLoad(LoadSDNode *LD) const; |
| 3145 | |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 3146 | /// If \p SNaN is false, \returns true if \p Op is known to never be any |
| 3147 | /// NaN. If \p SNaN is true, returns true if \p Op is known to never be a signaling
| 3148 | /// NaN. |
| 3149 | virtual bool isKnownNeverNaNForTargetNode(SDValue Op, |
| 3150 | const SelectionDAG &DAG, |
| 3151 | bool SNaN = false, |
| 3152 | unsigned Depth = 0) const; |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 3153 | struct DAGCombinerInfo { |
| 3154 | void *DC; // The DAG Combiner object. |
| 3155 | CombineLevel Level; |
| 3156 | bool CalledByLegalizer; |
| 3157 | |
| 3158 | public: |
| 3159 | SelectionDAG &DAG; |
| 3160 | |
| 3161 | DAGCombinerInfo(SelectionDAG &dag, CombineLevel level, bool cl, void *dc) |
| 3162 | : DC(dc), Level(level), CalledByLegalizer(cl), DAG(dag) {} |
| 3163 | |
| 3164 | bool isBeforeLegalize() const { return Level == BeforeLegalizeTypes; } |
| 3165 | bool isBeforeLegalizeOps() const { return Level < AfterLegalizeVectorOps; } |
| 3166 | bool isAfterLegalizeDAG() const { |
| 3167 | return Level == AfterLegalizeDAG; |
| 3168 | } |
| 3169 | CombineLevel getDAGCombineLevel() { return Level; } |
| 3170 | bool isCalledByLegalizer() const { return CalledByLegalizer; } |
| 3171 | |
| 3172 | void AddToWorklist(SDNode *N); |
| 3173 | SDValue CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo = true); |
| 3174 | SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true); |
| 3175 | SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo = true); |
| 3176 | |
| 3177 | void CommitTargetLoweringOpt(const TargetLoweringOpt &TLO); |
| 3178 | }; |
| 3179 | |
| 3180 | /// Return true if N is a constant or constant vector equal to the true value |
| 3181 | /// from getBooleanContents(). |
| 3182 | bool isConstTrueVal(const SDNode *N) const; |
| 3183 | |
| 3184 | /// Return true if N is a constant or constant vector equal to the false value |
| 3185 | /// from getBooleanContents(). |
| 3186 | bool isConstFalseVal(const SDNode *N) const; |
| 3187 | |
| 3188 | /// Return true if \p N is a true value when extended to \p VT. |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 3189 | bool isExtendedTrueVal(const ConstantSDNode *N, EVT VT, bool SExt) const; |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 3190 | |
| 3191 | /// Try to simplify a setcc built with the specified operands and cc. If it is |
| 3192 | /// unable to simplify it, return a null SDValue. |
| 3193 | SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, |
| 3194 | bool foldBooleans, DAGCombinerInfo &DCI, |
| 3195 | const SDLoc &dl) const; |
| 3196 | |
| 3197 | // For targets which wrap addresses, unwrap the address for analysis. |
| 3198 | virtual SDValue unwrapAddress(SDValue N) const { return N; } |
| 3199 | |
| 3200 | /// Returns true (and the GlobalValue and the offset) if the node is a |
| 3201 | /// GlobalAddress + offset. |
| 3202 | virtual bool |
| 3203 | isGAPlusOffset(SDNode *N, const GlobalValue* &GA, int64_t &Offset) const; |
| 3204 | |
| 3205 | /// This method will be invoked for all target nodes and for any |
| 3206 | /// target-independent nodes that the target has registered (via |
| 3207 | /// setTargetDAGCombine) to have it invoked for. |
| 3208 | /// |
| 3209 | /// The semantics are as follows: |
| 3210 | /// Return Value: |
| 3211 | /// SDValue.Val == 0 - No change was made |
| 3212 | /// SDValue.Val == N - N was replaced, is dead, and is already handled. |
| 3213 | /// otherwise - N should be replaced by the returned Operand. |
| 3214 | /// |
| 3215 | /// In addition, methods provided by DAGCombinerInfo may be used to perform |
| 3216 | /// more complex transformations. |
| 3217 | /// |
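| | /// As a rough illustration (not part of this interface), a target override |
| | /// often has the following shape; "MyTargetLowering" and "MyTargetISD::FOO" |
| | /// are hypothetical names: |
| | /// \code |
| | ///   SDValue MyTargetLowering::PerformDAGCombine(SDNode *N, |
| | ///                                               DAGCombinerInfo &DCI) const { |
| | ///     switch (N->getOpcode()) { |
| | ///     default: |
| | ///       return SDValue();          // No change was made. |
| | ///     case MyTargetISD::FOO: |
| | ///       // Fold (FOO x, 0) -> x. |
| | ///       if (isNullConstant(N->getOperand(1))) |
| | ///         return N->getOperand(0); // N should be replaced by this value. |
| | ///       return SDValue(); |
| | ///     } |
| | ///   } |
| | /// \endcode |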
| 3218 | virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; |
| 3219 | |
Andrew Scull | 0372a57 | 2018-11-16 15:47:06 +0000 | [diff] [blame] | 3220 | /// Return true if it is profitable to move this shift by a constant amount |
| 3221 | /// through its operand, adjusting any immediate operands as necessary to |
| 3222 | /// preserve semantics. This transformation may not be desirable if it |
| 3223 | /// disrupts a particularly auspicious target-specific tree (e.g. bitfield |
| 3224 | /// extraction in AArch64). By default, it returns true. |
| 3225 | /// |
| 3226 | /// @param N the shift node |
| 3227 | /// @param Level the current DAGCombine legalization level. |
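| | /// |
| | /// A hedged sketch of an override that declines the transform to protect a |
| | /// masking pattern; "MyTargetLowering" is a hypothetical target and this is |
| | /// not copied from any in-tree implementation: |
| | /// \code |
| | ///   bool MyTargetLowering::isDesirableToCommuteWithShift( |
| | ///       const SDNode *N, CombineLevel Level) const { |
| | ///     // Keep (shl (and X, mask), imm) intact; it feeds bitfield-insert |
| | ///     // selection patterns. |
| | ///     if (N->getOpcode() == ISD::SHL && |
| | ///         N->getOperand(0).getOpcode() == ISD::AND) |
| | ///       return false; |
| | ///     return true; |
| | ///   } |
| | /// \endcode |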
| 3228 | virtual bool isDesirableToCommuteWithShift(const SDNode *N, |
| 3229 | CombineLevel Level) const { |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 3230 | return true; |
| 3231 | } |
| 3232 | |
| 3233 | // Return true if it is profitable to combine a BUILD_VECTOR with a stride-pattern |
| 3234 | // to a shuffle and a truncate. |
| 3235 | // Example of such a combine: |
| 3236 | // v4i32 build_vector((extract_elt V, 1), |
| 3237 | // (extract_elt V, 3), |
| 3238 | // (extract_elt V, 5), |
| 3239 | // (extract_elt V, 7)) |
| 3240 | // --> |
| 3241 | // v4i32 truncate (bitcast (shuffle<1,u,3,u,5,u,7,u> V, u) to v4i64) |
| 3242 | virtual bool isDesirableToCombineBuildVectorToShuffleTruncate( |
| 3243 | ArrayRef<int> ShuffleMask, EVT SrcVT, EVT TruncVT) const { |
| 3244 | return false; |
| 3245 | } |
| 3246 | |
| 3247 | /// Return true if the target has native support for the specified value type |
| 3248 | /// and it is 'desirable' to use the type for the given node type. e.g. On x86 |
| 3249 | /// i16 is legal, but undesirable since i16 instruction encodings are longer |
| 3250 | /// and some i16 instructions are slow. |
| 3251 | virtual bool isTypeDesirableForOp(unsigned /*Opc*/, EVT VT) const { |
| 3252 | // By default, assume all legal types are desirable. |
| 3253 | return isTypeLegal(VT); |
| 3254 | } |
| 3255 | |
| 3256 | /// Return true if it is profitable for the dag combiner to transform a floating |
| 3257 | /// point op of the specified opcode to an equivalent op of an integer |
| 3258 | /// type. e.g. f32 load -> i32 load can be profitable on ARM. |
| 3259 | virtual bool isDesirableToTransformToIntegerOp(unsigned /*Opc*/, |
| 3260 | EVT /*VT*/) const { |
| 3261 | return false; |
| 3262 | } |
| 3263 | |
| 3264 | /// This method queries the target whether it is beneficial for the dag |
| 3265 | /// combiner to promote the specified node. If true, it should return the |
| 3266 | /// desired promotion type by reference. |
| 3267 | virtual bool IsDesirableToPromoteOp(SDValue /*Op*/, EVT &/*PVT*/) const { |
| 3268 | return false; |
| 3269 | } |
| 3270 | |
| 3271 | /// Return true if the target supports swifterror attribute. It optimizes |
| 3272 | /// loads and stores to reading and writing a specific register. |
| 3273 | virtual bool supportSwiftError() const { |
| 3274 | return false; |
| 3275 | } |
| 3276 | |
| 3277 | /// Return true if the target supports that a subset of CSRs for the given |
| 3278 | /// machine function is handled explicitly via copies. |
| 3279 | virtual bool supportSplitCSR(MachineFunction *MF) const { |
| 3280 | return false; |
| 3281 | } |
| 3282 | |
| 3283 | /// Perform necessary initialization to handle a subset of CSRs explicitly |
| 3284 | /// via copies. This function is called at the beginning of instruction |
| 3285 | /// selection. |
| 3286 | virtual void initializeSplitCSR(MachineBasicBlock *Entry) const { |
| 3287 | llvm_unreachable("Not Implemented"); |
| 3288 | } |
| 3289 | |
| 3290 | /// Insert explicit copies in entry and exit blocks. We copy a subset of |
| 3291 | /// CSRs to virtual registers in the entry block, and copy them back to |
| 3292 | /// physical registers in the exit blocks. This function is called at the end |
| 3293 | /// of instruction selection. |
| 3294 | virtual void insertCopiesSplitCSR( |
| 3295 | MachineBasicBlock *Entry, |
| 3296 | const SmallVectorImpl<MachineBasicBlock *> &Exits) const { |
| 3297 | llvm_unreachable("Not Implemented"); |
| 3298 | } |
| 3299 | |
| 3300 | //===--------------------------------------------------------------------===// |
| 3301 | // Lowering methods - These methods must be implemented by targets so that |
| 3302 | // the SelectionDAGBuilder code knows how to lower these. |
| 3303 | // |
| 3304 | |
| 3305 | /// This hook must be implemented to lower the incoming (formal) arguments, |
| 3306 | /// described by the Ins array, into the specified DAG. The implementation |
| 3307 | /// should fill in the InVals array with legal-type argument values, and |
| 3308 | /// return the resulting token chain value. |
| 3309 | virtual SDValue LowerFormalArguments( |
| 3310 | SDValue /*Chain*/, CallingConv::ID /*CallConv*/, bool /*isVarArg*/, |
| 3311 | const SmallVectorImpl<ISD::InputArg> & /*Ins*/, const SDLoc & /*dl*/, |
| 3312 | SelectionDAG & /*DAG*/, SmallVectorImpl<SDValue> & /*InVals*/) const { |
| 3313 | llvm_unreachable("Not Implemented"); |
| 3314 | } |
| 3315 | |
| 3316 | /// This structure contains all information that is necessary for lowering |
| 3317 | /// calls. It is passed to TLI::LowerCallTo when the SelectionDAG builder |
| 3318 | /// needs to lower a call, and targets will see this struct in their LowerCall |
| 3319 | /// implementation. |
| 3320 | struct CallLoweringInfo { |
| 3321 | SDValue Chain; |
| 3322 | Type *RetTy = nullptr; |
| 3323 | bool RetSExt : 1; |
| 3324 | bool RetZExt : 1; |
| 3325 | bool IsVarArg : 1; |
| 3326 | bool IsInReg : 1; |
| 3327 | bool DoesNotReturn : 1; |
| 3328 | bool IsReturnValueUsed : 1; |
| 3329 | bool IsConvergent : 1; |
| 3330 | bool IsPatchPoint : 1; |
| 3331 | |
| 3332 | // IsTailCall should be modified by implementations of |
| 3333 | // TargetLowering::LowerCall that perform tail call conversions. |
| 3334 | bool IsTailCall = false; |
| 3335 | |
| 3336 | // Is Call lowering done post SelectionDAG type legalization. |
| 3337 | bool IsPostTypeLegalization = false; |
| 3338 | |
| 3339 | unsigned NumFixedArgs = -1; |
| 3340 | CallingConv::ID CallConv = CallingConv::C; |
| 3341 | SDValue Callee; |
| 3342 | ArgListTy Args; |
| 3343 | SelectionDAG &DAG; |
| 3344 | SDLoc DL; |
| 3345 | ImmutableCallSite CS; |
| 3346 | SmallVector<ISD::OutputArg, 32> Outs; |
| 3347 | SmallVector<SDValue, 32> OutVals; |
| 3348 | SmallVector<ISD::InputArg, 32> Ins; |
| 3349 | SmallVector<SDValue, 4> InVals; |
| 3350 | |
| 3351 | CallLoweringInfo(SelectionDAG &DAG) |
| 3352 | : RetSExt(false), RetZExt(false), IsVarArg(false), IsInReg(false), |
| 3353 | DoesNotReturn(false), IsReturnValueUsed(true), IsConvergent(false), |
| 3354 | IsPatchPoint(false), DAG(DAG) {} |
| 3355 | |
| 3356 | CallLoweringInfo &setDebugLoc(const SDLoc &dl) { |
| 3357 | DL = dl; |
| 3358 | return *this; |
| 3359 | } |
| 3360 | |
| 3361 | CallLoweringInfo &setChain(SDValue InChain) { |
| 3362 | Chain = InChain; |
| 3363 | return *this; |
| 3364 | } |
| 3365 | |
| 3366 | // setCallee with target/module-specific attributes |
| 3367 | CallLoweringInfo &setLibCallee(CallingConv::ID CC, Type *ResultType, |
| 3368 | SDValue Target, ArgListTy &&ArgsList) { |
| 3369 | RetTy = ResultType; |
| 3370 | Callee = Target; |
| 3371 | CallConv = CC; |
| 3372 | NumFixedArgs = ArgsList.size(); |
| 3373 | Args = std::move(ArgsList); |
| 3374 | |
| 3375 | DAG.getTargetLoweringInfo().markLibCallAttributes( |
| 3376 | &(DAG.getMachineFunction()), CC, Args); |
| 3377 | return *this; |
| 3378 | } |
| 3379 | |
| 3380 | CallLoweringInfo &setCallee(CallingConv::ID CC, Type *ResultType, |
| 3381 | SDValue Target, ArgListTy &&ArgsList) { |
| 3382 | RetTy = ResultType; |
| 3383 | Callee = Target; |
| 3384 | CallConv = CC; |
| 3385 | NumFixedArgs = ArgsList.size(); |
| 3386 | Args = std::move(ArgsList); |
| 3387 | return *this; |
| 3388 | } |
| 3389 | |
| 3390 | CallLoweringInfo &setCallee(Type *ResultType, FunctionType *FTy, |
| 3391 | SDValue Target, ArgListTy &&ArgsList, |
| 3392 | ImmutableCallSite Call) { |
| 3393 | RetTy = ResultType; |
| 3394 | |
| 3395 | IsInReg = Call.hasRetAttr(Attribute::InReg); |
| 3396 | DoesNotReturn = |
| 3397 | Call.doesNotReturn() || |
| 3398 | (!Call.isInvoke() && |
| 3399 | isa<UnreachableInst>(Call.getInstruction()->getNextNode())); |
| 3400 | IsVarArg = FTy->isVarArg(); |
| 3401 | IsReturnValueUsed = !Call.getInstruction()->use_empty(); |
| 3402 | RetSExt = Call.hasRetAttr(Attribute::SExt); |
| 3403 | RetZExt = Call.hasRetAttr(Attribute::ZExt); |
| 3404 | |
| 3405 | Callee = Target; |
| 3406 | |
| 3407 | CallConv = Call.getCallingConv(); |
| 3408 | NumFixedArgs = FTy->getNumParams(); |
| 3409 | Args = std::move(ArgsList); |
| 3410 | |
| 3411 | CS = Call; |
| 3412 | |
| 3413 | return *this; |
| 3414 | } |
| 3415 | |
| 3416 | CallLoweringInfo &setInRegister(bool Value = true) { |
| 3417 | IsInReg = Value; |
| 3418 | return *this; |
| 3419 | } |
| 3420 | |
| 3421 | CallLoweringInfo &setNoReturn(bool Value = true) { |
| 3422 | DoesNotReturn = Value; |
| 3423 | return *this; |
| 3424 | } |
| 3425 | |
| 3426 | CallLoweringInfo &setVarArg(bool Value = true) { |
| 3427 | IsVarArg = Value; |
| 3428 | return *this; |
| 3429 | } |
| 3430 | |
| 3431 | CallLoweringInfo &setTailCall(bool Value = true) { |
| 3432 | IsTailCall = Value; |
| 3433 | return *this; |
| 3434 | } |
| 3435 | |
| 3436 | CallLoweringInfo &setDiscardResult(bool Value = true) { |
| 3437 | IsReturnValueUsed = !Value; |
| 3438 | return *this; |
| 3439 | } |
| 3440 | |
| 3441 | CallLoweringInfo &setConvergent(bool Value = true) { |
| 3442 | IsConvergent = Value; |
| 3443 | return *this; |
| 3444 | } |
| 3445 | |
| 3446 | CallLoweringInfo &setSExtResult(bool Value = true) { |
| 3447 | RetSExt = Value; |
| 3448 | return *this; |
| 3449 | } |
| 3450 | |
| 3451 | CallLoweringInfo &setZExtResult(bool Value = true) { |
| 3452 | RetZExt = Value; |
| 3453 | return *this; |
| 3454 | } |
| 3455 | |
| 3456 | CallLoweringInfo &setIsPatchPoint(bool Value = true) { |
| 3457 | IsPatchPoint = Value; |
| 3458 | return *this; |
| 3459 | } |
| 3460 | |
| 3461 | CallLoweringInfo &setIsPostTypeLegalization(bool Value=true) { |
| 3462 | IsPostTypeLegalization = Value; |
| 3463 | return *this; |
| 3464 | } |
| 3465 | |
| 3466 | ArgListTy &getArgs() { |
| 3467 | return Args; |
| 3468 | } |
| 3469 | }; |
| 3470 | |
| 3471 | /// This function lowers an abstract call to a function into an actual call. |
| 3472 | /// This returns a pair of operands. The first element is the return value |
| 3473 | /// for the function (if RetTy is not VoidTy). The second element is the |
| 3474 | /// outgoing token chain. It calls LowerCall to do the actual lowering. |
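| | /// |
| | /// A hedged usage sketch, assuming it is called from a target's lowering code |
| | /// with \c DAG and an operand \c Op in scope; the callee symbol "__my_helper" |
| | /// and the i64 types are illustrative only: |
| | /// \code |
| | ///   TargetLowering::ArgListTy Args; |
| | ///   TargetLowering::ArgListEntry Entry; |
| | ///   Entry.Node = Op.getOperand(0); |
| | ///   Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); |
| | ///   Args.push_back(Entry); |
| | /// |
| | ///   CallLoweringInfo CLI(DAG); |
| | ///   CLI.setDebugLoc(SDLoc(Op)) |
| | ///       .setChain(DAG.getEntryNode()) |
| | ///       .setLibCallee( |
| | ///           CallingConv::C, Type::getInt64Ty(*DAG.getContext()), |
| | ///           DAG.getExternalSymbol("__my_helper", |
| | ///                                 getPointerTy(DAG.getDataLayout())), |
| | ///           std::move(Args)); |
| | ///   std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI); |
| | ///   SDValue ReturnValue = CallResult.first;  // Return value (non-void RetTy). |
| | ///   SDValue OutChain = CallResult.second;    // Outgoing token chain. |
| | /// \endcode |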
| 3475 | std::pair<SDValue, SDValue> LowerCallTo(CallLoweringInfo &CLI) const; |
| 3476 | |
| 3477 | /// This hook must be implemented to lower calls into the specified |
| 3478 | /// DAG. The outgoing arguments to the call are described by the Outs array, |
| 3479 | /// and the values to be returned by the call are described by the Ins |
| 3480 | /// array. The implementation should fill in the InVals array with legal-type |
| 3481 | /// return values from the call, and return the resulting token chain value. |
| 3482 | virtual SDValue |
| 3483 | LowerCall(CallLoweringInfo &/*CLI*/, |
| 3484 | SmallVectorImpl<SDValue> &/*InVals*/) const { |
| 3485 | llvm_unreachable("Not Implemented"); |
| 3486 | } |
| 3487 | |
| 3488 | /// Target-specific cleanup for formal ByVal parameters. |
| 3489 | virtual void HandleByVal(CCState *, unsigned &, unsigned) const {} |
| 3490 | |
| 3491 | /// This hook should be implemented to check whether the return values |
| 3492 | /// described by the Outs array can fit into the return registers. If false |
| 3493 | /// is returned, an sret-demotion is performed. |
| 3494 | virtual bool CanLowerReturn(CallingConv::ID /*CallConv*/, |
| 3495 | MachineFunction &/*MF*/, bool /*isVarArg*/, |
| 3496 | const SmallVectorImpl<ISD::OutputArg> &/*Outs*/, |
| 3497 | LLVMContext &/*Context*/) const |
| 3498 | { |
| 3499 | // Return true by default to get preexisting behavior. |
| 3500 | return true; |
| 3501 | } |
| 3502 | |
| 3503 | /// This hook must be implemented to lower outgoing return values, described |
| 3504 | /// by the Outs array, into the specified DAG. The implementation should |
| 3505 | /// return the resulting token chain value. |
| 3506 | virtual SDValue LowerReturn(SDValue /*Chain*/, CallingConv::ID /*CallConv*/, |
| 3507 | bool /*isVarArg*/, |
| 3508 | const SmallVectorImpl<ISD::OutputArg> & /*Outs*/, |
| 3509 | const SmallVectorImpl<SDValue> & /*OutVals*/, |
| 3510 | const SDLoc & /*dl*/, |
| 3511 | SelectionDAG & /*DAG*/) const { |
| 3512 | llvm_unreachable("Not Implemented"); |
| 3513 | } |
| 3514 | |
| 3515 | /// Return true if the result of the specified node is used by a return node |
| 3516 | /// only. It also computes and returns the input chain for the tail call. |
| 3517 | /// |
| 3518 | /// This is used to determine whether it is possible to codegen a libcall as |
| 3519 | /// tail call at legalization time. |
| 3520 | virtual bool isUsedByReturnOnly(SDNode *, SDValue &/*Chain*/) const { |
| 3521 | return false; |
| 3522 | } |
| 3523 | |
| 3524 | /// Return true if the target may be able to emit the call instruction as a tail |
| 3525 | /// call. This is used by optimization passes to determine if it's profitable |
| 3526 | /// to duplicate return instructions to enable tailcall optimization. |
| 3527 | virtual bool mayBeEmittedAsTailCall(const CallInst *) const { |
| 3528 | return false; |
| 3529 | } |
| 3530 | |
| 3531 | /// Return the builtin name for the __builtin___clear_cache intrinsic. The |
| 3532 | /// default is to invoke the clear cache library call. |
| 3533 | virtual const char * getClearCacheBuiltinName() const { |
| 3534 | return "__clear_cache"; |
| 3535 | } |
| 3536 | |
| 3537 | /// Return the register ID of the name passed in. Used by the named register |
| 3538 | /// global variables extension. There is no target-independent behaviour, so |
| 3539 | /// the default action is to bail. |
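| | /// |
| | /// Sketch of a typical override (the target and register names are |
| | /// hypothetical; llvm/ADT/StringSwitch.h would need to be included): |
| | /// \code |
| | ///   unsigned MyTargetLowering::getRegisterByName(const char *RegName, EVT VT, |
| | ///                                                SelectionDAG &DAG) const { |
| | ///     unsigned Reg = StringSwitch<unsigned>(RegName) |
| | ///                        .Case("sp", MyTarget::SP) |
| | ///                        .Default(0); |
| | ///     if (Reg) |
| | ///       return Reg; |
| | ///     report_fatal_error("Invalid register name global variable"); |
| | ///   } |
| | /// \endcode |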
| 3540 | virtual unsigned getRegisterByName(const char* RegName, EVT VT, |
| 3541 | SelectionDAG &DAG) const { |
| 3542 | report_fatal_error("Named registers not implemented for this target"); |
| 3543 | } |
| 3544 | |
| 3545 | /// Return the type that should be used to zero or sign extend a |
| 3546 | /// zeroext/signext integer return value. FIXME: Some C calling conventions |
| 3547 | /// require the return type to be promoted, but this is not true all the time, |
| 3548 | /// e.g. i1/i8/i16 on x86/x86_64. It is also not necessary for non-C calling |
| 3549 | /// conventions. The frontend should handle this and include all of the |
| 3550 | /// necessary information. |
| 3551 | virtual EVT getTypeForExtReturn(LLVMContext &Context, EVT VT, |
| 3552 | ISD::NodeType /*ExtendKind*/) const { |
| 3553 | EVT MinVT = getRegisterType(Context, MVT::i32); |
| 3554 | return VT.bitsLT(MinVT) ? MinVT : VT; |
| 3555 | } |
| 3556 | |
| 3557 | /// For some targets, an LLVM struct type must be broken down into multiple |
| 3558 | /// simple types, but the calling convention specifies that the entire struct |
| 3559 | /// must be passed in a block of consecutive registers. |
| 3560 | virtual bool |
| 3561 | functionArgumentNeedsConsecutiveRegisters(Type *Ty, CallingConv::ID CallConv, |
| 3562 | bool isVarArg) const { |
| 3563 | return false; |
| 3564 | } |
| 3565 | |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame^] | 3566 | /// For most targets, an LLVM type must be broken down into multiple smaller |
| 3567 | /// types for argument passing. Usually the halves are ordered according to the |
| 3568 | /// endianness, but for some platforms that would be wrong, so this method |
| 3569 | /// defaults to matching the endianness and can be overridden. |
| 3570 | virtual bool |
| 3571 | shouldSplitFunctionArgumentsAsLittleEndian(const DataLayout &DL) const { |
| 3572 | return DL.isLittleEndian(); |
| 3573 | } |
| 3574 | |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 3575 | /// Returns a 0 terminated array of registers that can be safely used as |
| 3576 | /// scratch registers. |
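| | /// |
| | /// For illustration only; the register name below is hypothetical: |
| | /// \code |
| | ///   const MCPhysReg * |
| | ///   MyTargetLowering::getScratchRegisters(CallingConv::ID) const { |
| | ///     // Zero-terminated list of registers that stackmap/patchpoint shadows |
| | ///     // may clobber freely. |
| | ///     static const MCPhysReg ScratchRegs[] = { MyTarget::R11, 0 }; |
| | ///     return ScratchRegs; |
| | ///   } |
| | /// \endcode |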
| 3577 | virtual const MCPhysReg *getScratchRegisters(CallingConv::ID CC) const { |
| 3578 | return nullptr; |
| 3579 | } |
| 3580 | |
| 3581 | /// This callback is used to prepare for a volatile or atomic load. |
| 3582 | /// It takes a chain node as input and returns the chain for the load itself. |
| 3583 | /// |
| 3584 | /// Having a callback like this is necessary for targets like SystemZ, |
| 3585 | /// which allows a CPU to reuse the result of a previous load indefinitely, |
| 3586 | /// even if a cache-coherent store is performed by another CPU. The default |
| 3587 | /// implementation does nothing. |
| 3588 | virtual SDValue prepareVolatileOrAtomicLoad(SDValue Chain, const SDLoc &DL, |
| 3589 | SelectionDAG &DAG) const { |
| 3590 | return Chain; |
| 3591 | } |
| 3592 | |
| 3593 | /// This callback is used to inspect load/store instructions and add |
| 3594 | /// target-specific MachineMemOperand flags to them. The default |
| 3595 | /// implementation does nothing. |
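| | /// |
| | /// A minimal sketch, assuming a hypothetical "mytarget.streaming" metadata |
| | /// kind that the target maps onto one of the MOTargetFlag* bits: |
| | /// \code |
| | ///   MachineMemOperand::Flags |
| | ///   MyTargetLowering::getMMOFlags(const Instruction &I) const { |
| | ///     if (I.getMetadata("mytarget.streaming")) |
| | ///       return MachineMemOperand::MOTargetFlag1; // decoded by the target |
| | ///     return MachineMemOperand::MONone; |
| | ///   } |
| | /// \endcode |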
| 3596 | virtual MachineMemOperand::Flags getMMOFlags(const Instruction &I) const { |
| 3597 | return MachineMemOperand::MONone; |
| 3598 | } |
| 3599 | |
| 3600 | /// This callback is invoked by the type legalizer to legalize nodes with an |
| 3601 | /// illegal operand type but legal result types. It replaces the |
| 3602 | /// LowerOperation callback in the type legalizer. The reason we cannot do |
| 3603 | /// away with LowerOperation entirely is that LegalizeDAG isn't yet ready to |
| 3604 | /// use this callback. |
| 3605 | /// |
| 3606 | /// TODO: Consider merging with ReplaceNodeResults. |
| 3607 | /// |
| 3608 | /// The target places new result values for the node in Results (their number |
| 3609 | /// and types must exactly match those of the original return values of |
| 3610 | /// the node), or leaves Results empty, which indicates that the node is not |
| 3611 | /// to be custom lowered after all. |
| 3612 | /// The default implementation calls LowerOperation. |
| 3613 | virtual void LowerOperationWrapper(SDNode *N, |
| 3614 | SmallVectorImpl<SDValue> &Results, |
| 3615 | SelectionDAG &DAG) const; |
| 3616 | |
| 3617 | /// This callback is invoked for operations that are unsupported by the |
| 3618 | /// target, which are registered to use 'custom' lowering, and whose defined |
| 3619 | /// values are all legal. If the target has no operations that require custom |
| 3620 | /// lowering, it need not implement this. The default implementation of this |
| 3621 | /// aborts. |
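| | /// |
| | /// The usual override shape is a switch over the custom-lowered opcodes; the |
| | /// helper functions named here are hypothetical: |
| | /// \code |
| | ///   SDValue MyTargetLowering::LowerOperation(SDValue Op, |
| | ///                                            SelectionDAG &DAG) const { |
| | ///     switch (Op.getOpcode()) { |
| | ///     case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG); |
| | ///     case ISD::VASTART:       return LowerVASTART(Op, DAG); |
| | ///     default: |
| | ///       llvm_unreachable("unexpected operation registered as Custom"); |
| | ///     } |
| | ///   } |
| | /// \endcode |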
| 3622 | virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG) const; |
| 3623 | |
| 3624 | /// This callback is invoked when a node result type is illegal for the |
| 3625 | /// target, and the operation was registered to use 'custom' lowering for that |
| 3626 | /// result type. The target places new result values for the node in Results |
| 3627 | /// (their number and types must exactly match those of the original return |
| 3628 | /// values of the node), or leaves Results empty, which indicates that the |
| 3629 | /// node is not to be custom lowered after all. |
| 3630 | /// |
| 3631 | /// If the target has no operations that require custom lowering, it need not |
| 3632 | /// implement this. The default implementation aborts. |
| 3633 | virtual void ReplaceNodeResults(SDNode * /*N*/, |
| 3634 | SmallVectorImpl<SDValue> &/*Results*/, |
| 3635 | SelectionDAG &/*DAG*/) const { |
| 3636 | llvm_unreachable("ReplaceNodeResults not implemented for this target!"); |
| 3637 | } |
| 3638 | |
| 3639 | /// This method returns the name of a target specific DAG node. |
| 3640 | virtual const char *getTargetNodeName(unsigned Opcode) const; |
| 3641 | |
| 3642 | /// This method returns a target specific FastISel object, or null if the |
| 3643 | /// target does not support "fast" ISel. |
| 3644 | virtual FastISel *createFastISel(FunctionLoweringInfo &, |
| 3645 | const TargetLibraryInfo *) const { |
| 3646 | return nullptr; |
| 3647 | } |
| 3648 | |
| 3649 | bool verifyReturnAddressArgumentIsConstant(SDValue Op, |
| 3650 | SelectionDAG &DAG) const; |
| 3651 | |
| 3652 | //===--------------------------------------------------------------------===// |
| 3653 | // Inline Asm Support hooks |
| 3654 | // |
| 3655 | |
| 3656 | /// This hook allows the target to expand an inline asm call to be explicit |
| 3657 | /// llvm code if it wants to. This is useful for turning simple inline asms |
| 3658 | /// into LLVM intrinsics, which gives the compiler more information about the |
| 3659 | /// behavior of the code. |
| 3660 | virtual bool ExpandInlineAsm(CallInst *) const { |
| 3661 | return false; |
| 3662 | } |
| 3663 | |
| 3664 | enum ConstraintType { |
| 3665 | C_Register, // Constraint represents specific register(s). |
| 3666 | C_RegisterClass, // Constraint represents any of register(s) in class. |
| 3667 | C_Memory, // Memory constraint. |
| 3668 | C_Other, // Something else. |
| 3669 | C_Unknown // Unsupported constraint. |
| 3670 | }; |
| 3671 | |
| 3672 | enum ConstraintWeight { |
| 3673 | // Generic weights. |
| 3674 | CW_Invalid = -1, // No match. |
| 3675 | CW_Okay = 0, // Acceptable. |
| 3676 | CW_Good = 1, // Good weight. |
| 3677 | CW_Better = 2, // Better weight. |
| 3678 | CW_Best = 3, // Best weight. |
| 3679 | |
| 3680 | // Well-known weights. |
| 3681 | CW_SpecificReg = CW_Okay, // Specific register operands. |
| 3682 | CW_Register = CW_Good, // Register operands. |
| 3683 | CW_Memory = CW_Better, // Memory operands. |
| 3684 | CW_Constant = CW_Best, // Constant operand. |
| 3685 | CW_Default = CW_Okay // Default or don't know type. |
| 3686 | }; |
| 3687 | |
| 3688 | /// This contains information for each constraint that we are lowering. |
| 3689 | struct AsmOperandInfo : public InlineAsm::ConstraintInfo { |
| 3690 | /// This contains the actual string for the code, like "m". TargetLowering |
| 3691 | /// picks the 'best' code from ConstraintInfo::Codes that most closely |
| 3692 | /// matches the operand. |
| 3693 | std::string ConstraintCode; |
| 3694 | |
| 3695 | /// Information about the constraint code, e.g. Register, RegisterClass, |
| 3696 | /// Memory, Other, Unknown. |
| 3697 | TargetLowering::ConstraintType ConstraintType = TargetLowering::C_Unknown; |
| 3698 | |
| 3699 | /// If this is the result output operand or a clobber, this is null, |
| 3700 | /// otherwise it is the incoming operand to the CallInst. This gets |
| 3701 | /// modified as the asm is processed. |
| 3702 | Value *CallOperandVal = nullptr; |
| 3703 | |
| 3704 | /// The ValueType for the operand value. |
| 3705 | MVT ConstraintVT = MVT::Other; |
| 3706 | |
| 3707 | /// Copy constructor for copying from a ConstraintInfo. |
| 3708 | AsmOperandInfo(InlineAsm::ConstraintInfo Info) |
| 3709 | : InlineAsm::ConstraintInfo(std::move(Info)) {} |
| 3710 | |
| 3711 | /// Return true if this is an input operand that is a matching constraint, |
| 3712 | /// such as "4". |
| 3713 | bool isMatchingInputConstraint() const; |
| 3714 | |
| 3715 | /// If this is an input matching constraint, this method returns the output |
| 3716 | /// operand it matches. |
| 3717 | unsigned getMatchedOperand() const; |
| 3718 | }; |
| 3719 | |
| 3720 | using AsmOperandInfoVector = std::vector<AsmOperandInfo>; |
| 3721 | |
| 3722 | /// Split up the constraint string from the inline assembly value into the |
| 3723 | /// specific constraints and their prefixes, and also tie in the associated |
| 3724 | /// operand values. If this returns an empty vector, and if the constraint |
| 3725 | /// string itself isn't empty, there was an error parsing. |
| 3726 | virtual AsmOperandInfoVector ParseConstraints(const DataLayout &DL, |
| 3727 | const TargetRegisterInfo *TRI, |
| 3728 | ImmutableCallSite CS) const; |
| 3729 | |
| 3730 | /// Examine constraint type and operand type and determine a weight value. |
| 3731 | /// The operand object must already have been set up with the operand type. |
| 3732 | virtual ConstraintWeight getMultipleConstraintMatchWeight( |
| 3733 | AsmOperandInfo &info, int maIndex) const; |
| 3734 | |
| 3735 | /// Examine constraint string and operand type and determine a weight value. |
| 3736 | /// The operand object must already have been set up with the operand type. |
| 3737 | virtual ConstraintWeight getSingleConstraintMatchWeight( |
| 3738 | AsmOperandInfo &info, const char *constraint) const; |
| 3739 | |
| 3740 | /// Determines the constraint code and constraint type to use for the specific |
| 3741 | /// AsmOperandInfo, setting OpInfo.ConstraintCode and OpInfo.ConstraintType. |
| 3742 | /// If the actual operand being passed in is available, it can be passed in as |
| 3743 | /// Op, otherwise an empty SDValue can be passed. |
| 3744 | virtual void ComputeConstraintToUse(AsmOperandInfo &OpInfo, |
| 3745 | SDValue Op, |
| 3746 | SelectionDAG *DAG = nullptr) const; |
| 3747 | |
| 3748 | /// Given a constraint, return the type of constraint it is for this target. |
| 3749 | virtual ConstraintType getConstraintType(StringRef Constraint) const; |
| 3750 | |
| 3751 | /// Given a physical register constraint (e.g. {edx}), return the register |
| 3752 | /// number and the register class for the register. |
| 3753 | /// |
| 3754 | /// Given a register class constraint, like 'r', if this corresponds directly |
| 3755 | /// to an LLVM register class, return a register of 0 and the register class |
| 3756 | /// pointer. |
| 3757 | /// |
| 3758 | /// This should only be used for C_Register constraints. On error, this |
| 3759 | /// returns a register number of 0 and a null register class pointer. |
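| | /// |
| | /// A sketch of a common override, where "MyTarget::GPR32RegClass" stands in |
| | /// for a tablegen-generated register class of the target: |
| | /// \code |
| | ///   std::pair<unsigned, const TargetRegisterClass *> |
| | ///   MyTargetLowering::getRegForInlineAsmConstraint( |
| | ///       const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { |
| | ///     if (Constraint.size() == 1 && Constraint[0] == 'r') |
| | ///       return std::make_pair(0U, &MyTarget::GPR32RegClass); |
| | ///     return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); |
| | ///   } |
| | /// \endcode |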
| 3760 | virtual std::pair<unsigned, const TargetRegisterClass *> |
| 3761 | getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, |
| 3762 | StringRef Constraint, MVT VT) const; |
| 3763 | |
| 3764 | virtual unsigned getInlineAsmMemConstraint(StringRef ConstraintCode) const { |
| 3765 | if (ConstraintCode == "i") |
| 3766 | return InlineAsm::Constraint_i; |
| 3767 | else if (ConstraintCode == "m") |
| 3768 | return InlineAsm::Constraint_m; |
| 3769 | return InlineAsm::Constraint_Unknown; |
| 3770 | } |
| 3771 | |
| 3772 | /// Try to replace an X constraint, which matches anything, with another that |
| 3773 | /// has more specific requirements based on the type of the corresponding |
| 3774 | /// operand. This returns null if there is no replacement to make. |
| 3775 | virtual const char *LowerXConstraint(EVT ConstraintVT) const; |
| 3776 | |
| 3777 | /// Lower the specified operand into the Ops vector. If it is invalid, don't |
| 3778 | /// add anything to Ops. |
| 3779 | virtual void LowerAsmOperandForConstraint(SDValue Op, std::string &Constraint, |
| 3780 | std::vector<SDValue> &Ops, |
| 3781 | SelectionDAG &DAG) const; |
| 3782 | |
Andrew Walbran | 16937d0 | 2019-10-22 13:54:20 +0100 | [diff] [blame] | 3783 | // Lower custom output constraints. If invalid, return SDValue(). |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame^] | 3784 | virtual SDValue LowerAsmOutputForConstraint(SDValue &Chain, SDValue &Flag, |
Andrew Walbran | 16937d0 | 2019-10-22 13:54:20 +0100 | [diff] [blame] | 3785 | SDLoc DL, |
| 3786 | const AsmOperandInfo &OpInfo, |
| 3787 | SelectionDAG &DAG) const; |
| 3788 | |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 3789 | //===--------------------------------------------------------------------===// |
| 3790 | // Div utility functions |
| 3791 | // |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 3792 | SDValue BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, |
| 3793 | SmallVectorImpl<SDNode *> &Created) const; |
| 3794 | SDValue BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, |
| 3795 | SmallVectorImpl<SDNode *> &Created) const; |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 3796 | |
| 3797 | /// Targets may override this function to provide custom SDIV lowering for |
| 3798 | /// power-of-2 denominators. If the target returns an empty SDValue, LLVM |
| 3799 | /// assumes SDIV is expensive and replaces it with a series of other integer |
| 3800 | /// operations. |
| 3801 | virtual SDValue BuildSDIVPow2(SDNode *N, const APInt &Divisor, |
| 3802 | SelectionDAG &DAG, |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 3803 | SmallVectorImpl<SDNode *> &Created) const; |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 3804 | |
| 3805 | /// Indicate whether this target prefers to combine FDIVs with the same |
| 3806 | /// divisor. If the transform should never be done, return zero. If the |
| 3807 | /// transform should be done, return the minimum number of divisor uses |
| 3808 | /// that must exist. |
| 3809 | virtual unsigned combineRepeatedFPDivisors() const { |
| 3810 | return 0; |
| 3811 | } |
| 3812 | |
| 3813 | /// Hooks for building estimates in place of slower divisions and square |
| 3814 | /// roots. |
| 3815 | |
| 3816 | /// Return either a square root or its reciprocal estimate value for the input |
| 3817 | /// operand. |
| 3818 | /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or |
| 3819 | /// 'Enabled' as set by a potential default override attribute. |
| 3820 | /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson |
| 3821 | /// refinement iterations required to generate a sufficient (though not |
| 3822 | /// necessarily IEEE-754 compliant) estimate is returned in that parameter. |
| 3823 | /// The boolean UseOneConstNR output is used to select a Newton-Raphson |
| 3824 | /// algorithm implementation that uses either one or two constants. |
| 3825 | /// The boolean Reciprocal is used to select whether the estimate is for the |
| 3826 | /// square root of the input operand or the reciprocal of its square root. |
| 3827 | /// A target may choose to implement its own refinement within this function. |
| 3828 | /// If that's true, then return '0' as the number of RefinementSteps to avoid |
| 3829 | /// any further refinement of the estimate. |
| 3830 | /// An empty SDValue return means no estimate sequence can be created. |
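| | /// |
| | /// A rough sketch of an override for a target with a reciprocal square root |
| | /// estimate instruction; "MyTargetISD::FRSQRTE" is hypothetical and the |
| | /// refinement policy shown is illustrative, not prescriptive: |
| | /// \code |
| | ///   SDValue MyTargetLowering::getSqrtEstimate(SDValue Operand, |
| | ///                                             SelectionDAG &DAG, int Enabled, |
| | ///                                             int &RefinementSteps, |
| | ///                                             bool &UseOneConstNR, |
| | ///                                             bool Reciprocal) const { |
| | ///     EVT VT = Operand.getValueType(); |
| | ///     if (VT != MVT::f32) |
| | ///       return SDValue();        // No estimate available. |
| | ///     if (RefinementSteps == ReciprocalEstimate::Unspecified) |
| | ///       RefinementSteps = 1;     // One Newton-Raphson iteration. |
| | ///     UseOneConstNR = true; |
| | ///     return DAG.getNode(MyTargetISD::FRSQRTE, SDLoc(Operand), VT, Operand); |
| | ///   } |
| | /// \endcode |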
| 3831 | virtual SDValue getSqrtEstimate(SDValue Operand, SelectionDAG &DAG, |
| 3832 | int Enabled, int &RefinementSteps, |
| 3833 | bool &UseOneConstNR, bool Reciprocal) const { |
| 3834 | return SDValue(); |
| 3835 | } |
| 3836 | |
| 3837 | /// Return a reciprocal estimate value for the input operand. |
| 3838 | /// \p Enabled is a ReciprocalEstimate enum with value either 'Unspecified' or |
| 3839 | /// 'Enabled' as set by a potential default override attribute. |
| 3840 | /// If \p RefinementSteps is 'Unspecified', the number of Newton-Raphson |
| 3841 | /// refinement iterations required to generate a sufficient (though not |
| 3842 | /// necessarily IEEE-754 compliant) estimate is returned in that parameter. |
| 3843 | /// A target may choose to implement its own refinement within this function. |
| 3844 | /// If that's true, then return '0' as the number of RefinementSteps to avoid |
| 3845 | /// any further refinement of the estimate. |
| 3846 | /// An empty SDValue return means no estimate sequence can be created. |
| 3847 | virtual SDValue getRecipEstimate(SDValue Operand, SelectionDAG &DAG, |
| 3848 | int Enabled, int &RefinementSteps) const { |
| 3849 | return SDValue(); |
| 3850 | } |
| 3851 | |
| 3852 | //===--------------------------------------------------------------------===// |
| 3853 | // Legalization utility functions |
| 3854 | // |
| 3855 | |
| 3856 | /// Expand a MUL or [US]MUL_LOHI of n-bit values into two or four nodes, |
| 3857 | /// respectively, each computing an n/2-bit part of the result. |
| 3858 | /// \param Result A vector that will be filled with the parts of the result |
| 3859 | /// in little-endian order. |
| 3860 | /// \param LL Low bits of the LHS of the MUL. You can use this parameter |
| 3861 | /// if you want to control how low bits are extracted from the LHS. |
| 3862 | /// \param LH High bits of the LHS of the MUL. See LL for meaning. |
| 3863 | /// \param RL Low bits of the RHS of the MUL. See LL for meaning. |
| 3864 | /// \param RH High bits of the RHS of the MUL. See LL for meaning. |
| 3865 | /// \returns true if the node has been expanded, false if it has not |
| 3866 | bool expandMUL_LOHI(unsigned Opcode, EVT VT, SDLoc dl, SDValue LHS, |
| 3867 | SDValue RHS, SmallVectorImpl<SDValue> &Result, EVT HiLoVT, |
| 3868 | SelectionDAG &DAG, MulExpansionKind Kind, |
| 3869 | SDValue LL = SDValue(), SDValue LH = SDValue(), |
| 3870 | SDValue RL = SDValue(), SDValue RH = SDValue()) const; |
| 3871 | |
| 3872 | /// Expand a MUL into two nodes. One that computes the high bits of |
| 3873 | /// the result and one that computes the low bits. |
| 3874 | /// \param HiLoVT The value type to use for the Lo and Hi nodes. |
| 3875 | /// \param LL Low bits of the LHS of the MUL. You can use this parameter |
| 3876 | /// if you want to control how low bits are extracted from the LHS. |
| 3877 | /// \param LH High bits of the LHS of the MUL. See LL for meaning. |
| 3878 | /// \param RL Low bits of the RHS of the MUL. See LL for meaning. |
| 3879 | /// \param RH High bits of the RHS of the MUL. See LL for meaning. |
| 3880 | /// \returns true if the node has been expanded, false if it has not |
| 3881 | bool expandMUL(SDNode *N, SDValue &Lo, SDValue &Hi, EVT HiLoVT, |
| 3882 | SelectionDAG &DAG, MulExpansionKind Kind, |
| 3883 | SDValue LL = SDValue(), SDValue LH = SDValue(), |
| 3884 | SDValue RL = SDValue(), SDValue RH = SDValue()) const; |
| 3885 | |
Andrew Walbran | 16937d0 | 2019-10-22 13:54:20 +0100 | [diff] [blame] | 3886 | /// Expand funnel shift. |
| 3887 | /// \param N Node to expand |
| 3888 | /// \param Result output after conversion |
| 3889 | /// \returns True, if the expansion was successful, false otherwise |
| 3890 | bool expandFunnelShift(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; |
| 3891 | |
| 3892 | /// Expand rotations. |
| 3893 | /// \param N Node to expand |
| 3894 | /// \param Result output after conversion |
| 3895 | /// \returns True, if the expansion was successful, false otherwise |
| 3896 | bool expandROT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; |
| 3897 | |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 3898 | /// Expand float(f32) to SINT(i64) conversion |
| 3899 | /// \param N Node to expand |
| 3900 | /// \param Result output after conversion |
| 3901 | /// \returns True, if the expansion was successful, false otherwise |
| 3902 | bool expandFP_TO_SINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; |
| 3903 | |
Andrew Walbran | 16937d0 | 2019-10-22 13:54:20 +0100 | [diff] [blame] | 3904 | /// Expand float to UINT conversion |
| 3905 | /// \param N Node to expand |
| 3906 | /// \param Result output after conversion |
| 3907 | /// \returns True, if the expansion was successful, false otherwise |
| 3908 | bool expandFP_TO_UINT(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; |
| 3909 | |
| 3910 | /// Expand UINT(i64) to double(f64) conversion |
| 3911 | /// \param N Node to expand |
| 3912 | /// \param Result output after conversion |
| 3913 | /// \returns True, if the expansion was successful, false otherwise |
| 3914 | bool expandUINT_TO_FP(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; |
| 3915 | |
| 3916 | /// Expand fminnum/fmaxnum into fminnum_ieee/fmaxnum_ieee with quieted inputs. |
| 3917 | SDValue expandFMINNUM_FMAXNUM(SDNode *N, SelectionDAG &DAG) const; |
| 3918 | |
| 3919 | /// Expand CTPOP nodes. Expands vector/scalar CTPOP nodes, |
| 3920 | /// vector nodes can only succeed if all operations are legal/custom. |
| 3921 | /// \param N Node to expand |
| 3922 | /// \param Result output after conversion |
| 3923 | /// \returns True, if the expansion was successful, false otherwise |
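| | /// |
| | /// Callers typically use it as follows (a sketch, with \c Node and \c DAG |
| | /// assumed to be in scope): |
| | /// \code |
| | ///   SDValue Result; |
| | ///   if (expandCTPOP(Node, Result, DAG)) |
| | ///     return Result;             // Use the generic expansion. |
| | ///   // Otherwise fall back to a libcall or scalarization. |
| | /// \endcode |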
| 3924 | bool expandCTPOP(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; |
| 3925 | |
| 3926 | /// Expand CTLZ/CTLZ_ZERO_UNDEF nodes. Expands vector/scalar CTLZ nodes, |
| 3927 | /// vector nodes can only succeed if all operations are legal/custom. |
| 3928 | /// \param N Node to expand |
| 3929 | /// \param Result output after conversion |
| 3930 | /// \returns True, if the expansion was successful, false otherwise |
| 3931 | bool expandCTLZ(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; |
| 3932 | |
| 3933 | /// Expand CTTZ/CTTZ_ZERO_UNDEF nodes. Expands vector/scalar CTTZ nodes, |
| 3934 | /// vector nodes can only succeed if all operations are legal/custom. |
| 3935 | /// \param N Node to expand |
| 3936 | /// \param Result output after conversion |
| 3937 | /// \returns True, if the expansion was successful, false otherwise |
| 3938 | bool expandCTTZ(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; |
| 3939 | |
| 3940 | /// Expand ABS nodes. Expands vector/scalar ABS nodes, |
| 3941 | /// vector nodes can only succeed if all operations are legal/custom. |
| 3942 | /// (ABS x) -> (XOR (ADD x, (SRA x, type_size - 1)), (SRA x, type_size - 1)) |
| 3943 | /// \param N Node to expand |
| 3944 | /// \param Result output after conversion |
| 3945 | /// \returns True, if the expansion was successful, false otherwise |
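| | /// For example, for i8 x = -5 (0xFB): (SRA x, 7) = 0xFF, (ADD x, 0xFF) = 0xFA, |
| | /// and (XOR 0xFA, 0xFF) = 0x05, i.e. 5. |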
| 3946 | bool expandABS(SDNode *N, SDValue &Result, SelectionDAG &DAG) const; |
| 3947 | |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 3948 | /// Turn load of vector type into a load of the individual elements. |
| 3949 | /// \param LD load to expand |
| 3950 | /// \returns MERGE_VALUEs of the scalar loads with their chains. |
| 3951 | SDValue scalarizeVectorLoad(LoadSDNode *LD, SelectionDAG &DAG) const; |
| 3952 | |
| 3953 | /// Turn a store of a vector type into stores of the individual elements. |
| 3954 | /// \param ST Store with a vector value type |
| 3955 | /// \returns MERGE_VALUEs of the individual store chains. |
| 3956 | SDValue scalarizeVectorStore(StoreSDNode *ST, SelectionDAG &DAG) const; |
| 3957 | |
| 3958 | /// Expands an unaligned load to 2 half-size loads for an integer, and |
| 3959 | /// possibly more for vectors. |
| 3960 | std::pair<SDValue, SDValue> expandUnalignedLoad(LoadSDNode *LD, |
| 3961 | SelectionDAG &DAG) const; |
| 3962 | |
| 3963 | /// Expands an unaligned store to 2 half-size stores for integer values, and |
| 3964 | /// possibly more for vectors. |
| 3965 | SDValue expandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG) const; |
| 3966 | |
| 3967 | /// Increments memory address \p Addr according to the type of the value |
| 3968 | /// \p DataVT that should be stored. If the data is stored in compressed |
| 3969 | /// form, the memory address should be incremented according to the number of |
| 3970 | /// the stored elements. This number is equal to the number of set ('1') bits |
| 3971 | /// in \p Mask. |
| 3972 | /// \p DataVT is a vector type. \p Mask is a vector value. |
| 3973 | /// \p DataVT and \p Mask have the same number of vector elements. |
| 3974 | SDValue IncrementMemoryAddress(SDValue Addr, SDValue Mask, const SDLoc &DL, |
| 3975 | EVT DataVT, SelectionDAG &DAG, |
| 3976 | bool IsCompressedMemory) const; |
| 3977 | |
| 3978 | /// Get a pointer to vector element \p Idx located in memory for a vector of |
| 3979 | /// type \p VecVT starting at a base address of \p VecPtr. If \p Idx is out of |
| 3980 | /// bounds the returned pointer is unspecified, but will be within the vector |
| 3981 | /// bounds. |
| 3982 | SDValue getVectorElementPointer(SelectionDAG &DAG, SDValue VecPtr, EVT VecVT, |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 3983 | SDValue Index) const; |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 3984 | |
Andrew Walbran | 16937d0 | 2019-10-22 13:54:20 +0100 | [diff] [blame] | 3985 | /// Method for building the DAG expansion of ISD::[US][ADD|SUB]SAT. This |
| 3986 | /// method accepts integers as its arguments. |
| 3987 | SDValue expandAddSubSat(SDNode *Node, SelectionDAG &DAG) const; |
| 3988 | |
| 3989 | /// Method for building the DAG expansion of ISD::SMULFIX. This method accepts |
| 3990 | /// integers as its arguments. |
| 3991 | SDValue expandFixedPointMul(SDNode *Node, SelectionDAG &DAG) const; |
| 3992 | |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame^] | 3993 | /// Method for building the DAG expansion of ISD::U(ADD|SUB)O. Expansion |
| 3994 | /// always succeeds and populates the Result and Overflow arguments. |
| 3995 | void expandUADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, |
| 3996 | SelectionDAG &DAG) const; |
| 3997 | |
| 3998 | /// Method for building the DAG expansion of ISD::S(ADD|SUB)O. Expansion |
| 3999 | /// always succeeds and populates the Result and Overflow arguments. |
| 4000 | void expandSADDSUBO(SDNode *Node, SDValue &Result, SDValue &Overflow, |
| 4001 | SelectionDAG &DAG) const; |
| 4002 | |
| 4003 | /// Method for building the DAG expansion of ISD::[US]MULO. Returns whether |
| 4004 | /// expansion was successful and populates the Result and Overflow arguments. |
| 4005 | bool expandMULO(SDNode *Node, SDValue &Result, SDValue &Overflow, |
| 4006 | SelectionDAG &DAG) const; |
| 4007 | |
| 4008 | /// Expand a VECREDUCE_* into an explicit calculation. If Count is specified, |
| 4009 | /// only the first Count elements of the vector are used. |
| 4010 | SDValue expandVecReduce(SDNode *Node, SelectionDAG &DAG) const; |
| 4011 | |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 4012 | //===--------------------------------------------------------------------===// |
| 4013 | // Instruction Emitting Hooks |
| 4014 | // |
| 4015 | |
| 4016 | /// This method should be implemented by targets that mark instructions with |
| 4017 | /// the 'usesCustomInserter' flag. These instructions are special in various |
| 4018 | /// ways, which require special support to insert. The specified MachineInstr |
| 4019 | /// is created but not inserted into any basic blocks, and this method is |
| 4020 | /// called to expand it into a sequence of instructions, potentially also |
| 4021 | /// creating new basic blocks and control flow. |
| 4022 | /// As long as the returned basic block is different (i.e., we created a new |
| 4023 | /// one), the custom inserter is free to modify the rest of \p MBB. |
| 4024 | virtual MachineBasicBlock * |
| 4025 | EmitInstrWithCustomInserter(MachineInstr &MI, MachineBasicBlock *MBB) const; |
| 4026 | |
| 4027 | /// This method should be implemented by targets that mark instructions with |
| 4028 | /// the 'hasPostISelHook' flag. These instructions must be adjusted after |
| 4029 | /// instruction selection by target hooks. e.g. To fill in optional defs for |
| 4030 | /// ARM 's' setting instructions. |
| 4031 | virtual void AdjustInstrPostInstrSelection(MachineInstr &MI, |
| 4032 | SDNode *Node) const; |
| 4033 | |
| 4034 | /// If this function returns true, SelectionDAGBuilder emits a |
| 4035 | /// LOAD_STACK_GUARD node when it is lowering Intrinsic::stackprotector. |
| 4036 | virtual bool useLoadStackGuardNode() const { |
| 4037 | return false; |
| 4038 | } |
| 4039 | |
| 4040 | virtual SDValue emitStackGuardXorFP(SelectionDAG &DAG, SDValue Val, |
| 4041 | const SDLoc &DL) const { |
| 4042 | llvm_unreachable("not implemented for this target"); |
| 4043 | } |
| 4044 | |
| 4045 | /// Lower TLS global address SDNode for target independent emulated TLS model. |
| 4046 | virtual SDValue LowerToTLSEmulatedModel(const GlobalAddressSDNode *GA, |
| 4047 | SelectionDAG &DAG) const; |
| 4048 | |
| 4049 | /// Expands target-specific indirect branch for the case of JumpTable |
| 4050 | /// expansion. |
| 4051 | virtual SDValue expandIndirectJTBranch(const SDLoc& dl, SDValue Value, SDValue Addr, |
| 4052 | SelectionDAG &DAG) const { |
| 4053 | return DAG.getNode(ISD::BRIND, dl, MVT::Other, Value, Addr); |
| 4054 | } |
| 4055 | |
| 4056 | // seteq(x, 0) -> truncate(srl(ctlz(zext(x)), log2(#bits))) |
| 4057 | // If we're comparing for equality to zero and isCtlzFast is true, expose the |
| 4058 | // fact that this can be implemented as a ctlz/srl pair, so that the dag |
| 4059 | // combiner can fold the new nodes. |
| 4060 | SDValue lowerCmpEqZeroToCtlzSrl(SDValue Op, SelectionDAG &DAG) const; |
| 4061 | |
| 4062 | private: |
Andrew Walbran | 16937d0 | 2019-10-22 13:54:20 +0100 | [diff] [blame] | 4063 | SDValue foldSetCCWithAnd(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, |
| 4064 | const SDLoc &DL, DAGCombinerInfo &DCI) const; |
| 4065 | SDValue foldSetCCWithBinOp(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, |
| 4066 | const SDLoc &DL, DAGCombinerInfo &DCI) const; |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 4067 | |
| 4068 | SDValue optimizeSetCCOfSignedTruncationCheck(EVT SCCVT, SDValue N0, |
| 4069 | SDValue N1, ISD::CondCode Cond, |
| 4070 | DAGCombinerInfo &DCI, |
| 4071 | const SDLoc &DL) const; |
Andrew Walbran | 3d2c197 | 2020-04-07 12:24:26 +0100 | [diff] [blame^] | 4072 | |
| 4073 | SDValue prepareUREMEqFold(EVT SETCCVT, SDValue REMNode, |
| 4074 | SDValue CompTargetNode, ISD::CondCode Cond, |
| 4075 | DAGCombinerInfo &DCI, const SDLoc &DL, |
| 4076 | SmallVectorImpl<SDNode *> &Created) const; |
| 4077 | SDValue buildUREMEqFold(EVT SETCCVT, SDValue REMNode, SDValue CompTargetNode, |
| 4078 | ISD::CondCode Cond, DAGCombinerInfo &DCI, |
| 4079 | const SDLoc &DL) const; |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 4080 | }; |
| 4081 | |
| 4082 | /// Given an LLVM IR type and return type attributes, compute the return value |
| 4083 | /// EVTs and flags, and optionally also the offsets, if the return value is |
| 4084 | /// being lowered to memory. |
Andrew Scull | cdfcccc | 2018-10-05 20:58:37 +0100 | [diff] [blame] | 4085 | void GetReturnInfo(CallingConv::ID CC, Type *ReturnType, AttributeList attr, |
Andrew Scull | 5e1ddfa | 2018-08-14 10:06:54 +0100 | [diff] [blame] | 4086 | SmallVectorImpl<ISD::OutputArg> &Outs, |
| 4087 | const TargetLowering &TLI, const DataLayout &DL); |
| 4088 | |
| 4089 | } // end namespace llvm |
| 4090 | |
| 4091 | #endif // LLVM_CODEGEN_TARGETLOWERING_H |